/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988-2016 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
22 #include "coretypes.h"
31 #include "stringpool.h"
38 #include "diagnostic.h"
41 #include "fold-const.h"
44 #include "stor-layout.h"
47 #include "insn-attr.h"
53 #include "common/common-target.h"
54 #include "langhooks.h"
58 #include "tm-constrs.h"
61 #include "sched-int.h"
63 #include "tree-pass.h"
65 #include "pass_manager.h"
66 #include "target-globals.h"
67 #include "gimple-iterator.h"
68 #include "tree-vectorizer.h"
69 #include "shrink-wrap.h"
72 #include "tree-iterator.h"
73 #include "tree-chkp.h"
76 #include "case-cfn-macros.h"
77 #include "regrename.h"
80 /* This file should be included last. */
81 #include "target-def.h"
83 static rtx legitimize_dllimport_symbol (rtx, bool);
84 static rtx legitimize_pe_coff_extern_decl (rtx, bool);
85 static rtx legitimize_pe_coff_symbol (rtx, bool);
86 static void ix86_print_operand_address_as (FILE *, rtx, addr_space_t, bool);
/* Default stack-probe limit when the target headers did not provide one;
   -1 means "no limit".  The #ifndef/#endif pair was unterminated in the
   original — a preprocessor error.  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.  The
   original macro was truncated after the DImode arm, leaving a dangling
   line continuation; the final ": 4" arm (the "other" bucket, matching
   the five-entry cost arrays below) is restored.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
/* Processor costs (relative to an add).  */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes,
   so a cost expressed in bytes is N*2.  Argument parenthesized to stay
   safe when N is an expression.  */
#define COSTS_N_BYTES(N) ((N) * 2)
104 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}}
106 static stringop_algs ix86_size_memcpy[2] = {
107 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
108 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
109 static stringop_algs ix86_size_memset[2] = {
110 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
111 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
114 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
115 COSTS_N_BYTES (2), /* cost of an add instruction */
116 COSTS_N_BYTES (3), /* cost of a lea instruction */
117 COSTS_N_BYTES (2), /* variable shift costs */
118 COSTS_N_BYTES (3), /* constant shift costs */
119 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
120 COSTS_N_BYTES (3), /* HI */
121 COSTS_N_BYTES (3), /* SI */
122 COSTS_N_BYTES (3), /* DI */
123 COSTS_N_BYTES (5)}, /* other */
124 0, /* cost of multiply per each bit set */
125 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
126 COSTS_N_BYTES (3), /* HI */
127 COSTS_N_BYTES (3), /* SI */
128 COSTS_N_BYTES (3), /* DI */
129 COSTS_N_BYTES (5)}, /* other */
130 COSTS_N_BYTES (3), /* cost of movsx */
131 COSTS_N_BYTES (3), /* cost of movzx */
132 0, /* "large" insn */
134 2, /* cost for loading QImode using movzbl */
135 {2, 2, 2}, /* cost of loading integer registers
136 in QImode, HImode and SImode.
137 Relative to reg-reg move (2). */
138 {2, 2, 2}, /* cost of storing integer registers */
139 2, /* cost of reg,reg fld/fst */
140 {2, 2, 2}, /* cost of loading fp registers
141 in SFmode, DFmode and XFmode */
142 {2, 2, 2}, /* cost of storing fp registers
143 in SFmode, DFmode and XFmode */
144 3, /* cost of moving MMX register */
145 {3, 3}, /* cost of loading MMX registers
146 in SImode and DImode */
147 {3, 3}, /* cost of storing MMX registers
148 in SImode and DImode */
149 3, /* cost of moving SSE register */
150 {3, 3, 3}, /* cost of loading SSE registers
151 in SImode, DImode and TImode */
152 {3, 3, 3}, /* cost of storing SSE registers
153 in SImode, DImode and TImode */
154 3, /* MMX or SSE register to integer */
155 0, /* size of l1 cache */
156 0, /* size of l2 cache */
157 0, /* size of prefetch block */
158 0, /* number of parallel prefetches */
160 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
161 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
162 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
163 COSTS_N_BYTES (2), /* cost of FABS instruction. */
164 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
165 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
168 1, /* scalar_stmt_cost. */
169 1, /* scalar load_cost. */
170 1, /* scalar_store_cost. */
171 1, /* vec_stmt_cost. */
172 1, /* vec_to_scalar_cost. */
173 1, /* scalar_to_vec_cost. */
174 1, /* vec_align_load_cost. */
175 1, /* vec_unalign_load_cost. */
176 1, /* vec_store_cost. */
177 1, /* cond_taken_branch_cost. */
178 1, /* cond_not_taken_branch_cost. */
181 /* Processor costs (relative to an add) */
182 static stringop_algs i386_memcpy[2] = {
183 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
184 DUMMY_STRINGOP_ALGS};
185 static stringop_algs i386_memset[2] = {
186 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
187 DUMMY_STRINGOP_ALGS};
190 struct processor_costs i386_cost = { /* 386 specific costs */
191 COSTS_N_INSNS (1), /* cost of an add instruction */
192 COSTS_N_INSNS (1), /* cost of a lea instruction */
193 COSTS_N_INSNS (3), /* variable shift costs */
194 COSTS_N_INSNS (2), /* constant shift costs */
195 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
196 COSTS_N_INSNS (6), /* HI */
197 COSTS_N_INSNS (6), /* SI */
198 COSTS_N_INSNS (6), /* DI */
199 COSTS_N_INSNS (6)}, /* other */
200 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
201 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
202 COSTS_N_INSNS (23), /* HI */
203 COSTS_N_INSNS (23), /* SI */
204 COSTS_N_INSNS (23), /* DI */
205 COSTS_N_INSNS (23)}, /* other */
206 COSTS_N_INSNS (3), /* cost of movsx */
207 COSTS_N_INSNS (2), /* cost of movzx */
208 15, /* "large" insn */
210 4, /* cost for loading QImode using movzbl */
211 {2, 4, 2}, /* cost of loading integer registers
212 in QImode, HImode and SImode.
213 Relative to reg-reg move (2). */
214 {2, 4, 2}, /* cost of storing integer registers */
215 2, /* cost of reg,reg fld/fst */
216 {8, 8, 8}, /* cost of loading fp registers
217 in SFmode, DFmode and XFmode */
218 {8, 8, 8}, /* cost of storing fp registers
219 in SFmode, DFmode and XFmode */
220 2, /* cost of moving MMX register */
221 {4, 8}, /* cost of loading MMX registers
222 in SImode and DImode */
223 {4, 8}, /* cost of storing MMX registers
224 in SImode and DImode */
225 2, /* cost of moving SSE register */
226 {4, 8, 16}, /* cost of loading SSE registers
227 in SImode, DImode and TImode */
228 {4, 8, 16}, /* cost of storing SSE registers
229 in SImode, DImode and TImode */
230 3, /* MMX or SSE register to integer */
231 0, /* size of l1 cache */
232 0, /* size of l2 cache */
233 0, /* size of prefetch block */
234 0, /* number of parallel prefetches */
236 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
237 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
238 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
239 COSTS_N_INSNS (22), /* cost of FABS instruction. */
240 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
241 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
244 1, /* scalar_stmt_cost. */
245 1, /* scalar load_cost. */
246 1, /* scalar_store_cost. */
247 1, /* vec_stmt_cost. */
248 1, /* vec_to_scalar_cost. */
249 1, /* scalar_to_vec_cost. */
250 1, /* vec_align_load_cost. */
251 2, /* vec_unalign_load_cost. */
252 1, /* vec_store_cost. */
253 3, /* cond_taken_branch_cost. */
254 1, /* cond_not_taken_branch_cost. */
257 static stringop_algs i486_memcpy[2] = {
258 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
259 DUMMY_STRINGOP_ALGS};
260 static stringop_algs i486_memset[2] = {
261 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
262 DUMMY_STRINGOP_ALGS};
265 struct processor_costs i486_cost = { /* 486 specific costs */
266 COSTS_N_INSNS (1), /* cost of an add instruction */
267 COSTS_N_INSNS (1), /* cost of a lea instruction */
268 COSTS_N_INSNS (3), /* variable shift costs */
269 COSTS_N_INSNS (2), /* constant shift costs */
270 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
271 COSTS_N_INSNS (12), /* HI */
272 COSTS_N_INSNS (12), /* SI */
273 COSTS_N_INSNS (12), /* DI */
274 COSTS_N_INSNS (12)}, /* other */
275 1, /* cost of multiply per each bit set */
276 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
277 COSTS_N_INSNS (40), /* HI */
278 COSTS_N_INSNS (40), /* SI */
279 COSTS_N_INSNS (40), /* DI */
280 COSTS_N_INSNS (40)}, /* other */
281 COSTS_N_INSNS (3), /* cost of movsx */
282 COSTS_N_INSNS (2), /* cost of movzx */
283 15, /* "large" insn */
285 4, /* cost for loading QImode using movzbl */
286 {2, 4, 2}, /* cost of loading integer registers
287 in QImode, HImode and SImode.
288 Relative to reg-reg move (2). */
289 {2, 4, 2}, /* cost of storing integer registers */
290 2, /* cost of reg,reg fld/fst */
291 {8, 8, 8}, /* cost of loading fp registers
292 in SFmode, DFmode and XFmode */
293 {8, 8, 8}, /* cost of storing fp registers
294 in SFmode, DFmode and XFmode */
295 2, /* cost of moving MMX register */
296 {4, 8}, /* cost of loading MMX registers
297 in SImode and DImode */
298 {4, 8}, /* cost of storing MMX registers
299 in SImode and DImode */
300 2, /* cost of moving SSE register */
301 {4, 8, 16}, /* cost of loading SSE registers
302 in SImode, DImode and TImode */
303 {4, 8, 16}, /* cost of storing SSE registers
304 in SImode, DImode and TImode */
305 3, /* MMX or SSE register to integer */
306 4, /* size of l1 cache. 486 has 8kB cache
307 shared for code and data, so 4kB is
308 not really precise. */
309 4, /* size of l2 cache */
310 0, /* size of prefetch block */
311 0, /* number of parallel prefetches */
313 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
314 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
315 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
316 COSTS_N_INSNS (3), /* cost of FABS instruction. */
317 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
318 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
321 1, /* scalar_stmt_cost. */
322 1, /* scalar load_cost. */
323 1, /* scalar_store_cost. */
324 1, /* vec_stmt_cost. */
325 1, /* vec_to_scalar_cost. */
326 1, /* scalar_to_vec_cost. */
327 1, /* vec_align_load_cost. */
328 2, /* vec_unalign_load_cost. */
329 1, /* vec_store_cost. */
330 3, /* cond_taken_branch_cost. */
331 1, /* cond_not_taken_branch_cost. */
334 static stringop_algs pentium_memcpy[2] = {
335 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
336 DUMMY_STRINGOP_ALGS};
337 static stringop_algs pentium_memset[2] = {
338 {libcall, {{-1, rep_prefix_4_byte, false}}},
339 DUMMY_STRINGOP_ALGS};
342 struct processor_costs pentium_cost = {
343 COSTS_N_INSNS (1), /* cost of an add instruction */
344 COSTS_N_INSNS (1), /* cost of a lea instruction */
345 COSTS_N_INSNS (4), /* variable shift costs */
346 COSTS_N_INSNS (1), /* constant shift costs */
347 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
348 COSTS_N_INSNS (11), /* HI */
349 COSTS_N_INSNS (11), /* SI */
350 COSTS_N_INSNS (11), /* DI */
351 COSTS_N_INSNS (11)}, /* other */
352 0, /* cost of multiply per each bit set */
353 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
354 COSTS_N_INSNS (25), /* HI */
355 COSTS_N_INSNS (25), /* SI */
356 COSTS_N_INSNS (25), /* DI */
357 COSTS_N_INSNS (25)}, /* other */
358 COSTS_N_INSNS (3), /* cost of movsx */
359 COSTS_N_INSNS (2), /* cost of movzx */
360 8, /* "large" insn */
362 6, /* cost for loading QImode using movzbl */
363 {2, 4, 2}, /* cost of loading integer registers
364 in QImode, HImode and SImode.
365 Relative to reg-reg move (2). */
366 {2, 4, 2}, /* cost of storing integer registers */
367 2, /* cost of reg,reg fld/fst */
368 {2, 2, 6}, /* cost of loading fp registers
369 in SFmode, DFmode and XFmode */
370 {4, 4, 6}, /* cost of storing fp registers
371 in SFmode, DFmode and XFmode */
372 8, /* cost of moving MMX register */
373 {8, 8}, /* cost of loading MMX registers
374 in SImode and DImode */
375 {8, 8}, /* cost of storing MMX registers
376 in SImode and DImode */
377 2, /* cost of moving SSE register */
378 {4, 8, 16}, /* cost of loading SSE registers
379 in SImode, DImode and TImode */
380 {4, 8, 16}, /* cost of storing SSE registers
381 in SImode, DImode and TImode */
382 3, /* MMX or SSE register to integer */
383 8, /* size of l1 cache. */
384 8, /* size of l2 cache */
385 0, /* size of prefetch block */
386 0, /* number of parallel prefetches */
388 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
389 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
390 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
391 COSTS_N_INSNS (1), /* cost of FABS instruction. */
392 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
393 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
396 1, /* scalar_stmt_cost. */
397 1, /* scalar load_cost. */
398 1, /* scalar_store_cost. */
399 1, /* vec_stmt_cost. */
400 1, /* vec_to_scalar_cost. */
401 1, /* scalar_to_vec_cost. */
402 1, /* vec_align_load_cost. */
403 2, /* vec_unalign_load_cost. */
404 1, /* vec_store_cost. */
405 3, /* cond_taken_branch_cost. */
406 1, /* cond_not_taken_branch_cost. */
410 struct processor_costs lakemont_cost = {
411 COSTS_N_INSNS (1), /* cost of an add instruction */
412 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
413 COSTS_N_INSNS (1), /* variable shift costs */
414 COSTS_N_INSNS (1), /* constant shift costs */
415 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
416 COSTS_N_INSNS (11), /* HI */
417 COSTS_N_INSNS (11), /* SI */
418 COSTS_N_INSNS (11), /* DI */
419 COSTS_N_INSNS (11)}, /* other */
420 0, /* cost of multiply per each bit set */
421 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
422 COSTS_N_INSNS (25), /* HI */
423 COSTS_N_INSNS (25), /* SI */
424 COSTS_N_INSNS (25), /* DI */
425 COSTS_N_INSNS (25)}, /* other */
426 COSTS_N_INSNS (3), /* cost of movsx */
427 COSTS_N_INSNS (2), /* cost of movzx */
428 8, /* "large" insn */
430 6, /* cost for loading QImode using movzbl */
431 {2, 4, 2}, /* cost of loading integer registers
432 in QImode, HImode and SImode.
433 Relative to reg-reg move (2). */
434 {2, 4, 2}, /* cost of storing integer registers */
435 2, /* cost of reg,reg fld/fst */
436 {2, 2, 6}, /* cost of loading fp registers
437 in SFmode, DFmode and XFmode */
438 {4, 4, 6}, /* cost of storing fp registers
439 in SFmode, DFmode and XFmode */
440 8, /* cost of moving MMX register */
441 {8, 8}, /* cost of loading MMX registers
442 in SImode and DImode */
443 {8, 8}, /* cost of storing MMX registers
444 in SImode and DImode */
445 2, /* cost of moving SSE register */
446 {4, 8, 16}, /* cost of loading SSE registers
447 in SImode, DImode and TImode */
448 {4, 8, 16}, /* cost of storing SSE registers
449 in SImode, DImode and TImode */
450 3, /* MMX or SSE register to integer */
451 8, /* size of l1 cache. */
452 8, /* size of l2 cache */
453 0, /* size of prefetch block */
454 0, /* number of parallel prefetches */
456 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
457 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
458 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
459 COSTS_N_INSNS (1), /* cost of FABS instruction. */
460 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
461 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
464 1, /* scalar_stmt_cost. */
465 1, /* scalar load_cost. */
466 1, /* scalar_store_cost. */
467 1, /* vec_stmt_cost. */
468 1, /* vec_to_scalar_cost. */
469 1, /* scalar_to_vec_cost. */
470 1, /* vec_align_load_cost. */
471 2, /* vec_unalign_load_cost. */
472 1, /* vec_store_cost. */
473 3, /* cond_taken_branch_cost. */
474 1, /* cond_not_taken_branch_cost. */
477 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
478 (we ensure the alignment). For small blocks inline loop is still a
479 noticeable win, for bigger blocks either rep movsl or rep movsb is
480 way to go. Rep movsb has apparently more expensive startup time in CPU,
481 but after 4K the difference is down in the noise. */
482 static stringop_algs pentiumpro_memcpy[2] = {
483 {rep_prefix_4_byte, {{128, loop, false}, {1024, unrolled_loop, false},
484 {8192, rep_prefix_4_byte, false},
485 {-1, rep_prefix_1_byte, false}}},
486 DUMMY_STRINGOP_ALGS};
487 static stringop_algs pentiumpro_memset[2] = {
488 {rep_prefix_4_byte, {{1024, unrolled_loop, false},
489 {8192, rep_prefix_4_byte, false},
490 {-1, libcall, false}}},
491 DUMMY_STRINGOP_ALGS};
493 struct processor_costs pentiumpro_cost = {
494 COSTS_N_INSNS (1), /* cost of an add instruction */
495 COSTS_N_INSNS (1), /* cost of a lea instruction */
496 COSTS_N_INSNS (1), /* variable shift costs */
497 COSTS_N_INSNS (1), /* constant shift costs */
498 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
499 COSTS_N_INSNS (4), /* HI */
500 COSTS_N_INSNS (4), /* SI */
501 COSTS_N_INSNS (4), /* DI */
502 COSTS_N_INSNS (4)}, /* other */
503 0, /* cost of multiply per each bit set */
504 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
505 COSTS_N_INSNS (17), /* HI */
506 COSTS_N_INSNS (17), /* SI */
507 COSTS_N_INSNS (17), /* DI */
508 COSTS_N_INSNS (17)}, /* other */
509 COSTS_N_INSNS (1), /* cost of movsx */
510 COSTS_N_INSNS (1), /* cost of movzx */
511 8, /* "large" insn */
513 2, /* cost for loading QImode using movzbl */
514 {4, 4, 4}, /* cost of loading integer registers
515 in QImode, HImode and SImode.
516 Relative to reg-reg move (2). */
517 {2, 2, 2}, /* cost of storing integer registers */
518 2, /* cost of reg,reg fld/fst */
519 {2, 2, 6}, /* cost of loading fp registers
520 in SFmode, DFmode and XFmode */
521 {4, 4, 6}, /* cost of storing fp registers
522 in SFmode, DFmode and XFmode */
523 2, /* cost of moving MMX register */
524 {2, 2}, /* cost of loading MMX registers
525 in SImode and DImode */
526 {2, 2}, /* cost of storing MMX registers
527 in SImode and DImode */
528 2, /* cost of moving SSE register */
529 {2, 2, 8}, /* cost of loading SSE registers
530 in SImode, DImode and TImode */
531 {2, 2, 8}, /* cost of storing SSE registers
532 in SImode, DImode and TImode */
533 3, /* MMX or SSE register to integer */
534 8, /* size of l1 cache. */
535 256, /* size of l2 cache */
536 32, /* size of prefetch block */
537 6, /* number of parallel prefetches */
539 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
540 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
541 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
542 COSTS_N_INSNS (2), /* cost of FABS instruction. */
543 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
544 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
547 1, /* scalar_stmt_cost. */
548 1, /* scalar load_cost. */
549 1, /* scalar_store_cost. */
550 1, /* vec_stmt_cost. */
551 1, /* vec_to_scalar_cost. */
552 1, /* scalar_to_vec_cost. */
553 1, /* vec_align_load_cost. */
554 2, /* vec_unalign_load_cost. */
555 1, /* vec_store_cost. */
556 3, /* cond_taken_branch_cost. */
557 1, /* cond_not_taken_branch_cost. */
560 static stringop_algs geode_memcpy[2] = {
561 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
562 DUMMY_STRINGOP_ALGS};
563 static stringop_algs geode_memset[2] = {
564 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
565 DUMMY_STRINGOP_ALGS};
567 struct processor_costs geode_cost = {
568 COSTS_N_INSNS (1), /* cost of an add instruction */
569 COSTS_N_INSNS (1), /* cost of a lea instruction */
570 COSTS_N_INSNS (2), /* variable shift costs */
571 COSTS_N_INSNS (1), /* constant shift costs */
572 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
573 COSTS_N_INSNS (4), /* HI */
574 COSTS_N_INSNS (7), /* SI */
575 COSTS_N_INSNS (7), /* DI */
576 COSTS_N_INSNS (7)}, /* other */
577 0, /* cost of multiply per each bit set */
578 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
579 COSTS_N_INSNS (23), /* HI */
580 COSTS_N_INSNS (39), /* SI */
581 COSTS_N_INSNS (39), /* DI */
582 COSTS_N_INSNS (39)}, /* other */
583 COSTS_N_INSNS (1), /* cost of movsx */
584 COSTS_N_INSNS (1), /* cost of movzx */
585 8, /* "large" insn */
587 1, /* cost for loading QImode using movzbl */
588 {1, 1, 1}, /* cost of loading integer registers
589 in QImode, HImode and SImode.
590 Relative to reg-reg move (2). */
591 {1, 1, 1}, /* cost of storing integer registers */
592 1, /* cost of reg,reg fld/fst */
593 {1, 1, 1}, /* cost of loading fp registers
594 in SFmode, DFmode and XFmode */
595 {4, 6, 6}, /* cost of storing fp registers
596 in SFmode, DFmode and XFmode */
598 1, /* cost of moving MMX register */
599 {1, 1}, /* cost of loading MMX registers
600 in SImode and DImode */
601 {1, 1}, /* cost of storing MMX registers
602 in SImode and DImode */
603 1, /* cost of moving SSE register */
604 {1, 1, 1}, /* cost of loading SSE registers
605 in SImode, DImode and TImode */
606 {1, 1, 1}, /* cost of storing SSE registers
607 in SImode, DImode and TImode */
608 1, /* MMX or SSE register to integer */
609 64, /* size of l1 cache. */
610 128, /* size of l2 cache. */
611 32, /* size of prefetch block */
612 1, /* number of parallel prefetches */
614 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
615 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
616 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
617 COSTS_N_INSNS (1), /* cost of FABS instruction. */
618 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
619 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
622 1, /* scalar_stmt_cost. */
623 1, /* scalar load_cost. */
624 1, /* scalar_store_cost. */
625 1, /* vec_stmt_cost. */
626 1, /* vec_to_scalar_cost. */
627 1, /* scalar_to_vec_cost. */
628 1, /* vec_align_load_cost. */
629 2, /* vec_unalign_load_cost. */
630 1, /* vec_store_cost. */
631 3, /* cond_taken_branch_cost. */
632 1, /* cond_not_taken_branch_cost. */
635 static stringop_algs k6_memcpy[2] = {
636 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
637 DUMMY_STRINGOP_ALGS};
638 static stringop_algs k6_memset[2] = {
639 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
640 DUMMY_STRINGOP_ALGS};
642 struct processor_costs k6_cost = {
643 COSTS_N_INSNS (1), /* cost of an add instruction */
644 COSTS_N_INSNS (2), /* cost of a lea instruction */
645 COSTS_N_INSNS (1), /* variable shift costs */
646 COSTS_N_INSNS (1), /* constant shift costs */
647 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
648 COSTS_N_INSNS (3), /* HI */
649 COSTS_N_INSNS (3), /* SI */
650 COSTS_N_INSNS (3), /* DI */
651 COSTS_N_INSNS (3)}, /* other */
652 0, /* cost of multiply per each bit set */
653 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
654 COSTS_N_INSNS (18), /* HI */
655 COSTS_N_INSNS (18), /* SI */
656 COSTS_N_INSNS (18), /* DI */
657 COSTS_N_INSNS (18)}, /* other */
658 COSTS_N_INSNS (2), /* cost of movsx */
659 COSTS_N_INSNS (2), /* cost of movzx */
660 8, /* "large" insn */
662 3, /* cost for loading QImode using movzbl */
663 {4, 5, 4}, /* cost of loading integer registers
664 in QImode, HImode and SImode.
665 Relative to reg-reg move (2). */
666 {2, 3, 2}, /* cost of storing integer registers */
667 4, /* cost of reg,reg fld/fst */
668 {6, 6, 6}, /* cost of loading fp registers
669 in SFmode, DFmode and XFmode */
670 {4, 4, 4}, /* cost of storing fp registers
671 in SFmode, DFmode and XFmode */
672 2, /* cost of moving MMX register */
673 {2, 2}, /* cost of loading MMX registers
674 in SImode and DImode */
675 {2, 2}, /* cost of storing MMX registers
676 in SImode and DImode */
677 2, /* cost of moving SSE register */
678 {2, 2, 8}, /* cost of loading SSE registers
679 in SImode, DImode and TImode */
680 {2, 2, 8}, /* cost of storing SSE registers
681 in SImode, DImode and TImode */
682 6, /* MMX or SSE register to integer */
683 32, /* size of l1 cache. */
684 32, /* size of l2 cache. Some models
685 have integrated l2 cache, but
686 optimizing for k6 is not important
687 enough to worry about that. */
688 32, /* size of prefetch block */
689 1, /* number of parallel prefetches */
691 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
692 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
693 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
694 COSTS_N_INSNS (2), /* cost of FABS instruction. */
695 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
696 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
699 1, /* scalar_stmt_cost. */
700 1, /* scalar load_cost. */
701 1, /* scalar_store_cost. */
702 1, /* vec_stmt_cost. */
703 1, /* vec_to_scalar_cost. */
704 1, /* scalar_to_vec_cost. */
705 1, /* vec_align_load_cost. */
706 2, /* vec_unalign_load_cost. */
707 1, /* vec_store_cost. */
708 3, /* cond_taken_branch_cost. */
709 1, /* cond_not_taken_branch_cost. */
712 /* For some reason, Athlon deals better with REP prefix (relative to loops)
713 compared to K8. Alignment becomes important after 8 bytes for memcpy and
714 128 bytes for memset. */
715 static stringop_algs athlon_memcpy[2] = {
716 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
717 DUMMY_STRINGOP_ALGS};
718 static stringop_algs athlon_memset[2] = {
719 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
720 DUMMY_STRINGOP_ALGS};
722 struct processor_costs athlon_cost = {
723 COSTS_N_INSNS (1), /* cost of an add instruction */
724 COSTS_N_INSNS (2), /* cost of a lea instruction */
725 COSTS_N_INSNS (1), /* variable shift costs */
726 COSTS_N_INSNS (1), /* constant shift costs */
727 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
728 COSTS_N_INSNS (5), /* HI */
729 COSTS_N_INSNS (5), /* SI */
730 COSTS_N_INSNS (5), /* DI */
731 COSTS_N_INSNS (5)}, /* other */
732 0, /* cost of multiply per each bit set */
733 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
734 COSTS_N_INSNS (26), /* HI */
735 COSTS_N_INSNS (42), /* SI */
736 COSTS_N_INSNS (74), /* DI */
737 COSTS_N_INSNS (74)}, /* other */
738 COSTS_N_INSNS (1), /* cost of movsx */
739 COSTS_N_INSNS (1), /* cost of movzx */
740 8, /* "large" insn */
742 4, /* cost for loading QImode using movzbl */
743 {3, 4, 3}, /* cost of loading integer registers
744 in QImode, HImode and SImode.
745 Relative to reg-reg move (2). */
746 {3, 4, 3}, /* cost of storing integer registers */
747 4, /* cost of reg,reg fld/fst */
748 {4, 4, 12}, /* cost of loading fp registers
749 in SFmode, DFmode and XFmode */
750 {6, 6, 8}, /* cost of storing fp registers
751 in SFmode, DFmode and XFmode */
752 2, /* cost of moving MMX register */
753 {4, 4}, /* cost of loading MMX registers
754 in SImode and DImode */
755 {4, 4}, /* cost of storing MMX registers
756 in SImode and DImode */
757 2, /* cost of moving SSE register */
758 {4, 4, 6}, /* cost of loading SSE registers
759 in SImode, DImode and TImode */
760 {4, 4, 5}, /* cost of storing SSE registers
761 in SImode, DImode and TImode */
762 5, /* MMX or SSE register to integer */
763 64, /* size of l1 cache. */
764 256, /* size of l2 cache. */
765 64, /* size of prefetch block */
766 6, /* number of parallel prefetches */
768 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
769 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
770 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
771 COSTS_N_INSNS (2), /* cost of FABS instruction. */
772 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
773 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
776 1, /* scalar_stmt_cost. */
777 1, /* scalar load_cost. */
778 1, /* scalar_store_cost. */
779 1, /* vec_stmt_cost. */
780 1, /* vec_to_scalar_cost. */
781 1, /* scalar_to_vec_cost. */
782 1, /* vec_align_load_cost. */
783 2, /* vec_unalign_load_cost. */
784 1, /* vec_store_cost. */
785 3, /* cond_taken_branch_cost. */
786 1, /* cond_not_taken_branch_cost. */
789 /* K8 has optimized REP instruction for medium sized blocks, but for very
790 small blocks it is better to use loop. For large blocks, libcall can
791 do nontemporary accesses and beat inline considerably. */
792 static stringop_algs k8_memcpy[2] = {
793 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
794 {-1, rep_prefix_4_byte, false}}},
795 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
796 {-1, libcall, false}}}};
797 static stringop_algs k8_memset[2] = {
798 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
799 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
800 {libcall, {{48, unrolled_loop, false},
801 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
803 struct processor_costs k8_cost = {
804 COSTS_N_INSNS (1), /* cost of an add instruction */
805 COSTS_N_INSNS (2), /* cost of a lea instruction */
806 COSTS_N_INSNS (1), /* variable shift costs */
807 COSTS_N_INSNS (1), /* constant shift costs */
808 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
809 COSTS_N_INSNS (4), /* HI */
810 COSTS_N_INSNS (3), /* SI */
811 COSTS_N_INSNS (4), /* DI */
812 COSTS_N_INSNS (5)}, /* other */
813 0, /* cost of multiply per each bit set */
814 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
815 COSTS_N_INSNS (26), /* HI */
816 COSTS_N_INSNS (42), /* SI */
817 COSTS_N_INSNS (74), /* DI */
818 COSTS_N_INSNS (74)}, /* other */
819 COSTS_N_INSNS (1), /* cost of movsx */
820 COSTS_N_INSNS (1), /* cost of movzx */
821 8, /* "large" insn */
823 4, /* cost for loading QImode using movzbl */
824 {3, 4, 3}, /* cost of loading integer registers
825 in QImode, HImode and SImode.
826 Relative to reg-reg move (2). */
827 {3, 4, 3}, /* cost of storing integer registers */
828 4, /* cost of reg,reg fld/fst */
829 {4, 4, 12}, /* cost of loading fp registers
830 in SFmode, DFmode and XFmode */
831 {6, 6, 8}, /* cost of storing fp registers
832 in SFmode, DFmode and XFmode */
833 2, /* cost of moving MMX register */
834 {3, 3}, /* cost of loading MMX registers
835 in SImode and DImode */
836 {4, 4}, /* cost of storing MMX registers
837 in SImode and DImode */
838 2, /* cost of moving SSE register */
839 {4, 3, 6}, /* cost of loading SSE registers
840 in SImode, DImode and TImode */
841 {4, 4, 5}, /* cost of storing SSE registers
842 in SImode, DImode and TImode */
843 5, /* MMX or SSE register to integer */
844 64, /* size of l1 cache. */
845 512, /* size of l2 cache. */
846 64, /* size of prefetch block */
847 /* New AMD processors never drop prefetches; if they cannot be performed
848 immediately, they are queued. We set number of simultaneous prefetches
849 to a large constant to reflect this (it probably is not a good idea not
850 to limit number of prefetches at all, as their execution also takes some
852 100, /* number of parallel prefetches */
854 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
855 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
856 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
857 COSTS_N_INSNS (2), /* cost of FABS instruction. */
858 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
859 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
863 4, /* scalar_stmt_cost. */
864 2, /* scalar load_cost. */
865 2, /* scalar_store_cost. */
866 5, /* vec_stmt_cost. */
867 0, /* vec_to_scalar_cost. */
868 2, /* scalar_to_vec_cost. */
869 2, /* vec_align_load_cost. */
870 3, /* vec_unalign_load_cost. */
871 3, /* vec_store_cost. */
872 3, /* cond_taken_branch_cost. */
873 2, /* cond_not_taken_branch_cost. */
876 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
877 very small blocks it is better to use loop. For large blocks, libcall can
878 do nontemporary accesses and beat inline considerably. */
/* NOTE(review): stringop_algs tables -- entries [0] vs [1] presumably select
   32-bit vs 64-bit codegen, and {-1, alg, ...} looks like the catch-all
   "any larger size" sentinel; confirm against the stringop_algs declaration.  */
879 static stringop_algs amdfam10_memcpy[2] = {
880 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
881 {-1, rep_prefix_4_byte, false}}},
882 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
883 {-1, libcall, false}}}};
884 static stringop_algs amdfam10_memset[2] = {
885 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
886 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
887 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
888 {-1, libcall, false}}}};
/* NOTE(review): positional initializer for struct processor_costs -- field
   order must match the struct declaration exactly.  This excerpt appears to
   have dropped several upstream lines (e.g. the MOVE_RATIO entry after
   '"large" insn', the Branch cost entry after 'number of parallel
   prefetches', the comment block whose stray 'MOVD reg...' lines remain
   below, and the closing "};") -- verify against the complete file before
   editing any value.  */
889 struct processor_costs amdfam10_cost = {
890 COSTS_N_INSNS (1), /* cost of an add instruction */
891 COSTS_N_INSNS (2), /* cost of a lea instruction */
892 COSTS_N_INSNS (1), /* variable shift costs */
893 COSTS_N_INSNS (1), /* constant shift costs */
894 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
895 COSTS_N_INSNS (4), /* HI */
896 COSTS_N_INSNS (3), /* SI */
897 COSTS_N_INSNS (4), /* DI */
898 COSTS_N_INSNS (5)}, /* other */
899 0, /* cost of multiply per each bit set */
900 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
901 COSTS_N_INSNS (35), /* HI */
902 COSTS_N_INSNS (51), /* SI */
903 COSTS_N_INSNS (83), /* DI */
904 COSTS_N_INSNS (83)}, /* other */
905 COSTS_N_INSNS (1), /* cost of movsx */
906 COSTS_N_INSNS (1), /* cost of movzx */
907 8, /* "large" insn */
909 4, /* cost for loading QImode using movzbl */
910 {3, 4, 3}, /* cost of loading integer registers
911 in QImode, HImode and SImode.
912 Relative to reg-reg move (2). */
913 {3, 4, 3}, /* cost of storing integer registers */
914 4, /* cost of reg,reg fld/fst */
915 {4, 4, 12}, /* cost of loading fp registers
916 in SFmode, DFmode and XFmode */
917 {6, 6, 8}, /* cost of storing fp registers
918 in SFmode, DFmode and XFmode */
919 2, /* cost of moving MMX register */
920 {3, 3}, /* cost of loading MMX registers
921 in SImode and DImode */
922 {4, 4}, /* cost of storing MMX registers
923 in SImode and DImode */
924 2, /* cost of moving SSE register */
925 {4, 4, 3}, /* cost of loading SSE registers
926 in SImode, DImode and TImode */
927 {4, 4, 5}, /* cost of storing SSE registers
928 in SImode, DImode and TImode */
929 3, /* MMX or SSE register to integer */
931 MOVD reg64, xmmreg Double FSTORE 4
932 MOVD reg32, xmmreg Double FSTORE 4
934 MOVD reg64, xmmreg Double FADD 3
936 MOVD reg32, xmmreg Double FADD 3
938 64, /* size of l1 cache. */
939 512, /* size of l2 cache. */
940 64, /* size of prefetch block */
941 /* New AMD processors never drop prefetches; if they cannot be performed
942 immediately, they are queued. We set number of simultaneous prefetches
943 to a large constant to reflect this (it probably is not a good idea not
944 to limit number of prefetches at all, as their execution also takes some
946 100, /* number of parallel prefetches */
948 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
949 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
950 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
951 COSTS_N_INSNS (2), /* cost of FABS instruction. */
952 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
953 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
957 4, /* scalar_stmt_cost. */
958 2, /* scalar load_cost. */
959 2, /* scalar_store_cost. */
960 6, /* vec_stmt_cost. */
961 0, /* vec_to_scalar_cost. */
962 2, /* scalar_to_vec_cost. */
963 2, /* vec_align_load_cost. */
964 2, /* vec_unalign_load_cost. */
965 2, /* vec_store_cost. */
966 2, /* cond_taken_branch_cost. */
967 1, /* cond_not_taken_branch_cost. */
970 /* BDVER1 has optimized REP instruction for medium sized blocks, but for
971 very small blocks it is better to use loop. For large blocks, libcall
972 can do nontemporary accesses and beat inline considerably. */
/* NOTE(review): stringop_algs tables -- entries [0] vs [1] presumably select
   32-bit vs 64-bit codegen; {-1, alg, ...} looks like the catch-all
   sentinel -- confirm against the stringop_algs declaration.  */
973 static stringop_algs bdver1_memcpy[2] = {
974 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
975 {-1, rep_prefix_4_byte, false}}},
976 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
977 {-1, libcall, false}}}};
978 static stringop_algs bdver1_memset[2] = {
979 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
980 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
981 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
982 {-1, libcall, false}}}};
/* NOTE(review): positional initializer for struct processor_costs -- field
   order must match the struct declaration exactly.  This excerpt appears to
   have dropped some upstream lines (e.g. the MOVE_RATIO entry after
   '"large" insn', the comment block whose stray 'MOVD reg...' lines remain
   below, and the closing "};") -- verify against the complete file before
   editing any value.  */
984 const struct processor_costs bdver1_cost = {
985 COSTS_N_INSNS (1), /* cost of an add instruction */
986 COSTS_N_INSNS (1), /* cost of a lea instruction */
987 COSTS_N_INSNS (1), /* variable shift costs */
988 COSTS_N_INSNS (1), /* constant shift costs */
989 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
990 COSTS_N_INSNS (4), /* HI */
991 COSTS_N_INSNS (4), /* SI */
992 COSTS_N_INSNS (6), /* DI */
993 COSTS_N_INSNS (6)}, /* other */
994 0, /* cost of multiply per each bit set */
995 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
996 COSTS_N_INSNS (35), /* HI */
997 COSTS_N_INSNS (51), /* SI */
998 COSTS_N_INSNS (83), /* DI */
999 COSTS_N_INSNS (83)}, /* other */
1000 COSTS_N_INSNS (1), /* cost of movsx */
1001 COSTS_N_INSNS (1), /* cost of movzx */
1002 8, /* "large" insn */
1004 4, /* cost for loading QImode using movzbl */
1005 {5, 5, 4}, /* cost of loading integer registers
1006 in QImode, HImode and SImode.
1007 Relative to reg-reg move (2). */
1008 {4, 4, 4}, /* cost of storing integer registers */
1009 2, /* cost of reg,reg fld/fst */
1010 {5, 5, 12}, /* cost of loading fp registers
1011 in SFmode, DFmode and XFmode */
1012 {4, 4, 8}, /* cost of storing fp registers
1013 in SFmode, DFmode and XFmode */
1014 2, /* cost of moving MMX register */
1015 {4, 4}, /* cost of loading MMX registers
1016 in SImode and DImode */
1017 {4, 4}, /* cost of storing MMX registers
1018 in SImode and DImode */
1019 2, /* cost of moving SSE register */
1020 {4, 4, 4}, /* cost of loading SSE registers
1021 in SImode, DImode and TImode */
1022 {4, 4, 4}, /* cost of storing SSE registers
1023 in SImode, DImode and TImode */
1024 2, /* MMX or SSE register to integer */
1026 MOVD reg64, xmmreg Double FSTORE 4
1027 MOVD reg32, xmmreg Double FSTORE 4
1029 MOVD reg64, xmmreg Double FADD 3
1031 MOVD reg32, xmmreg Double FADD 3
1033 16, /* size of l1 cache. */
1034 2048, /* size of l2 cache. */
1035 64, /* size of prefetch block */
1036 /* New AMD processors never drop prefetches; if they cannot be performed
1037 immediately, they are queued. We set number of simultaneous prefetches
1038 to a large constant to reflect this (it probably is not a good idea not
1039 to limit number of prefetches at all, as their execution also takes some
1041 100, /* number of parallel prefetches */
1042 2, /* Branch cost */
1043 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1044 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1045 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1046 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1047 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1048 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1052 6, /* scalar_stmt_cost. */
1053 4, /* scalar load_cost. */
1054 4, /* scalar_store_cost. */
1055 6, /* vec_stmt_cost. */
1056 0, /* vec_to_scalar_cost. */
1057 2, /* scalar_to_vec_cost. */
1058 4, /* vec_align_load_cost. */
1059 4, /* vec_unalign_load_cost. */
1060 4, /* vec_store_cost. */
1061 4, /* cond_taken_branch_cost. */
1062 2, /* cond_not_taken_branch_cost. */
1065 /* BDVER2 has optimized REP instruction for medium sized blocks, but for
1066 very small blocks it is better to use loop. For large blocks, libcall
1067 can do nontemporary accesses and beat inline considerably. */
/* NOTE(review): stringop_algs tables -- entries [0] vs [1] presumably select
   32-bit vs 64-bit codegen; {-1, alg, ...} looks like the catch-all
   sentinel -- confirm against the stringop_algs declaration.  */
1069 static stringop_algs bdver2_memcpy[2] = {
1070 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1071 {-1, rep_prefix_4_byte, false}}},
1072 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1073 {-1, libcall, false}}}};
1074 static stringop_algs bdver2_memset[2] = {
1075 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1076 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1077 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1078 {-1, libcall, false}}}};
/* NOTE(review): positional initializer for struct processor_costs -- field
   order must match the struct declaration exactly.  This excerpt appears to
   have dropped some upstream lines (e.g. the MOVE_RATIO entry after
   '"large" insn', the comment block whose stray 'MOVD reg...' lines remain
   below, and the closing "};") -- verify against the complete file before
   editing any value.  */
1080 const struct processor_costs bdver2_cost = {
1081 COSTS_N_INSNS (1), /* cost of an add instruction */
1082 COSTS_N_INSNS (1), /* cost of a lea instruction */
1083 COSTS_N_INSNS (1), /* variable shift costs */
1084 COSTS_N_INSNS (1), /* constant shift costs */
1085 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1086 COSTS_N_INSNS (4), /* HI */
1087 COSTS_N_INSNS (4), /* SI */
1088 COSTS_N_INSNS (6), /* DI */
1089 COSTS_N_INSNS (6)}, /* other */
1090 0, /* cost of multiply per each bit set */
1091 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1092 COSTS_N_INSNS (35), /* HI */
1093 COSTS_N_INSNS (51), /* SI */
1094 COSTS_N_INSNS (83), /* DI */
1095 COSTS_N_INSNS (83)}, /* other */
1096 COSTS_N_INSNS (1), /* cost of movsx */
1097 COSTS_N_INSNS (1), /* cost of movzx */
1098 8, /* "large" insn */
1100 4, /* cost for loading QImode using movzbl */
1101 {5, 5, 4}, /* cost of loading integer registers
1102 in QImode, HImode and SImode.
1103 Relative to reg-reg move (2). */
1104 {4, 4, 4}, /* cost of storing integer registers */
1105 2, /* cost of reg,reg fld/fst */
1106 {5, 5, 12}, /* cost of loading fp registers
1107 in SFmode, DFmode and XFmode */
1108 {4, 4, 8}, /* cost of storing fp registers
1109 in SFmode, DFmode and XFmode */
1110 2, /* cost of moving MMX register */
1111 {4, 4}, /* cost of loading MMX registers
1112 in SImode and DImode */
1113 {4, 4}, /* cost of storing MMX registers
1114 in SImode and DImode */
1115 2, /* cost of moving SSE register */
1116 {4, 4, 4}, /* cost of loading SSE registers
1117 in SImode, DImode and TImode */
1118 {4, 4, 4}, /* cost of storing SSE registers
1119 in SImode, DImode and TImode */
1120 2, /* MMX or SSE register to integer */
1122 MOVD reg64, xmmreg Double FSTORE 4
1123 MOVD reg32, xmmreg Double FSTORE 4
1125 MOVD reg64, xmmreg Double FADD 3
1127 MOVD reg32, xmmreg Double FADD 3
1129 16, /* size of l1 cache. */
1130 2048, /* size of l2 cache. */
1131 64, /* size of prefetch block */
1132 /* New AMD processors never drop prefetches; if they cannot be performed
1133 immediately, they are queued. We set number of simultaneous prefetches
1134 to a large constant to reflect this (it probably is not a good idea not
1135 to limit number of prefetches at all, as their execution also takes some
1137 100, /* number of parallel prefetches */
1138 2, /* Branch cost */
1139 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1140 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1141 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1142 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1143 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1144 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1148 6, /* scalar_stmt_cost. */
1149 4, /* scalar load_cost. */
1150 4, /* scalar_store_cost. */
1151 6, /* vec_stmt_cost. */
1152 0, /* vec_to_scalar_cost. */
1153 2, /* scalar_to_vec_cost. */
1154 4, /* vec_align_load_cost. */
1155 4, /* vec_unalign_load_cost. */
1156 4, /* vec_store_cost. */
1157 4, /* cond_taken_branch_cost. */
1158 2, /* cond_not_taken_branch_cost. */
1162 /* BDVER3 has optimized REP instruction for medium sized blocks, but for
1163 very small blocks it is better to use loop. For large blocks, libcall
1164 can do nontemporary accesses and beat inline considerably. */
/* NOTE(review): stringop_algs tables -- entries [0] vs [1] presumably select
   32-bit vs 64-bit codegen; {-1, alg, ...} looks like the catch-all
   sentinel -- confirm against the stringop_algs declaration.  */
1165 static stringop_algs bdver3_memcpy[2] = {
1166 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1167 {-1, rep_prefix_4_byte, false}}},
1168 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1169 {-1, libcall, false}}}};
1170 static stringop_algs bdver3_memset[2] = {
1171 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1172 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1173 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1174 {-1, libcall, false}}}};
/* NOTE(review): positional initializer for struct processor_costs -- field
   order must match the struct declaration exactly.  This excerpt appears to
   have dropped some upstream lines (e.g. the MOVE_RATIO entry after
   '"large" insn', the end of the prefetch comment, and the closing "};") --
   verify against the complete file before editing any value.  */
1175 struct processor_costs bdver3_cost = {
1176 COSTS_N_INSNS (1), /* cost of an add instruction */
1177 COSTS_N_INSNS (1), /* cost of a lea instruction */
1178 COSTS_N_INSNS (1), /* variable shift costs */
1179 COSTS_N_INSNS (1), /* constant shift costs */
1180 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1181 COSTS_N_INSNS (4), /* HI */
1182 COSTS_N_INSNS (4), /* SI */
1183 COSTS_N_INSNS (6), /* DI */
1184 COSTS_N_INSNS (6)}, /* other */
1185 0, /* cost of multiply per each bit set */
1186 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1187 COSTS_N_INSNS (35), /* HI */
1188 COSTS_N_INSNS (51), /* SI */
1189 COSTS_N_INSNS (83), /* DI */
1190 COSTS_N_INSNS (83)}, /* other */
1191 COSTS_N_INSNS (1), /* cost of movsx */
1192 COSTS_N_INSNS (1), /* cost of movzx */
1193 8, /* "large" insn */
1195 4, /* cost for loading QImode using movzbl */
1196 {5, 5, 4}, /* cost of loading integer registers
1197 in QImode, HImode and SImode.
1198 Relative to reg-reg move (2). */
1199 {4, 4, 4}, /* cost of storing integer registers */
1200 2, /* cost of reg,reg fld/fst */
1201 {5, 5, 12}, /* cost of loading fp registers
1202 in SFmode, DFmode and XFmode */
1203 {4, 4, 8}, /* cost of storing fp registers
1204 in SFmode, DFmode and XFmode */
1205 2, /* cost of moving MMX register */
1206 {4, 4}, /* cost of loading MMX registers
1207 in SImode and DImode */
1208 {4, 4}, /* cost of storing MMX registers
1209 in SImode and DImode */
1210 2, /* cost of moving SSE register */
1211 {4, 4, 4}, /* cost of loading SSE registers
1212 in SImode, DImode and TImode */
1213 {4, 4, 4}, /* cost of storing SSE registers
1214 in SImode, DImode and TImode */
1215 2, /* MMX or SSE register to integer */
1216 16, /* size of l1 cache. */
1217 2048, /* size of l2 cache. */
1218 64, /* size of prefetch block */
1219 /* New AMD processors never drop prefetches; if they cannot be performed
1220 immediately, they are queued. We set number of simultaneous prefetches
1221 to a large constant to reflect this (it probably is not a good idea not
1222 to limit number of prefetches at all, as their execution also takes some
1224 100, /* number of parallel prefetches */
1225 2, /* Branch cost */
1226 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1227 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1228 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1229 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1230 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1231 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1235 6, /* scalar_stmt_cost. */
1236 4, /* scalar load_cost. */
1237 4, /* scalar_store_cost. */
1238 6, /* vec_stmt_cost. */
1239 0, /* vec_to_scalar_cost. */
1240 2, /* scalar_to_vec_cost. */
1241 4, /* vec_align_load_cost. */
1242 4, /* vec_unalign_load_cost. */
1243 4, /* vec_store_cost. */
1244 4, /* cond_taken_branch_cost. */
1245 2, /* cond_not_taken_branch_cost. */
1248 /* BDVER4 has optimized REP instruction for medium sized blocks, but for
1249 very small blocks it is better to use loop. For large blocks, libcall
1250 can do nontemporary accesses and beat inline considerably. */
/* NOTE(review): stringop_algs tables -- entries [0] vs [1] presumably select
   32-bit vs 64-bit codegen; {-1, alg, ...} looks like the catch-all
   sentinel -- confirm against the stringop_algs declaration.  */
1251 static stringop_algs bdver4_memcpy[2] = {
1252 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1253 {-1, rep_prefix_4_byte, false}}},
1254 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1255 {-1, libcall, false}}}};
1256 static stringop_algs bdver4_memset[2] = {
1257 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1258 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1259 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1260 {-1, libcall, false}}}};
/* NOTE(review): positional initializer for struct processor_costs -- field
   order must match the struct declaration exactly.  This excerpt appears to
   have dropped some upstream lines (e.g. the MOVE_RATIO entry after
   '"large" insn', the end of the prefetch comment, and the closing "};") --
   verify against the complete file before editing any value.  */
1261 struct processor_costs bdver4_cost = {
1262 COSTS_N_INSNS (1), /* cost of an add instruction */
1263 COSTS_N_INSNS (1), /* cost of a lea instruction */
1264 COSTS_N_INSNS (1), /* variable shift costs */
1265 COSTS_N_INSNS (1), /* constant shift costs */
1266 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1267 COSTS_N_INSNS (4), /* HI */
1268 COSTS_N_INSNS (4), /* SI */
1269 COSTS_N_INSNS (6), /* DI */
1270 COSTS_N_INSNS (6)}, /* other */
1271 0, /* cost of multiply per each bit set */
1272 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1273 COSTS_N_INSNS (35), /* HI */
1274 COSTS_N_INSNS (51), /* SI */
1275 COSTS_N_INSNS (83), /* DI */
1276 COSTS_N_INSNS (83)}, /* other */
1277 COSTS_N_INSNS (1), /* cost of movsx */
1278 COSTS_N_INSNS (1), /* cost of movzx */
1279 8, /* "large" insn */
1281 4, /* cost for loading QImode using movzbl */
1282 {5, 5, 4}, /* cost of loading integer registers
1283 in QImode, HImode and SImode.
1284 Relative to reg-reg move (2). */
1285 {4, 4, 4}, /* cost of storing integer registers */
1286 2, /* cost of reg,reg fld/fst */
1287 {5, 5, 12}, /* cost of loading fp registers
1288 in SFmode, DFmode and XFmode */
1289 {4, 4, 8}, /* cost of storing fp registers
1290 in SFmode, DFmode and XFmode */
1291 2, /* cost of moving MMX register */
1292 {4, 4}, /* cost of loading MMX registers
1293 in SImode and DImode */
1294 {4, 4}, /* cost of storing MMX registers
1295 in SImode and DImode */
1296 2, /* cost of moving SSE register */
1297 {4, 4, 4}, /* cost of loading SSE registers
1298 in SImode, DImode and TImode */
1299 {4, 4, 4}, /* cost of storing SSE registers
1300 in SImode, DImode and TImode */
1301 2, /* MMX or SSE register to integer */
1302 16, /* size of l1 cache. */
1303 2048, /* size of l2 cache. */
1304 64, /* size of prefetch block */
1305 /* New AMD processors never drop prefetches; if they cannot be performed
1306 immediately, they are queued. We set number of simultaneous prefetches
1307 to a large constant to reflect this (it probably is not a good idea not
1308 to limit number of prefetches at all, as their execution also takes some
1310 100, /* number of parallel prefetches */
1311 2, /* Branch cost */
1312 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1313 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1314 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1315 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1316 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1317 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1321 6, /* scalar_stmt_cost. */
1322 4, /* scalar load_cost. */
1323 4, /* scalar_store_cost. */
1324 6, /* vec_stmt_cost. */
1325 0, /* vec_to_scalar_cost. */
1326 2, /* scalar_to_vec_cost. */
1327 4, /* vec_align_load_cost. */
1328 4, /* vec_unalign_load_cost. */
1329 4, /* vec_store_cost. */
1330 4, /* cond_taken_branch_cost. */
1331 2, /* cond_not_taken_branch_cost. */
1335 /* ZNVER1 has optimized REP instruction for medium sized blocks, but for
1336 very small blocks it is better to use loop. For large blocks, libcall
1337 can do nontemporary accesses and beat inline considerably. */
/* NOTE(review): stringop_algs tables -- entries [0] vs [1] presumably select
   32-bit vs 64-bit codegen; {-1, alg, ...} looks like the catch-all
   sentinel -- confirm against the stringop_algs declaration.  */
1338 static stringop_algs znver1_memcpy[2] = {
1339 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1340 {-1, rep_prefix_4_byte, false}}},
1341 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1342 {-1, libcall, false}}}};
1343 static stringop_algs znver1_memset[2] = {
1344 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1345 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1346 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1347 {-1, libcall, false}}}};
/* NOTE(review): positional initializer for struct processor_costs -- field
   order must match the struct declaration exactly.  Several multi-line
   comments in this excerpt are truncated mid-sentence (e.g. "cost of
   multiply per each bit", the prefetch note) and the closing "};" is not
   visible, suggesting dropped lines -- verify against the complete file
   before editing any value.  */
1348 struct processor_costs znver1_cost = {
1349 COSTS_N_INSNS (1), /* cost of an add instruction. */
1350 COSTS_N_INSNS (1), /* cost of a lea instruction. */
1351 COSTS_N_INSNS (1), /* variable shift costs. */
1352 COSTS_N_INSNS (1), /* constant shift costs. */
1353 {COSTS_N_INSNS (4), /* cost of starting multiply for QI. */
1354 COSTS_N_INSNS (4), /* HI. */
1355 COSTS_N_INSNS (4), /* SI. */
1356 COSTS_N_INSNS (6), /* DI. */
1357 COSTS_N_INSNS (6)}, /* other. */
1358 0, /* cost of multiply per each bit
1360 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI. */
1361 COSTS_N_INSNS (35), /* HI. */
1362 COSTS_N_INSNS (51), /* SI. */
1363 COSTS_N_INSNS (83), /* DI. */
1364 COSTS_N_INSNS (83)}, /* other. */
1365 COSTS_N_INSNS (1), /* cost of movsx. */
1366 COSTS_N_INSNS (1), /* cost of movzx. */
1367 8, /* "large" insn. */
1368 9, /* MOVE_RATIO. */
1369 4, /* cost for loading QImode using
1371 {5, 5, 4}, /* cost of loading integer registers
1372 in QImode, HImode and SImode.
1373 Relative to reg-reg move (2). */
1374 {4, 4, 4}, /* cost of storing integer
1376 2, /* cost of reg,reg fld/fst. */
1377 {5, 5, 12}, /* cost of loading fp registers
1378 in SFmode, DFmode and XFmode. */
1379 {4, 4, 8}, /* cost of storing fp registers
1380 in SFmode, DFmode and XFmode. */
1381 2, /* cost of moving MMX register. */
1382 {4, 4}, /* cost of loading MMX registers
1383 in SImode and DImode. */
1384 {4, 4}, /* cost of storing MMX registers
1385 in SImode and DImode. */
1386 2, /* cost of moving SSE register. */
1387 {4, 4, 4}, /* cost of loading SSE registers
1388 in SImode, DImode and TImode. */
1389 {4, 4, 4}, /* cost of storing SSE registers
1390 in SImode, DImode and TImode. */
1391 2, /* MMX or SSE register to integer. */
1392 32, /* size of l1 cache. */
1393 512, /* size of l2 cache. */
1394 64, /* size of prefetch block. */
1395 /* New AMD processors never drop prefetches; if they cannot be performed
1396 immediately, they are queued. We set number of simultaneous prefetches
1397 to a large constant to reflect this (it probably is not a good idea not
1398 to limit number of prefetches at all, as their execution also takes some
1400 100, /* number of parallel prefetches. */
1401 2, /* Branch cost. */
1402 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1403 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1404 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1405 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1406 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1407 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1411 6, /* scalar_stmt_cost. */
1412 4, /* scalar load_cost. */
1413 4, /* scalar_store_cost. */
1414 6, /* vec_stmt_cost. */
1415 0, /* vec_to_scalar_cost. */
1416 2, /* scalar_to_vec_cost. */
1417 4, /* vec_align_load_cost. */
1418 4, /* vec_unalign_load_cost. */
1419 4, /* vec_store_cost. */
1420 4, /* cond_taken_branch_cost. */
1421 2, /* cond_not_taken_branch_cost. */
1424 /* BTVER1 has optimized REP instruction for medium sized blocks, but for
1425 very small blocks it is better to use loop. For large blocks, libcall can
1426 do nontemporary accesses and beat inline considerably. */
/* NOTE(review): stringop_algs tables -- entries [0] vs [1] presumably select
   32-bit vs 64-bit codegen; {-1, alg, ...} looks like the catch-all
   sentinel -- confirm against the stringop_algs declaration.  */
1427 static stringop_algs btver1_memcpy[2] = {
1428 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1429 {-1, rep_prefix_4_byte, false}}},
1430 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1431 {-1, libcall, false}}}};
1432 static stringop_algs btver1_memset[2] = {
1433 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1434 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1435 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1436 {-1, libcall, false}}}};
/* NOTE(review): positional initializer for struct processor_costs -- field
   order must match the struct declaration exactly.  This excerpt appears to
   have dropped some upstream lines (e.g. the MOVE_RATIO entry after
   '"large" insn', the comment block whose stray 'MOVD reg...' lines remain
   below, and the closing "};") -- verify against the complete file before
   editing any value.  */
1437 const struct processor_costs btver1_cost = {
1438 COSTS_N_INSNS (1), /* cost of an add instruction */
1439 COSTS_N_INSNS (2), /* cost of a lea instruction */
1440 COSTS_N_INSNS (1), /* variable shift costs */
1441 COSTS_N_INSNS (1), /* constant shift costs */
1442 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1443 COSTS_N_INSNS (4), /* HI */
1444 COSTS_N_INSNS (3), /* SI */
1445 COSTS_N_INSNS (4), /* DI */
1446 COSTS_N_INSNS (5)}, /* other */
1447 0, /* cost of multiply per each bit set */
1448 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1449 COSTS_N_INSNS (35), /* HI */
1450 COSTS_N_INSNS (51), /* SI */
1451 COSTS_N_INSNS (83), /* DI */
1452 COSTS_N_INSNS (83)}, /* other */
1453 COSTS_N_INSNS (1), /* cost of movsx */
1454 COSTS_N_INSNS (1), /* cost of movzx */
1455 8, /* "large" insn */
1457 4, /* cost for loading QImode using movzbl */
1458 {3, 4, 3}, /* cost of loading integer registers
1459 in QImode, HImode and SImode.
1460 Relative to reg-reg move (2). */
1461 {3, 4, 3}, /* cost of storing integer registers */
1462 4, /* cost of reg,reg fld/fst */
1463 {4, 4, 12}, /* cost of loading fp registers
1464 in SFmode, DFmode and XFmode */
1465 {6, 6, 8}, /* cost of storing fp registers
1466 in SFmode, DFmode and XFmode */
1467 2, /* cost of moving MMX register */
1468 {3, 3}, /* cost of loading MMX registers
1469 in SImode and DImode */
1470 {4, 4}, /* cost of storing MMX registers
1471 in SImode and DImode */
1472 2, /* cost of moving SSE register */
1473 {4, 4, 3}, /* cost of loading SSE registers
1474 in SImode, DImode and TImode */
1475 {4, 4, 5}, /* cost of storing SSE registers
1476 in SImode, DImode and TImode */
1477 3, /* MMX or SSE register to integer */
1479 MOVD reg64, xmmreg Double FSTORE 4
1480 MOVD reg32, xmmreg Double FSTORE 4
1482 MOVD reg64, xmmreg Double FADD 3
1484 MOVD reg32, xmmreg Double FADD 3
1486 32, /* size of l1 cache. */
1487 512, /* size of l2 cache. */
1488 64, /* size of prefetch block */
1489 100, /* number of parallel prefetches */
1490 2, /* Branch cost */
1491 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1492 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1493 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1494 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1495 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1496 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1500 4, /* scalar_stmt_cost. */
1501 2, /* scalar load_cost. */
1502 2, /* scalar_store_cost. */
1503 6, /* vec_stmt_cost. */
1504 0, /* vec_to_scalar_cost. */
1505 2, /* scalar_to_vec_cost. */
1506 2, /* vec_align_load_cost. */
1507 2, /* vec_unalign_load_cost. */
1508 2, /* vec_store_cost. */
1509 2, /* cond_taken_branch_cost. */
1510 1, /* cond_not_taken_branch_cost. */
/* NOTE(review): stringop_algs tables for BTVER2 -- entries [0] vs [1]
   presumably select 32-bit vs 64-bit codegen; {-1, alg, ...} looks like the
   catch-all sentinel -- confirm against the stringop_algs declaration.  */
1513 static stringop_algs btver2_memcpy[2] = {
1514 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1515 {-1, rep_prefix_4_byte, false}}},
1516 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1517 {-1, libcall, false}}}};
1518 static stringop_algs btver2_memset[2] = {
1519 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1520 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1521 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1522 {-1, libcall, false}}}};
/* NOTE(review): positional initializer for struct processor_costs -- field
   order must match the struct declaration exactly.  This excerpt appears to
   have dropped some upstream lines (e.g. the MOVE_RATIO entry after
   '"large" insn', the comment block whose stray 'MOVD reg...' lines remain
   below, and the closing "};") -- verify against the complete file before
   editing any value.  */
1523 const struct processor_costs btver2_cost = {
1524 COSTS_N_INSNS (1), /* cost of an add instruction */
1525 COSTS_N_INSNS (2), /* cost of a lea instruction */
1526 COSTS_N_INSNS (1), /* variable shift costs */
1527 COSTS_N_INSNS (1), /* constant shift costs */
1528 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1529 COSTS_N_INSNS (4), /* HI */
1530 COSTS_N_INSNS (3), /* SI */
1531 COSTS_N_INSNS (4), /* DI */
1532 COSTS_N_INSNS (5)}, /* other */
1533 0, /* cost of multiply per each bit set */
1534 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1535 COSTS_N_INSNS (35), /* HI */
1536 COSTS_N_INSNS (51), /* SI */
1537 COSTS_N_INSNS (83), /* DI */
1538 COSTS_N_INSNS (83)}, /* other */
1539 COSTS_N_INSNS (1), /* cost of movsx */
1540 COSTS_N_INSNS (1), /* cost of movzx */
1541 8, /* "large" insn */
1543 4, /* cost for loading QImode using movzbl */
1544 {3, 4, 3}, /* cost of loading integer registers
1545 in QImode, HImode and SImode.
1546 Relative to reg-reg move (2). */
1547 {3, 4, 3}, /* cost of storing integer registers */
1548 4, /* cost of reg,reg fld/fst */
1549 {4, 4, 12}, /* cost of loading fp registers
1550 in SFmode, DFmode and XFmode */
1551 {6, 6, 8}, /* cost of storing fp registers
1552 in SFmode, DFmode and XFmode */
1553 2, /* cost of moving MMX register */
1554 {3, 3}, /* cost of loading MMX registers
1555 in SImode and DImode */
1556 {4, 4}, /* cost of storing MMX registers
1557 in SImode and DImode */
1558 2, /* cost of moving SSE register */
1559 {4, 4, 3}, /* cost of loading SSE registers
1560 in SImode, DImode and TImode */
1561 {4, 4, 5}, /* cost of storing SSE registers
1562 in SImode, DImode and TImode */
1563 3, /* MMX or SSE register to integer */
1565 MOVD reg64, xmmreg Double FSTORE 4
1566 MOVD reg32, xmmreg Double FSTORE 4
1568 MOVD reg64, xmmreg Double FADD 3
1570 MOVD reg32, xmmreg Double FADD 3
1572 32, /* size of l1 cache. */
1573 2048, /* size of l2 cache. */
1574 64, /* size of prefetch block */
1575 100, /* number of parallel prefetches */
1576 2, /* Branch cost */
1577 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1578 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1579 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1580 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1581 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1582 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1585 4, /* scalar_stmt_cost. */
1586 2, /* scalar load_cost. */
1587 2, /* scalar_store_cost. */
1588 6, /* vec_stmt_cost. */
1589 0, /* vec_to_scalar_cost. */
1590 2, /* scalar_to_vec_cost. */
1591 2, /* vec_align_load_cost. */
1592 2, /* vec_unalign_load_cost. */
1593 2, /* vec_store_cost. */
1594 2, /* cond_taken_branch_cost. */
1595 1, /* cond_not_taken_branch_cost. */
/* NOTE(review): stringop_algs tables for Pentium 4 -- entries [0] vs [1]
   presumably select 32-bit vs 64-bit codegen (DUMMY_STRINGOP_ALGS for the
   64-bit slot of this 32-bit-era CPU); confirm against the stringop_algs
   declaration.  */
1598 static stringop_algs pentium4_memcpy[2] = {
1599 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1600 DUMMY_STRINGOP_ALGS};
1601 static stringop_algs pentium4_memset[2] = {
1602 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1603 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1604 DUMMY_STRINGOP_ALGS};
/* NOTE(review): positional initializer for struct processor_costs -- field
   order must match the struct declaration exactly.  This excerpt appears to
   have dropped some upstream lines (e.g. the MOVE_RATIO entry after
   '"large" insn' and the closing "};") -- verify against the complete file
   before editing any value.  */
1607 struct processor_costs pentium4_cost = {
1608 COSTS_N_INSNS (1), /* cost of an add instruction */
1609 COSTS_N_INSNS (3), /* cost of a lea instruction */
1610 COSTS_N_INSNS (4), /* variable shift costs */
1611 COSTS_N_INSNS (4), /* constant shift costs */
1612 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1613 COSTS_N_INSNS (15), /* HI */
1614 COSTS_N_INSNS (15), /* SI */
1615 COSTS_N_INSNS (15), /* DI */
1616 COSTS_N_INSNS (15)}, /* other */
1617 0, /* cost of multiply per each bit set */
1618 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1619 COSTS_N_INSNS (56), /* HI */
1620 COSTS_N_INSNS (56), /* SI */
1621 COSTS_N_INSNS (56), /* DI */
1622 COSTS_N_INSNS (56)}, /* other */
1623 COSTS_N_INSNS (1), /* cost of movsx */
1624 COSTS_N_INSNS (1), /* cost of movzx */
1625 16, /* "large" insn */
1627 2, /* cost for loading QImode using movzbl */
1628 {4, 5, 4}, /* cost of loading integer registers
1629 in QImode, HImode and SImode.
1630 Relative to reg-reg move (2). */
1631 {2, 3, 2}, /* cost of storing integer registers */
1632 2, /* cost of reg,reg fld/fst */
1633 {2, 2, 6}, /* cost of loading fp registers
1634 in SFmode, DFmode and XFmode */
1635 {4, 4, 6}, /* cost of storing fp registers
1636 in SFmode, DFmode and XFmode */
1637 2, /* cost of moving MMX register */
1638 {2, 2}, /* cost of loading MMX registers
1639 in SImode and DImode */
1640 {2, 2}, /* cost of storing MMX registers
1641 in SImode and DImode */
1642 12, /* cost of moving SSE register */
1643 {12, 12, 12}, /* cost of loading SSE registers
1644 in SImode, DImode and TImode */
1645 {2, 2, 8}, /* cost of storing SSE registers
1646 in SImode, DImode and TImode */
1647 10, /* MMX or SSE register to integer */
1648 8, /* size of l1 cache. */
1649 256, /* size of l2 cache. */
1650 64, /* size of prefetch block */
1651 6, /* number of parallel prefetches */
1652 2, /* Branch cost */
1653 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1654 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1655 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1656 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1657 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1658 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1661 1, /* scalar_stmt_cost. */
1662 1, /* scalar load_cost. */
1663 1, /* scalar_store_cost. */
1664 1, /* vec_stmt_cost. */
1665 1, /* vec_to_scalar_cost. */
1666 1, /* scalar_to_vec_cost. */
1667 1, /* vec_align_load_cost. */
1668 2, /* vec_unalign_load_cost. */
1669 1, /* vec_store_cost. */
1670 3, /* cond_taken_branch_cost. */
1671 1, /* cond_not_taken_branch_cost. */
/* NOTE(review): stringop_algs tables for Nocona -- entries [0] vs [1]
   presumably select 32-bit vs 64-bit codegen; {-1, alg, ...} looks like the
   catch-all sentinel -- confirm against the stringop_algs declaration.  */
1674 static stringop_algs nocona_memcpy[2] = {
1675 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1676 {libcall, {{32, loop, false}, {20000, rep_prefix_8_byte, false},
1677 {100000, unrolled_loop, false}, {-1, libcall, false}}}};
1679 static stringop_algs nocona_memset[2] = {
1680 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1681 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1682 {libcall, {{24, loop, false}, {64, unrolled_loop, false},
1683 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
/* Cost table for the Nocona (64-bit capable Pentium 4) core.  Instruction
   costs are on the COSTS_N_INSNS scale, i.e. relative to an add.
   NOTE(review): some original lines of this initializer are elided in
   this extract; the visible tokens are kept byte-identical.  */
1686 struct processor_costs nocona_cost = {
1687 COSTS_N_INSNS (1), /* cost of an add instruction */
1688 COSTS_N_INSNS (1), /* cost of a lea instruction */
1689 COSTS_N_INSNS (1), /* variable shift costs */
1690 COSTS_N_INSNS (1), /* constant shift costs */
1691 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1692 COSTS_N_INSNS (10), /* HI */
1693 COSTS_N_INSNS (10), /* SI */
1694 COSTS_N_INSNS (10), /* DI */
1695 COSTS_N_INSNS (10)}, /* other */
1696 0, /* cost of multiply per each bit set */
1697 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1698 COSTS_N_INSNS (66), /* HI */
1699 COSTS_N_INSNS (66), /* SI */
1700 COSTS_N_INSNS (66), /* DI */
1701 COSTS_N_INSNS (66)}, /* other */
1702 COSTS_N_INSNS (1), /* cost of movsx */
1703 COSTS_N_INSNS (1), /* cost of movzx */
1704 16, /* "large" insn */
1705 17, /* MOVE_RATIO */
1706 4, /* cost for loading QImode using movzbl */
1707 {4, 4, 4}, /* cost of loading integer registers
1708 in QImode, HImode and SImode.
1709 Relative to reg-reg move (2). */
1710 {4, 4, 4}, /* cost of storing integer registers */
1711 3, /* cost of reg,reg fld/fst */
1712 {12, 12, 12}, /* cost of loading fp registers
1713 in SFmode, DFmode and XFmode */
1714 {4, 4, 4}, /* cost of storing fp registers
1715 in SFmode, DFmode and XFmode */
1716 6, /* cost of moving MMX register */
1717 {12, 12}, /* cost of loading MMX registers
1718 in SImode and DImode */
1719 {12, 12}, /* cost of storing MMX registers
1720 in SImode and DImode */
1721 6, /* cost of moving SSE register */
1722 {12, 12, 12}, /* cost of loading SSE registers
1723 in SImode, DImode and TImode */
1724 {12, 12, 12}, /* cost of storing SSE registers
1725 in SImode, DImode and TImode */
1726 8, /* MMX or SSE register to integer */
1727 8, /* size of l1 cache. */
1728 1024, /* size of l2 cache. */
1729 64, /* size of prefetch block */
1730 8, /* number of parallel prefetches */
1731 1, /* Branch cost */
1732 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1733 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1734 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1735 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1736 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1737 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
/* Vectorizer cost entries follow (consumed by the x86 vectorizer cost
   hooks); values are abstract units, not cycles.  */
1740 1, /* scalar_stmt_cost. */
1741 1, /* scalar load_cost. */
1742 1, /* scalar_store_cost. */
1743 1, /* vec_stmt_cost. */
1744 1, /* vec_to_scalar_cost. */
1745 1, /* scalar_to_vec_cost. */
1746 1, /* vec_align_load_cost. */
1747 2, /* vec_unalign_load_cost. */
1748 1, /* vec_store_cost. */
1749 3, /* cond_taken_branch_cost. */
1750 1, /* cond_not_taken_branch_cost. */
/* String-op strategies for Atom (Bonnell): [0] = 32-bit mode,
   [1] = 64-bit mode.  {max size, algorithm, noalign}; -1 terminates.  */
1753 static stringop_algs atom_memcpy[2] = {
1754 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1755 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1756 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1757 static stringop_algs atom_memset[2] = {
1758 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1759 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1760 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1761 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
/* Cost table for the Atom (Bonnell) core, on the COSTS_N_INSNS scale.
   Note the lea cost of COSTS_N_INSNS (1) + 1: slightly above an add but
   below two adds, steering synth_mult away from lea chains.  */
1763 struct processor_costs atom_cost = {
1764 COSTS_N_INSNS (1), /* cost of an add instruction */
1765 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1766 COSTS_N_INSNS (1), /* variable shift costs */
1767 COSTS_N_INSNS (1), /* constant shift costs */
1768 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1769 COSTS_N_INSNS (4), /* HI */
1770 COSTS_N_INSNS (3), /* SI */
1771 COSTS_N_INSNS (4), /* DI */
1772 COSTS_N_INSNS (2)}, /* other */
1773 0, /* cost of multiply per each bit set */
1774 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1775 COSTS_N_INSNS (26), /* HI */
1776 COSTS_N_INSNS (42), /* SI */
1777 COSTS_N_INSNS (74), /* DI */
1778 COSTS_N_INSNS (74)}, /* other */
1779 COSTS_N_INSNS (1), /* cost of movsx */
1780 COSTS_N_INSNS (1), /* cost of movzx */
1781 8, /* "large" insn */
1782 17, /* MOVE_RATIO */
1783 4, /* cost for loading QImode using movzbl */
1784 {4, 4, 4}, /* cost of loading integer registers
1785 in QImode, HImode and SImode.
1786 Relative to reg-reg move (2). */
1787 {4, 4, 4}, /* cost of storing integer registers */
1788 4, /* cost of reg,reg fld/fst */
1789 {12, 12, 12}, /* cost of loading fp registers
1790 in SFmode, DFmode and XFmode */
1791 {6, 6, 8}, /* cost of storing fp registers
1792 in SFmode, DFmode and XFmode */
1793 2, /* cost of moving MMX register */
1794 {8, 8}, /* cost of loading MMX registers
1795 in SImode and DImode */
1796 {8, 8}, /* cost of storing MMX registers
1797 in SImode and DImode */
1798 2, /* cost of moving SSE register */
1799 {8, 8, 8}, /* cost of loading SSE registers
1800 in SImode, DImode and TImode */
1801 {8, 8, 8}, /* cost of storing SSE registers
1802 in SImode, DImode and TImode */
1803 5, /* MMX or SSE register to integer */
1804 32, /* size of l1 cache. */
1805 256, /* size of l2 cache. */
1806 64, /* size of prefetch block */
1807 6, /* number of parallel prefetches */
1808 3, /* Branch cost */
1809 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1810 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1811 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1812 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1813 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1814 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
/* Vectorizer cost entries (abstract units).  */
1817 1, /* scalar_stmt_cost. */
1818 1, /* scalar load_cost. */
1819 1, /* scalar_store_cost. */
1820 1, /* vec_stmt_cost. */
1821 1, /* vec_to_scalar_cost. */
1822 1, /* scalar_to_vec_cost. */
1823 1, /* vec_align_load_cost. */
1824 2, /* vec_unalign_load_cost. */
1825 1, /* vec_store_cost. */
1826 3, /* cond_taken_branch_cost. */
1827 1, /* cond_not_taken_branch_cost. */
/* String-op strategies for Silvermont: [0] = 32-bit mode,
   [1] = 64-bit mode.  {max size, algorithm, noalign}; -1 terminates.  */
1830 static stringop_algs slm_memcpy[2] = {
1831 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1832 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1833 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1834 static stringop_algs slm_memset[2] = {
1835 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1836 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1837 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1838 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
/* Cost table for the Silvermont core, on the COSTS_N_INSNS scale.
   Largely mirrors atom_cost; the notable difference visible here is
   vec_to_scalar_cost (4 vs. 1).  */
1840 struct processor_costs slm_cost = {
1841 COSTS_N_INSNS (1), /* cost of an add instruction */
1842 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1843 COSTS_N_INSNS (1), /* variable shift costs */
1844 COSTS_N_INSNS (1), /* constant shift costs */
1845 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1846 COSTS_N_INSNS (3), /* HI */
1847 COSTS_N_INSNS (3), /* SI */
1848 COSTS_N_INSNS (4), /* DI */
1849 COSTS_N_INSNS (2)}, /* other */
1850 0, /* cost of multiply per each bit set */
1851 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1852 COSTS_N_INSNS (26), /* HI */
1853 COSTS_N_INSNS (42), /* SI */
1854 COSTS_N_INSNS (74), /* DI */
1855 COSTS_N_INSNS (74)}, /* other */
1856 COSTS_N_INSNS (1), /* cost of movsx */
1857 COSTS_N_INSNS (1), /* cost of movzx */
1858 8, /* "large" insn */
1859 17, /* MOVE_RATIO */
1860 4, /* cost for loading QImode using movzbl */
1861 {4, 4, 4}, /* cost of loading integer registers
1862 in QImode, HImode and SImode.
1863 Relative to reg-reg move (2). */
1864 {4, 4, 4}, /* cost of storing integer registers */
1865 4, /* cost of reg,reg fld/fst */
1866 {12, 12, 12}, /* cost of loading fp registers
1867 in SFmode, DFmode and XFmode */
1868 {6, 6, 8}, /* cost of storing fp registers
1869 in SFmode, DFmode and XFmode */
1870 2, /* cost of moving MMX register */
1871 {8, 8}, /* cost of loading MMX registers
1872 in SImode and DImode */
1873 {8, 8}, /* cost of storing MMX registers
1874 in SImode and DImode */
1875 2, /* cost of moving SSE register */
1876 {8, 8, 8}, /* cost of loading SSE registers
1877 in SImode, DImode and TImode */
1878 {8, 8, 8}, /* cost of storing SSE registers
1879 in SImode, DImode and TImode */
1880 5, /* MMX or SSE register to integer */
1881 32, /* size of l1 cache. */
1882 256, /* size of l2 cache. */
1883 64, /* size of prefetch block */
1884 6, /* number of parallel prefetches */
1885 3, /* Branch cost */
1886 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1887 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1888 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1889 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1890 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1891 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
/* Vectorizer cost entries (abstract units).  */
1894 1, /* scalar_stmt_cost. */
1895 1, /* scalar load_cost. */
1896 1, /* scalar_store_cost. */
1897 1, /* vec_stmt_cost. */
1898 4, /* vec_to_scalar_cost. */
1899 1, /* scalar_to_vec_cost. */
1900 1, /* vec_align_load_cost. */
1901 2, /* vec_unalign_load_cost. */
1902 1, /* vec_store_cost. */
1903 3, /* cond_taken_branch_cost. */
1904 1, /* cond_not_taken_branch_cost. */
/* String-op strategies for the generic "intel" tuning: [0] = 32-bit
   mode, [1] = 64-bit mode.  {max size, algorithm, noalign}; -1 ends.  */
1907 static stringop_algs intel_memcpy[2] = {
1908 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1909 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1910 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1911 static stringop_algs intel_memset[2] = {
1912 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1913 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1914 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1915 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
/* Cost table for the generic "intel" tuning (-mtune=intel), on the
   COSTS_N_INSNS scale.  Values match slm_cost in this extract.  */
1917 struct processor_costs intel_cost = {
1918 COSTS_N_INSNS (1), /* cost of an add instruction */
1919 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1920 COSTS_N_INSNS (1), /* variable shift costs */
1921 COSTS_N_INSNS (1), /* constant shift costs */
1922 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1923 COSTS_N_INSNS (3), /* HI */
1924 COSTS_N_INSNS (3), /* SI */
1925 COSTS_N_INSNS (4), /* DI */
1926 COSTS_N_INSNS (2)}, /* other */
1927 0, /* cost of multiply per each bit set */
1928 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1929 COSTS_N_INSNS (26), /* HI */
1930 COSTS_N_INSNS (42), /* SI */
1931 COSTS_N_INSNS (74), /* DI */
1932 COSTS_N_INSNS (74)}, /* other */
1933 COSTS_N_INSNS (1), /* cost of movsx */
1934 COSTS_N_INSNS (1), /* cost of movzx */
1935 8, /* "large" insn */
1936 17, /* MOVE_RATIO */
1937 4, /* cost for loading QImode using movzbl */
1938 {4, 4, 4}, /* cost of loading integer registers
1939 in QImode, HImode and SImode.
1940 Relative to reg-reg move (2). */
1941 {4, 4, 4}, /* cost of storing integer registers */
1942 4, /* cost of reg,reg fld/fst */
1943 {12, 12, 12}, /* cost of loading fp registers
1944 in SFmode, DFmode and XFmode */
1945 {6, 6, 8}, /* cost of storing fp registers
1946 in SFmode, DFmode and XFmode */
1947 2, /* cost of moving MMX register */
1948 {8, 8}, /* cost of loading MMX registers
1949 in SImode and DImode */
1950 {8, 8}, /* cost of storing MMX registers
1951 in SImode and DImode */
1952 2, /* cost of moving SSE register */
1953 {8, 8, 8}, /* cost of loading SSE registers
1954 in SImode, DImode and TImode */
1955 {8, 8, 8}, /* cost of storing SSE registers
1956 in SImode, DImode and TImode */
1957 5, /* MMX or SSE register to integer */
1958 32, /* size of l1 cache. */
1959 256, /* size of l2 cache. */
1960 64, /* size of prefetch block */
1961 6, /* number of parallel prefetches */
1962 3, /* Branch cost */
1963 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1964 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1965 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1966 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1967 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1968 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
/* Vectorizer cost entries (abstract units).  */
1971 1, /* scalar_stmt_cost. */
1972 1, /* scalar load_cost. */
1973 1, /* scalar_store_cost. */
1974 1, /* vec_stmt_cost. */
1975 4, /* vec_to_scalar_cost. */
1976 1, /* scalar_to_vec_cost. */
1977 1, /* vec_align_load_cost. */
1978 2, /* vec_unalign_load_cost. */
1979 1, /* vec_store_cost. */
1980 3, /* cond_taken_branch_cost. */
1981 1, /* cond_not_taken_branch_cost. */
1984 /* Generic should produce code tuned for Core-i7 (and newer chips)
1985 and btver1 (and newer chips). */
/* String-op strategies for -mtune=generic: [0] = 32-bit mode,
   [1] = 64-bit mode.  {max size, algorithm, noalign}; -1 terminates.  */
1987 static stringop_algs generic_memcpy[2] = {
1988 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1989 {-1, libcall, false}}},
1990 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1991 {-1, libcall, false}}}};
1992 static stringop_algs generic_memset[2] = {
1993 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1994 {-1, libcall, false}}},
1995 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1996 {-1, libcall, false}}}};
/* Cost table for -mtune=generic, on the COSTS_N_INSNS scale.  A
   compromise across Core-i7-class and btver1-class chips (see the
   comment above generic_memcpy).  */
1998 struct processor_costs generic_cost = {
1999 COSTS_N_INSNS (1), /* cost of an add instruction */
2000 /* On all chips taken into consideration lea is 2 cycles and more. With
2001 this cost however our current implementation of synth_mult results in
2002 use of unnecessary temporary registers causing regression on several
2003 SPECfp benchmarks. */
2004 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
2005 COSTS_N_INSNS (1), /* variable shift costs */
2006 COSTS_N_INSNS (1), /* constant shift costs */
2007 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
2008 COSTS_N_INSNS (4), /* HI */
2009 COSTS_N_INSNS (3), /* SI */
2010 COSTS_N_INSNS (4), /* DI */
2011 COSTS_N_INSNS (2)}, /* other */
2012 0, /* cost of multiply per each bit set */
2013 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
2014 COSTS_N_INSNS (26), /* HI */
2015 COSTS_N_INSNS (42), /* SI */
2016 COSTS_N_INSNS (74), /* DI */
2017 COSTS_N_INSNS (74)}, /* other */
2018 COSTS_N_INSNS (1), /* cost of movsx */
2019 COSTS_N_INSNS (1), /* cost of movzx */
2020 8, /* "large" insn */
2021 17, /* MOVE_RATIO */
2022 4, /* cost for loading QImode using movzbl */
2023 {4, 4, 4}, /* cost of loading integer registers
2024 in QImode, HImode and SImode.
2025 Relative to reg-reg move (2). */
2026 {4, 4, 4}, /* cost of storing integer registers */
2027 4, /* cost of reg,reg fld/fst */
2028 {12, 12, 12}, /* cost of loading fp registers
2029 in SFmode, DFmode and XFmode */
2030 {6, 6, 8}, /* cost of storing fp registers
2031 in SFmode, DFmode and XFmode */
2032 2, /* cost of moving MMX register */
2033 {8, 8}, /* cost of loading MMX registers
2034 in SImode and DImode */
2035 {8, 8}, /* cost of storing MMX registers
2036 in SImode and DImode */
2037 2, /* cost of moving SSE register */
2038 {8, 8, 8}, /* cost of loading SSE registers
2039 in SImode, DImode and TImode */
2040 {8, 8, 8}, /* cost of storing SSE registers
2041 in SImode, DImode and TImode */
2042 5, /* MMX or SSE register to integer */
2043 32, /* size of l1 cache. */
2044 512, /* size of l2 cache. */
2045 64, /* size of prefetch block */
2046 6, /* number of parallel prefetches */
2047 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this
2048 value is increased to perhaps more appropriate value of 5. */
2049 3, /* Branch cost */
2050 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
2051 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
2052 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
2053 COSTS_N_INSNS (8), /* cost of FABS instruction. */
2054 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
2055 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
/* Vectorizer cost entries (abstract units).  */
2058 1, /* scalar_stmt_cost. */
2059 1, /* scalar load_cost. */
2060 1, /* scalar_store_cost. */
2061 1, /* vec_stmt_cost. */
2062 1, /* vec_to_scalar_cost. */
2063 1, /* scalar_to_vec_cost. */
2064 1, /* vec_align_load_cost. */
2065 2, /* vec_unalign_load_cost. */
2066 1, /* vec_store_cost. */
2067 3, /* cond_taken_branch_cost. */
2068 1, /* cond_not_taken_branch_cost. */
2071 /* core_cost should produce code tuned for Core familly of CPUs. */
/* String-op strategies for the Core family: [0] = 32-bit mode,
   [1] = 64-bit mode.  Unlike the tables above, several entries set the
   noalign flag true (skip the alignment prologue).  */
2072 static stringop_algs core_memcpy[2] = {
2073 {libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}},
2074 {libcall, {{24, loop, true}, {128, rep_prefix_8_byte, true},
2075 {-1, libcall, false}}}};
2076 static stringop_algs core_memset[2] = {
2077 {libcall, {{6, loop_1_byte, true},
2079 {8192, rep_prefix_4_byte, true},
2080 {-1, libcall, false}}},
2081 {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, true},
2082 {-1, libcall, false}}}};
/* Cost table for the Core family of CPUs, on the COSTS_N_INSNS scale.  */
2085 struct processor_costs core_cost = {
2086 COSTS_N_INSNS (1), /* cost of an add instruction */
2087 /* On all chips taken into consideration lea is 2 cycles and more. With
2088 this cost however our current implementation of synth_mult results in
2089 use of unnecessary temporary registers causing regression on several
2090 SPECfp benchmarks. */
2091 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
2092 COSTS_N_INSNS (1), /* variable shift costs */
2093 COSTS_N_INSNS (1), /* constant shift costs */
2094 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
2095 COSTS_N_INSNS (4), /* HI */
2096 COSTS_N_INSNS (3), /* SI */
2097 COSTS_N_INSNS (4), /* DI */
2098 COSTS_N_INSNS (2)}, /* other */
2099 0, /* cost of multiply per each bit set */
2100 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
2101 COSTS_N_INSNS (26), /* HI */
2102 COSTS_N_INSNS (42), /* SI */
2103 COSTS_N_INSNS (74), /* DI */
2104 COSTS_N_INSNS (74)}, /* other */
2105 COSTS_N_INSNS (1), /* cost of movsx */
2106 COSTS_N_INSNS (1), /* cost of movzx */
2107 8, /* "large" insn */
2108 17, /* MOVE_RATIO */
2109 4, /* cost for loading QImode using movzbl */
2110 {4, 4, 4}, /* cost of loading integer registers
2111 in QImode, HImode and SImode.
2112 Relative to reg-reg move (2). */
2113 {4, 4, 4}, /* cost of storing integer registers */
2114 4, /* cost of reg,reg fld/fst */
2115 {12, 12, 12}, /* cost of loading fp registers
2116 in SFmode, DFmode and XFmode */
2117 {6, 6, 8}, /* cost of storing fp registers
2118 in SFmode, DFmode and XFmode */
2119 2, /* cost of moving MMX register */
2120 {8, 8}, /* cost of loading MMX registers
2121 in SImode and DImode */
2122 {8, 8}, /* cost of storing MMX registers
2123 in SImode and DImode */
2124 2, /* cost of moving SSE register */
2125 {8, 8, 8}, /* cost of loading SSE registers
2126 in SImode, DImode and TImode */
2127 {8, 8, 8}, /* cost of storing SSE registers
2128 in SImode, DImode and TImode */
2129 5, /* MMX or SSE register to integer */
2130 64, /* size of l1 cache. */
2131 512, /* size of l2 cache. */
2132 64, /* size of prefetch block */
2133 6, /* number of parallel prefetches */
2134 /* FIXME perhaps more appropriate value is 5. */
2135 3, /* Branch cost */
2136 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
2137 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
2138 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
2139 COSTS_N_INSNS (8), /* cost of FABS instruction. */
2140 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
2141 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
/* Vectorizer cost entries (abstract units).  */
2144 1, /* scalar_stmt_cost. */
2145 1, /* scalar load_cost. */
2146 1, /* scalar_store_cost. */
2147 1, /* vec_stmt_cost. */
2148 1, /* vec_to_scalar_cost. */
2149 1, /* scalar_to_vec_cost. */
2150 1, /* vec_align_load_cost. */
2151 2, /* vec_unalign_load_cost. */
2152 1, /* vec_store_cost. */
2153 3, /* cond_taken_branch_cost. */
2154 1, /* cond_not_taken_branch_cost. */
/* Active cost-table pointers.  Both default to pentium_cost here;
   NOTE(review): option processing that overrides them is outside this
   extract.  */
2158 /* Set by -mtune. */
2159 const struct processor_costs *ix86_tune_cost = &pentium_cost;
2161 /* Set by -mtune or -Os. */
2162 const struct processor_costs *ix86_cost = &pentium_cost;
/* m_* masks: one bit per PROCESSOR_* enumerator, OR-ed together to form
   the per-feature processor selectors used by x86-tune.def.  */
2164 /* Processor feature/optimization bitmasks. */
2165 #define m_386 (1<<PROCESSOR_I386)
2166 #define m_486 (1<<PROCESSOR_I486)
2167 #define m_PENT (1<<PROCESSOR_PENTIUM)
2168 #define m_LAKEMONT (1<<PROCESSOR_LAKEMONT)
2169 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
2170 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
2171 #define m_NOCONA (1<<PROCESSOR_NOCONA)
2172 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
2173 #define m_CORE2 (1<<PROCESSOR_CORE2)
2174 #define m_NEHALEM (1<<PROCESSOR_NEHALEM)
2175 #define m_SANDYBRIDGE (1<<PROCESSOR_SANDYBRIDGE)
2176 #define m_HASWELL (1<<PROCESSOR_HASWELL)
2177 #define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_HASWELL)
2178 #define m_BONNELL (1<<PROCESSOR_BONNELL)
2179 #define m_SILVERMONT (1<<PROCESSOR_SILVERMONT)
2180 #define m_KNL (1<<PROCESSOR_KNL)
2181 #define m_SKYLAKE_AVX512 (1<<PROCESSOR_SKYLAKE_AVX512)
2182 #define m_INTEL (1<<PROCESSOR_INTEL)
2184 #define m_GEODE (1<<PROCESSOR_GEODE)
2185 #define m_K6 (1<<PROCESSOR_K6)
2186 #define m_K6_GEODE (m_K6 | m_GEODE)
2187 #define m_K8 (1<<PROCESSOR_K8)
2188 #define m_ATHLON (1<<PROCESSOR_ATHLON)
2189 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
2190 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
2191 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
2192 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
2193 #define m_BDVER3 (1<<PROCESSOR_BDVER3)
2194 #define m_BDVER4 (1<<PROCESSOR_BDVER4)
2195 #define m_ZNVER1 (1<<PROCESSOR_ZNVER1)
2196 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
2197 #define m_BTVER2 (1<<PROCESSOR_BTVER2)
2198 #define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4)
2199 #define m_BTVER (m_BTVER1 | m_BTVER2)
/* NOTE(review): the continuation line of m_AMD_MULTIPLE is elided in
   this extract; the definition is kept verbatim.  */
2200 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER \
2203 #define m_GENERIC (1<<PROCESSOR_GENERIC)
/* Tuning/architecture feature tables, generated from x86-tune.def via
   the DEF_TUNE X-macro.  NOTE(review): the matching #undef lines and
   closing braces of these initializers are elided in this extract.  */
2205 const char* ix86_tune_feature_names[X86_TUNE_LAST] = {
2207 #define DEF_TUNE(tune, name, selector) name,
2208 #include "x86-tune.def"
2212 /* Feature tests against the various tunings. */
2213 unsigned char ix86_tune_features[X86_TUNE_LAST];
2215 /* Feature tests against the various tunings used to create ix86_tune_features
2216 based on the processor mask. */
2217 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
2219 #define DEF_TUNE(tune, name, selector) selector,
2220 #include "x86-tune.def"
2224 /* Feature tests against the various architecture variations. */
2225 unsigned char ix86_arch_features[X86_ARCH_LAST];
2227 /* Feature tests against the various architecture variations, used to create
2228 ix86_arch_features based on the processor mask. */
2229 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2230 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
2231 ~(m_386 | m_486 | m_PENT | m_LAKEMONT | m_K6),
2233 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2236 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2239 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2242 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2246 /* In case the average insn count for single function invocation is
2247 lower than this constant, emit fast (but longer) prologue and
2249 #define FAST_PROLOGUE_INSN_COUNT 20
/* Assembler register-name tables, indexed by hard register number; the
   *_REGISTER_NAMES macros come from i386.h.  */
2251 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
2252 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2253 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2254 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2256 /* Array of the smallest class containing reg number REGNO, indexed by
2257 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2259 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2261 /* ax, dx, cx, bx */
2262 AREG, DREG, CREG, BREG,
2263 /* si, di, bp, sp */
2264 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
/* x87 FP stack registers st(0)..st(7).  */
2266 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2267 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2270 /* flags, fpsr, fpcr, frame */
2271 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
/* SSE registers xmm0..xmm7.  */
2273 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
/* MMX registers.  */
2276 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
/* REX integer registers (presumably r8-r15) — NOTE(review): row labels
   for this and the preceding groups appear elided in this extract.  */
2279 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2280 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2281 /* SSE REX registers */
2282 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2284 /* AVX-512 SSE registers */
2285 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2286 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2287 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2288 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2289 /* Mask registers. */
2290 MASK_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2291 MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2292 /* MPX bound registers */
2293 BND_REGS, BND_REGS, BND_REGS, BND_REGS,
2296 /* The "default" register map used in 32bit mode. */
/* Maps gcc hard register numbers to debug-format register numbers;
   entries of -1 appear to mean "no debug register" — NOTE(review):
   confirm against the consumers of this table.  */
2298 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2300 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2301 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2302 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2303 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2304 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2305 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2306 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2307 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2308 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2309 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2310 101, 102, 103, 104, /* bound registers */
2313 /* The "default" register map used in 64bit mode. */
/* 64-bit counterpart of dbx_register_map; here the REX and AVX-512
   registers do have debug numbers.  */
2315 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2317 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2318 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2319 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2320 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2321 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2322 8,9,10,11,12,13,14,15, /* extended integer registers */
2323 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2324 67, 68, 69, 70, 71, 72, 73, 74, /* AVX-512 registers 16-23 */
2325 75, 76, 77, 78, 79, 80, 81, 82, /* AVX-512 registers 24-31 */
2326 118, 119, 120, 121, 122, 123, 124, 125, /* Mask registers */
2327 126, 127, 128, 129, /* bound registers */
/* DWARF debug register numbering for the SVR4 i386 ABI; see the
   historical rationale in the comment below.  */
2330 /* Define the register numbers to be used in Dwarf debugging information.
2331 The SVR4 reference port C compiler uses the following register numbers
2332 in its Dwarf output code:
2333 0 for %eax (gcc regno = 0)
2334 1 for %ecx (gcc regno = 2)
2335 2 for %edx (gcc regno = 1)
2336 3 for %ebx (gcc regno = 3)
2337 4 for %esp (gcc regno = 7)
2338 5 for %ebp (gcc regno = 6)
2339 6 for %esi (gcc regno = 4)
2340 7 for %edi (gcc regno = 5)
2341 The following three DWARF register numbers are never generated by
2342 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2343 believes these numbers have these meanings.
2344 8 for %eip (no gcc equivalent)
2345 9 for %eflags (gcc regno = 17)
2346 10 for %trapno (no gcc equivalent)
2347 It is not at all clear how we should number the FP stack registers
2348 for the x86 architecture. If the version of SDB on x86/svr4 were
2349 a bit less brain dead with respect to floating-point then we would
2350 have a precedent to follow with respect to DWARF register numbers
2351 for x86 FP registers, but the SDB on x86/svr4 is so completely
2352 broken with respect to FP registers that it is hardly worth thinking
2353 of it as something to strive for compatibility with.
2354 The version of x86/svr4 SDB I have at the moment does (partially)
2355 seem to believe that DWARF register number 11 is associated with
2356 the x86 register %st(0), but that's about all. Higher DWARF
2357 register numbers don't seem to be associated with anything in
2358 particular, and even for DWARF regno 11, SDB only seems to under-
2359 stand that it should say that a variable lives in %st(0) (when
2360 asked via an `=' command) if we said it was in DWARF regno 11,
2361 but SDB still prints garbage when asked for the value of the
2362 variable in question (via a `/' command).
2363 (Also note that the labels SDB prints for various FP stack regs
2364 when doing an `x' command are all wrong.)
2365 Note that these problems generally don't affect the native SVR4
2366 C compiler because it doesn't allow the use of -O with -g and
2367 because when it is *not* optimizing, it allocates a memory
2368 location for each floating-point variable, and the memory
2369 location is what gets described in the DWARF AT_location
2370 attribute for the variable in question.
2371 Regardless of the severe mental illness of the x86/svr4 SDB, we
2372 do something sensible here and we use the following DWARF
2373 register numbers. Note that these are all stack-top-relative
2375 11 for %st(0) (gcc regno = 8)
2376 12 for %st(1) (gcc regno = 9)
2377 13 for %st(2) (gcc regno = 10)
2378 14 for %st(3) (gcc regno = 11)
2379 15 for %st(4) (gcc regno = 12)
2380 16 for %st(5) (gcc regno = 13)
2381 17 for %st(6) (gcc regno = 14)
2382 18 for %st(7) (gcc regno = 15)
2384 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2386 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2387 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2388 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2389 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2390 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2391 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2392 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2393 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2394 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2395 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2396 101, 102, 103, 104, /* bound registers */
2399 /* Define parameter passing and return registers. */
/* SysV AMD64 ABI integer argument registers, in argument order.  */
2401 static int const x86_64_int_parameter_registers[6] =
2403 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
/* Microsoft x64 ABI integer argument registers, in argument order.  */
2406 static int const x86_64_ms_abi_int_parameter_registers[4] =
2408 CX_REG, DX_REG, R8_REG, R9_REG
2411 static int const x86_64_int_return_registers[4] =
2413 AX_REG, DX_REG, DI_REG, SI_REG
2416 /* Additional registers that are clobbered by SYSV calls. */
/* NOTE(review): the array is declared with 12 elements but only 8 XMM
   entries are visible here — the remaining initializers appear elided
   in this extract.  */
2418 int const x86_64_ms_sysv_extra_clobbered_registers[12] =
2422 XMM8_REG, XMM9_REG, XMM10_REG, XMM11_REG,
2423 XMM12_REG, XMM13_REG, XMM14_REG, XMM15_REG
2426 /* Define the structure for the machine field in struct function. */
/* NOTE(review): several members and braces of the two structs below are
   elided in this extract; visible tokens are kept byte-identical.  */
2428 struct GTY(()) stack_local_entry {
2429 unsigned short mode;
2432 struct stack_local_entry *next;
2435 /* Structure describing stack frame layout.
2436 Stack grows downward:
2442 saved static chain if ix86_static_chain_on_stack
2444 saved frame pointer if frame_pointer_needed
2445 <- HARD_FRAME_POINTER
2451 <- sse_regs_save_offset
2454 [va_arg registers] |
2458 [padding2] | = to_allocate
2467 int outgoing_arguments_size;
2469 /* The offsets relative to ARG_POINTER. */
2470 HOST_WIDE_INT frame_pointer_offset;
2471 HOST_WIDE_INT hard_frame_pointer_offset;
2472 HOST_WIDE_INT stack_pointer_offset;
2473 HOST_WIDE_INT hfp_save_offset;
2474 HOST_WIDE_INT reg_save_offset;
2475 HOST_WIDE_INT sse_reg_save_offset;
2477 /* When save_regs_using_mov is set, emit prologue using
2478 move instead of push instructions. */
2479 bool save_regs_using_mov;
/* Global tuning state.  NOTE(review): these are set by option handling
   outside this extract.  */
2482 /* Which cpu are we scheduling for. */
2483 enum attr_cpu ix86_schedule;
2485 /* Which cpu are we optimizing for. */
2486 enum processor_type ix86_tune;
2488 /* Which instruction set architecture to use. */
2489 enum processor_type ix86_arch;
2491 /* True if processor has SSE prefetch instruction. */
2492 unsigned char x86_prefetch_sse;
2494 /* -mstackrealign option */
2495 static const char ix86_force_align_arg_pointer_string[]
2496 = "force_align_arg_pointer";
/* Insn-generator function pointers; presumably selected per word size
   (32- vs 64-bit patterns) — NOTE(review): their initialization is
   outside this extract.  */
2498 static rtx (*ix86_gen_leave) (void);
2499 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2500 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2501 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2502 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2503 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2504 static rtx (*ix86_gen_monitorx) (rtx, rtx, rtx);
2505 static rtx (*ix86_gen_clzero) (rtx);
2506 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2507 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2508 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2509 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2510 static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
2511 static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);
2513 /* Preferred alignment for stack boundary in bits. */
2514 unsigned int ix86_preferred_stack_boundary;
2516 /* Alignment for incoming stack boundary in bits specified at
2518 static unsigned int ix86_user_incoming_stack_boundary;
2520 /* Default alignment for incoming stack boundary in bits. */
2521 static unsigned int ix86_default_incoming_stack_boundary;
2523 /* Alignment for incoming stack boundary in bits. */
2524 unsigned int ix86_incoming_stack_boundary;
2526 /* Calling abi specific va_list type nodes. */
2527 static GTY(()) tree sysv_va_list_type_node;
2528 static GTY(()) tree ms_va_list_type_node;
2530 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2531 char internal_label_prefix[16];
2532 int internal_label_prefix_len;
2534 /* Fence to use after loop using movnt. */
2537 /* Register class used for passing given 64bit part of the argument.
2538 These represent classes as documented by the PS ABI, with the exception
2539 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
2540 use SF or DFmode move instead of DImode to avoid reformatting penalties.
2542 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2543 whenever possible (upper half does contain padding). */
2544 enum x86_64_reg_class
2547 X86_64_INTEGER_CLASS,
2548 X86_64_INTEGERSI_CLASS,
2555 X86_64_COMPLEX_X87_CLASS,
2559 #define MAX_CLASSES 8
2561 /* Table of constants used by fldpi, fldln2, etc.... */
2562 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2563 static bool ext_80387_constants_init = 0;
2566 static struct machine_function * ix86_init_machine_status (void);
2567 static rtx ix86_function_value (const_tree, const_tree, bool);
2568 static bool ix86_function_value_regno_p (const unsigned int);
2569 static unsigned int ix86_function_arg_boundary (machine_mode,
2571 static rtx ix86_static_chain (const_tree, bool);
2572 static int ix86_function_regparm (const_tree, const_tree);
2573 static void ix86_compute_frame_layout (struct ix86_frame *);
2574 static bool ix86_expand_vector_init_one_nonzero (bool, machine_mode,
2576 static void ix86_add_new_builtins (HOST_WIDE_INT);
2577 static tree ix86_canonical_va_list_type (tree);
2578 static void predict_jump (int);
2579 static unsigned int split_stack_prologue_scratch_regno (void);
2580 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2582 enum ix86_function_specific_strings
2584 IX86_FUNCTION_SPECIFIC_ARCH,
2585 IX86_FUNCTION_SPECIFIC_TUNE,
2586 IX86_FUNCTION_SPECIFIC_MAX
2589 static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
2590 const char *, enum fpmath_unit, bool);
2591 static void ix86_function_specific_save (struct cl_target_option *,
2592 struct gcc_options *opts);
2593 static void ix86_function_specific_restore (struct gcc_options *opts,
2594 struct cl_target_option *);
2595 static void ix86_function_specific_post_stream_in (struct cl_target_option *);
2596 static void ix86_function_specific_print (FILE *, int,
2597 struct cl_target_option *);
2598 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2599 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2600 struct gcc_options *,
2601 struct gcc_options *,
2602 struct gcc_options *);
2603 static bool ix86_can_inline_p (tree, tree);
2604 static void ix86_set_current_function (tree);
2605 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2607 static enum calling_abi ix86_function_abi (const_tree);
2610 #ifndef SUBTARGET32_DEFAULT_CPU
2611 #define SUBTARGET32_DEFAULT_CPU "i386"
2614 /* Whether -mtune= or -march= were specified */
2615 static int ix86_tune_defaulted;
2616 static int ix86_arch_specified;
2618 /* Vectorization library interface and handlers. */
2619 static tree (*ix86_veclib_handler) (combined_fn, tree, tree);
2621 static tree ix86_veclibabi_svml (combined_fn, tree, tree);
2622 static tree ix86_veclibabi_acml (combined_fn, tree, tree);
2624 /* Processor target table, indexed by processor number */
/* struct ptt: per-processor costs plus default code-alignment parameters.
   NOTE(review): the struct keyword/opening line is elided here.  */
2627 const char *const name; /* processor name */
2628 const struct processor_costs *cost; /* Processor costs */
2629 const int align_loop; /* Default alignments. */
2630 const int align_loop_max_skip;
2631 const int align_jump;
2632 const int align_jump_max_skip;
2633 const int align_func;
2636 /* This table must be in sync with enum processor_type in i386.h. */
/* Columns: name, cost table, align_loop, align_loop_max_skip, align_jump,
   align_jump_max_skip, align_func.  The closing brace of the initializer
   is elided in this chunk.  */
2637 static const struct ptt processor_target_table[PROCESSOR_max] =
2639 {"generic", &generic_cost, 16, 10, 16, 10, 16},
2640 {"i386", &i386_cost, 4, 3, 4, 3, 4},
2641 {"i486", &i486_cost, 16, 15, 16, 15, 16},
2642 {"pentium", &pentium_cost, 16, 7, 16, 7, 16},
2643 {"lakemont", &lakemont_cost, 16, 7, 16, 7, 16},
2644 {"pentiumpro", &pentiumpro_cost, 16, 15, 16, 10, 16},
2645 {"pentium4", &pentium4_cost, 0, 0, 0, 0, 0},
2646 {"nocona", &nocona_cost, 0, 0, 0, 0, 0},
2647 {"core2", &core_cost, 16, 10, 16, 10, 16},
2648 {"nehalem", &core_cost, 16, 10, 16, 10, 16},
2649 {"sandybridge", &core_cost, 16, 10, 16, 10, 16},
2650 {"haswell", &core_cost, 16, 10, 16, 10, 16},
2651 {"bonnell", &atom_cost, 16, 15, 16, 7, 16},
2652 {"silvermont", &slm_cost, 16, 15, 16, 7, 16},
2653 {"knl", &slm_cost, 16, 15, 16, 7, 16},
2654 {"skylake-avx512", &core_cost, 16, 10, 16, 10, 16},
2655 {"intel", &intel_cost, 16, 15, 16, 7, 16},
2656 {"geode", &geode_cost, 0, 0, 0, 0, 0},
2657 {"k6", &k6_cost, 32, 7, 32, 7, 32},
2658 {"athlon", &athlon_cost, 16, 7, 16, 7, 16},
2659 {"k8", &k8_cost, 16, 7, 16, 7, 16},
2660 {"amdfam10", &amdfam10_cost, 32, 24, 32, 7, 32},
2661 {"bdver1", &bdver1_cost, 16, 10, 16, 7, 11},
2662 {"bdver2", &bdver2_cost, 16, 10, 16, 7, 11},
2663 {"bdver3", &bdver3_cost, 16, 10, 16, 7, 11},
2664 {"bdver4", &bdver4_cost, 16, 10, 16, 7, 11},
2665 {"znver1", &znver1_cost, 16, 10, 16, 7, 11},
2666 {"btver1", &btver1_cost, 16, 10, 16, 7, 11},
2667 {"btver2", &btver2_cost, 16, 10, 16, 7, 11}
/* Worker for the vzeroupper insertion pass: disable every mode-switching
   entity, enable only AVX_U128, then rerun the mode-switching pass so it
   inserts vzeroupper where needed.  NOTE(review): the return type line,
   declaration of I, and the return statement are elided in this chunk.  */
2671 rest_of_handle_insert_vzeroupper (void)
2675 /* vzeroupper instructions are inserted immediately after reload to
2676 account for possible spills from 256bit registers. The pass
2677 reuses mode switching infrastructure by re-running mode insertion
2678 pass, so disable entities that have already been processed. */
2679 for (i = 0; i < MAX_386_ENTITIES; i++)
2680 ix86_optimize_mode_switching[i] = 0;
2682 ix86_optimize_mode_switching[AVX_U128] = 1;
2684 /* Call optimize_mode_switching. */
2685 g->get_passes ()->execute_pass_mode_switching ();
2689 /* Return 1 if INSN uses or defines a hard register.
2690 Hard register uses in a memory address are ignored.
2691 Clobbers and flags definitions are ignored. */
/* Scans the insn's dataflow defs and uses; a def counts only when it is a
   hard reg, not a must-clobber, and not the flags register.  NOTE(review):
   the df_ref declaration and both return statements are elided here.  */
2694 has_non_address_hard_reg (rtx_insn *insn)
2697 FOR_EACH_INSN_DEF (ref, insn)
2698 if (HARD_REGISTER_P (DF_REF_REAL_REG (ref))
2699 && !DF_REF_FLAGS_IS_SET (ref, DF_REF_MUST_CLOBBER)
2700 && DF_REF_REGNO (ref) != FLAGS_REG)
2703 FOR_EACH_INSN_USE (ref, insn)
2704 if (!DF_REF_REG_MEM_P (ref) && HARD_REGISTER_P (DF_REF_REAL_REG (ref)))
2710 /* Check if comparison INSN may be transformed
2711 into vector comparison. Currently we transform
2712 zero checks only which look like:
2714 (set (reg:CCZ 17 flags)
2715 (compare:CCZ (ior:SI (subreg:SI (reg:DI x) 4)
2716 (subreg:SI (reg:DI x) 0))
2717 (const_int 0 [0]))) */
2720 convertible_comparison_p (rtx_insn *insn)
2725 rtx def_set = single_set (insn);
2727 gcc_assert (def_set);
2729 rtx src = SET_SRC (def_set);
2730 rtx dst = SET_DEST (def_set);
2732 gcc_assert (GET_CODE (src) == COMPARE);
/* Destination must be the CCZ flags register; otherwise not convertible.  */
2734 if (GET_CODE (dst) != REG
2735 || REGNO (dst) != FLAGS_REG
2736 || GET_MODE (dst) != CCZmode)
2739 rtx op1 = XEXP (src, 0);
2740 rtx op2 = XEXP (src, 1);
/* Only compares against zero are handled.  */
2742 if (op2 != CONST0_RTX (GET_MODE (op2)))
2745 if (GET_CODE (op1) != IOR)
2748 op2 = XEXP (op1, 1);
2749 op1 = XEXP (op1, 0);
/* The IOR operands must be the low (byte 0) and high (byte 4) SImode
   subregs of the same DImode value, in either order.  */
2753 || GET_MODE (op1) != SImode
2754 || GET_MODE (op2) != SImode
2755 || ((SUBREG_BYTE (op1) != 0
2756 || SUBREG_BYTE (op2) != GET_MODE_SIZE (SImode))
2757 && (SUBREG_BYTE (op2) != 0
2758 || SUBREG_BYTE (op1) != GET_MODE_SIZE (SImode))))
2761 op1 = SUBREG_REG (op1);
2762 op2 = SUBREG_REG (op2);
2766 || GET_MODE (op1) != DImode)
2772 /* Return 1 if INSN may be converted into vector
2776 scalar_to_vector_candidate_p (rtx_insn *insn)
2778 rtx def_set = single_set (insn);
/* Insns touching hard registers are never converted.  */
2783 if (has_non_address_hard_reg (insn))
2786 rtx src = SET_SRC (def_set);
2787 rtx dst = SET_DEST (def_set);
2789 if (GET_CODE (src) == COMPARE)
2790 return convertible_comparison_p (insn);
2792 /* We are interested in DImode promotion only. */
2793 if (GET_MODE (src) != DImode
2794 || GET_MODE (dst) != DImode)
2797 if (!REG_P (dst) && !MEM_P (dst))
/* NOTE(review): the case labels of this switch (PLUS/MINUS/IOR/XOR/AND
   etc., per compute_convert_gain below) are elided in this chunk.  */
2800 switch (GET_CODE (src))
2819 if (!REG_P (XEXP (src, 0)) && !MEM_P (XEXP (src, 0))
2820 /* Check for andnot case. */
2821 && (GET_CODE (src) != AND
2822 || GET_CODE (XEXP (src, 0)) != NOT
2823 || !REG_P (XEXP (XEXP (src, 0), 0))))
2826 if (!REG_P (XEXP (src, 1)) && !MEM_P (XEXP (src, 1)))
2829 if (GET_MODE (XEXP (src, 0)) != DImode
2830 || GET_MODE (XEXP (src, 1)) != DImode)
2836 /* For a given bitmap of insn UIDs scans all instruction and
2837 remove insn from CANDIDATES in case it has both convertible
2838 and not convertible definitions.
2840 All insns in a bitmap are conversion candidates according to
2841 scalar_to_vector_candidate_p. Currently it implies all insns
2845 remove_non_convertible_regs (bitmap candidates)
/* REGS collects pseudos that have at least one definition outside
   CANDIDATES; any candidate defining such a pseudo is then dropped.  */
2849 bitmap regs = BITMAP_ALLOC (NULL);
2851 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, id, bi)
2853 rtx def_set = single_set (DF_INSN_UID_GET (id)->insn);
2854 rtx reg = SET_DEST (def_set);
2857 || bitmap_bit_p (regs, REGNO (reg))
2858 || HARD_REGISTER_P (reg))
2861 for (df_ref def = DF_REG_DEF_CHAIN (REGNO (reg));
2863 def = DF_REF_NEXT_REG (def))
2865 if (!bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
2869 "r%d has non convertible definition in insn %d\n",
2870 REGNO (reg), DF_REF_INSN_UID (def));
2872 bitmap_set_bit (regs, REGNO (reg));
/* Second pass: remove every candidate insn that defines a flagged reg.  */
2878 EXECUTE_IF_SET_IN_BITMAP (regs, 0, id, bi)
2880 for (df_ref def = DF_REG_DEF_CHAIN (id);
2882 def = DF_REF_NEXT_REG (def))
2883 if (bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
2886 fprintf (dump_file, "Removing insn %d from candidates list\n",
2887 DF_REF_INSN_UID (def));
2889 bitmap_clear_bit (candidates, DF_REF_INSN_UID (def));
/* class scalar_chain: a connected set of DImode insns converted to V2DImode
   as a unit by the STV pass.  NOTE(review): the class header and the bitmap
   member declarations (insns, defs, defs_conv, queue) are elided here; the
   members below are inferred from the visible comments and method list.  */
2902 static unsigned max_id;
2904 /* ID of a chain. */
2905 unsigned int chain_id;
2906 /* A queue of instructions to be included into a chain. */
2908 /* Instructions included into a chain. */
2910 /* All registers defined by a chain. */
2912 /* Registers used in both vector and sclar modes. */
2915 void build (bitmap candidates, unsigned insn_uid);
2916 int compute_convert_gain ();
/* Private conversion helpers, defined below.  */
2920 void add_insn (bitmap candidates, unsigned insn_uid);
2921 void add_to_queue (unsigned insn_uid);
2922 void mark_dual_mode_def (df_ref def);
2923 void analyze_register_chain (bitmap candidates, df_ref ref);
2924 rtx replace_with_subreg (rtx x, rtx reg, rtx subreg);
2925 void emit_conversion_insns (rtx insns, rtx_insn *pos);
2926 void replace_with_subreg_in_insn (rtx_insn *insn, rtx reg, rtx subreg);
2927 void convert_insn (rtx_insn *insn);
2928 void convert_op (rtx *op, rtx_insn *insn);
2929 void convert_reg (unsigned regno);
2930 void make_vector_copies (unsigned regno);
2933 unsigned scalar_chain::max_id = 0;
2935 /* Initialize new chain. */
2937 scalar_chain::scalar_chain ()
/* Each chain gets a fresh sequential id for dump-file reporting.  */
2939 chain_id = ++max_id;
2942 fprintf (dump_file, "Created a new instruction chain #%d\n", chain_id);
2944 bitmap_obstack_initialize (NULL);
2945 insns = BITMAP_ALLOC (NULL);
2946 defs = BITMAP_ALLOC (NULL);
2947 defs_conv = BITMAP_ALLOC (NULL);
2951 /* Free chain's data. */
2953 scalar_chain::~scalar_chain ()
2955 BITMAP_FREE (insns);
/* NOTE(review): the BITMAP_FREE of DEFS is presumably on an elided line
   between these two -- confirm against the full source.  */
2957 BITMAP_FREE (defs_conv);
2958 bitmap_obstack_release (NULL);
2961 /* Add instruction into chains' queue. */
2964 scalar_chain::add_to_queue (unsigned insn_uid)
/* Skip insns already collected or already queued.  */
2966 if (bitmap_bit_p (insns, insn_uid)
2967 || bitmap_bit_p (queue, insn_uid))
2971 fprintf (dump_file, " Adding insn %d into chain's #%d queue\n",
2972 insn_uid, chain_id);
2973 bitmap_set_bit (queue, insn_uid);
2976 /* Mark register defined by DEF as requiring conversion. */
2979 scalar_chain::mark_dual_mode_def (df_ref def)
2981 gcc_assert (DF_REF_REG_DEF_P (def));
/* Idempotent: bail out if the regno is already marked.  */
2983 if (bitmap_bit_p (defs_conv, DF_REF_REGNO (def)))
2988 " Mark r%d def in insn %d as requiring both modes in chain #%d\n",
2989 DF_REF_REGNO (def), DF_REF_INSN_UID (def), chain_id);
2991 bitmap_set_bit (defs_conv, DF_REF_REGNO (def));
2994 /* Check REF's chain to add new insns into a queue
2995 and find registers requiring conversion. */
2998 scalar_chain::analyze_register_chain (bitmap candidates, df_ref ref)
3002 gcc_assert (bitmap_bit_p (insns, DF_REF_INSN_UID (ref))
3003 || bitmap_bit_p (candidates, DF_REF_INSN_UID (ref)));
3004 add_to_queue (DF_REF_INSN_UID (ref));
/* Walk the DU/UD chain of REF: related insns either join the chain's
   queue or force the register into dual (scalar+vector) mode.  */
3006 for (chain = DF_REF_CHAIN (ref); chain; chain = chain->next)
3008 unsigned uid = DF_REF_INSN_UID (chain->ref);
3010 if (!NONDEBUG_INSN_P (DF_REF_INSN (chain->ref)))
3013 if (!DF_REF_REG_MEM_P (chain->ref))
3015 if (bitmap_bit_p (insns, uid))
3018 if (bitmap_bit_p (candidates, uid))
/* A related insn that is not convertible: the value crosses the
   chain boundary, so its def must exist in both modes.  */
3025 if (DF_REF_REG_DEF_P (chain->ref))
3028 fprintf (dump_file, " r%d def in insn %d isn't convertible\n",
3029 DF_REF_REGNO (chain->ref), uid);
3030 mark_dual_mode_def (chain->ref);
3035 fprintf (dump_file, " r%d use in insn %d isn't convertible\n",
3036 DF_REF_REGNO (chain->ref), uid);
3037 mark_dual_mode_def (ref);
3042 /* Add instruction into a chain. */
3045 scalar_chain::add_insn (bitmap candidates, unsigned int insn_uid)
3047 if (bitmap_bit_p (insns, insn_uid))
3051 fprintf (dump_file, " Adding insn %d to chain #%d\n", insn_uid, chain_id);
3053 bitmap_set_bit (insns, insn_uid);
/* Record the defined pseudo (hard regs are excluded from DEFS).  */
3055 rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn;
3056 rtx def_set = single_set (insn);
3057 if (def_set && REG_P (SET_DEST (def_set))
3058 && !HARD_REGISTER_P (SET_DEST (def_set)))
3059 bitmap_set_bit (defs, REGNO (SET_DEST (def_set)));
/* Pull in all insns reachable through def and use chains of this insn's
   registers.  */
3063 for (ref = DF_INSN_UID_DEFS (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref))
3064 if (!HARD_REGISTER_P (DF_REF_REG (ref)))
3065 for (def = DF_REG_DEF_CHAIN (DF_REF_REGNO (ref));
3067 def = DF_REF_NEXT_REG (def))
3068 analyze_register_chain (candidates, def)
;
3069 for (ref = DF_INSN_UID_USES (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref))
3070 if (!DF_REF_REG_MEM_P (ref))
3071 analyze_register_chain (candidates, ref);
3074 /* Build new chain starting from insn INSN_UID recursively
3075 adding all dependent uses and definitions. */
3078 scalar_chain::build (bitmap candidates, unsigned insn_uid)
3080 queue = BITMAP_ALLOC (NULL);
3081 bitmap_set_bit (queue, insn_uid);
3084 fprintf (dump_file, "Building chain #%d...\n", chain_id);
/* Worklist loop: each popped candidate is removed from CANDIDATES and
   added to the chain; add_insn may enqueue further related insns.  */
3086 while (!bitmap_empty_p (queue))
3088 insn_uid = bitmap_first_set_bit (queue);
3089 bitmap_clear_bit (queue, insn_uid);
3090 bitmap_clear_bit (candidates, insn_uid);
3091 add_insn (candidates, insn_uid);
/* Dump the collected chain and the regs that need both modes.  */
3096 fprintf (dump_file, "Collected chain #%d...\n", chain_id);
3097 fprintf (dump_file, " insns: ");
3098 dump_bitmap (dump_file, insns);
3099 if (!bitmap_empty_p (defs_conv))
3103 const char *comma = "";
3104 fprintf (dump_file, " defs to convert: ");
3105 EXECUTE_IF_SET_IN_BITMAP (defs_conv, 0, id, bi)
3107 fprintf (dump_file, "%sr%d", comma, id);
3110 fprintf (dump_file, "\n");
3114 BITMAP_FREE (queue);
3117 /* Compute a gain for chain conversion. */
/* Returns estimated gain (scalar cost minus vector cost) for converting the
   whole chain; callers only convert when the result is positive.
   NOTE(review): declarations of GAIN/COST and the final GAIN -= COST and
   return lines are elided in this chunk.  */
3120 scalar_chain::compute_convert_gain ()
3128 fprintf (dump_file, "Computing gain for chain #%d...\n", chain_id);
3130 EXECUTE_IF_SET_IN_BITMAP (insns, 0, insn_uid, bi)
3132 rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn;
3133 rtx def_set = single_set (insn);
3134 rtx src = SET_SRC (def_set);
3135 rtx dst = SET_DEST (def_set);
/* A DImode scalar op on 32-bit needs two SImode insns; one vector insn
   replaces both, hence the 2x factors below.  */
3137 if (REG_P (src) && REG_P (dst))
3138 gain += COSTS_N_INSNS (2) - ix86_cost->sse_move;
3139 else if (REG_P (src) && MEM_P (dst))
3140 gain += 2 * ix86_cost->int_store[2] - ix86_cost->sse_store[1];
3141 else if (MEM_P (src) && REG_P (dst))
3142 gain += 2 * ix86_cost->int_load[2] - ix86_cost->sse_load[1];
3143 else if (GET_CODE (src) == PLUS
3144 || GET_CODE (src) == MINUS
3145 || GET_CODE (src) == IOR
3146 || GET_CODE (src) == XOR
3147 || GET_CODE (src) == AND)
3148 gain += ix86_cost->add;
3149 else if (GET_CODE (src) == COMPARE)
3151 /* Assume comparison cost is the same. */
3158 fprintf (dump_file, " Instruction conversion gain: %d\n", gain);
/* Each dual-mode reg costs one inter-unit move per definition.  */
3160 EXECUTE_IF_SET_IN_BITMAP (defs_conv, 0, insn_uid, bi)
3161 cost += DF_REG_DEF_COUNT (insn_uid) * ix86_cost->mmxsse_to_integer;
3164 fprintf (dump_file, " Registers conversion cost: %d\n", cost);
3169 fprintf (dump_file, " Total gain: %d\n", gain);
3174 /* Replace REG in X with a V2DI subreg of NEW_REG. */
/* Recursive structural walk over the rtx; every occurrence of REG becomes
   (subreg:V2DI NEW_REG 0).  NOTE(review): the base-case test that X == REG
   and loop-variable declarations are on elided lines.  */
3177 scalar_chain::replace_with_subreg (rtx x, rtx reg, rtx new_reg)
3180 return gen_rtx_SUBREG (V2DImode, new_reg, 0);
3182 const char *fmt = GET_RTX_FORMAT (GET_CODE (x));
3184 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
3187 XEXP (x, i) = replace_with_subreg (XEXP (x, i), reg, new_reg);
3188 else if (fmt[i] == 'E')
3189 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3190 XVECEXP (x, i, j) = replace_with_subreg (XVECEXP (x, i, j),
3197 /* Replace REG in INSN with a V2DI subreg of NEW_REG. */
3200 scalar_chain::replace_with_subreg_in_insn (rtx_insn *insn, rtx reg, rtx new_reg)
3202 replace_with_subreg (single_set (insn), reg, new_reg);
3205 /* Insert generated conversion instruction sequence INSNS
3206 after instruction AFTER. New BB may be required in case
3207 instruction has EH region attached. */
3210 scalar_chain::emit_conversion_insns (rtx insns, rtx_insn *after)
3212 if (!control_flow_insn_p (after))
3214 emit_insn_after (insns, after);
/* AFTER can throw/branch: emit on a new block split off the fallthru edge
   so the conversion executes on the normal path only.  */
3218 basic_block bb = BLOCK_FOR_INSN (after);
3219 edge e = find_fallthru_edge (bb->succs);
3222 basic_block new_bb = split_edge (e);
3223 emit_insn_after (insns, BB_HEAD (new_bb));
3226 /* Make vector copies for all register REGNO definitions
3227 and replace its uses in a chain. */
3230 scalar_chain::make_vector_copies (unsigned regno)
3232 rtx reg = regno_reg_rtx[regno];
3233 rtx vreg = gen_reg_rtx (DImode);
/* After each definition of REGNO outside the chain, materialize the value
   into VREG by one of three strategies (best available first).  */
3236 for (ref = DF_REG_DEF_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref))
3237 if (!bitmap_bit_p (insns, DF_REF_INSN_UID (ref)))
3239 rtx_insn *insn = DF_REF_INSN (ref);
/* Strategy 1 (condition elided, presumably SSE4.1): load low half with
   movd, insert high half with pinsrd.  */
3244 emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, vreg, 0),
3245 CONST0_RTX (V4SImode),
3246 gen_rtx_SUBREG (SImode, reg, 0)));
3247 emit_insn (gen_sse4_1_pinsrd (gen_rtx_SUBREG (V4SImode, vreg, 0),
3248 gen_rtx_SUBREG (V4SImode, vreg, 0),
3249 gen_rtx_SUBREG (SImode, reg, 4),
/* Strategy 2: two movd loads plus punpckldq to interleave halves.  */
3252 else if (TARGET_INTER_UNIT_MOVES_TO_VEC)
3254 rtx tmp = gen_reg_rtx (DImode);
3255 emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, vreg, 0),
3256 CONST0_RTX (V4SImode),
3257 gen_rtx_SUBREG (SImode, reg, 0)));
3258 emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, tmp, 0),
3259 CONST0_RTX (V4SImode),
3260 gen_rtx_SUBREG (SImode, reg, 4)));
3261 emit_insn (gen_vec_interleave_lowv4si
3262 (gen_rtx_SUBREG (V4SImode, vreg, 0),
3263 gen_rtx_SUBREG (V4SImode, vreg, 0),
3264 gen_rtx_SUBREG (V4SImode, tmp, 0)));
/* Strategy 3 (fallback): bounce both halves through a stack slot.  */
3268 rtx tmp = assign_386_stack_local (DImode, SLOT_TEMP);
3269 emit_move_insn (adjust_address (tmp, SImode, 0),
3270 gen_rtx_SUBREG (SImode, reg, 0));
3271 emit_move_insn (adjust_address (tmp, SImode, 4),
3272 gen_rtx_SUBREG (SImode, reg, 4));
3273 emit_move_insn (vreg, tmp);
3275 emit_conversion_insns (get_insns (), insn);
3280 " Copied r%d to a vector register r%d for insn %d\n",
3281 regno, REGNO (vreg), DF_REF_INSN_UID (ref));
/* Replace uses inside the chain with the vector copy.  */
3284 for (ref = DF_REG_USE_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref))
3285 if (bitmap_bit_p (insns, DF_REF_INSN_UID (ref)))
3287 replace_with_subreg_in_insn (DF_REF_INSN (ref), reg, vreg);
3290 fprintf (dump_file, " Replaced r%d with r%d in insn %d\n",
3291 regno, REGNO (vreg), DF_REF_INSN_UID (ref));
3295 /* Convert all definitions of register REGNO
3296 and fix its uses. Scalar copies may be created
3297 in case register is used in not convertible insn. */
3300 scalar_chain::convert_reg (unsigned regno)
3302 bool scalar_copy = bitmap_bit_p (defs_conv, regno);
3303 rtx reg = regno_reg_rtx[regno];
3304 rtx scopy = NULL_RTX;
/* CONV tracks chain insns whose uses of REG still need rewriting.  */
3308 conv = BITMAP_ALLOC (NULL);
3309 bitmap_copy (conv, insns);
3312 scopy = gen_reg_rtx (DImode);
3314 for (ref = DF_REG_DEF_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref))
3316 rtx_insn *insn = DF_REF_INSN (ref);
3317 rtx def_set = single_set (insn);
3318 rtx src = SET_SRC (def_set);
3319 rtx reg = DF_REF_REG (ref);
3323 replace_with_subreg_in_insn (insn, reg, reg);
3324 bitmap_clear_bit (conv, INSN_UID (insn));
/* REGNO is also used in non-convertible insns: copy the vector value
   back to a scalar pseudo SCOPY after this definition.  */
3329 rtx vcopy = gen_reg_rtx (V2DImode);
3332 if (TARGET_INTER_UNIT_MOVES_FROM_VEC)
/* Extract low then high 32 bits via movd + psrlq 32.  */
3334 emit_move_insn (vcopy, gen_rtx_SUBREG (V2DImode, reg, 0));
3335 emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 0),
3336 gen_rtx_SUBREG (SImode, vcopy, 0));
3337 emit_move_insn (vcopy,
3338 gen_rtx_LSHIFTRT (V2DImode, vcopy, GEN_INT (32)));
3339 emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 4),
3340 gen_rtx_SUBREG (SImode, vcopy, 0));
/* Fallback: spill through a stack slot and reload both halves.  */
3344 rtx tmp = assign_386_stack_local (DImode, SLOT_TEMP);
3345 emit_move_insn (tmp, reg);
3346 emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 0),
3347 adjust_address (tmp, SImode, 0));
3348 emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 4),
3349 adjust_address (tmp, SImode, 4));
3351 emit_conversion_insns (get_insns (), insn);
3356 " Copied r%d to a scalar register r%d for insn %d\n",
3357 regno, REGNO (scopy), INSN_UID (insn));
/* Fix remaining uses: chain insns get the V2DI subreg, insns outside the
   chain get the scalar copy SCOPY.  */
3361 for (ref = DF_REG_USE_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref))
3362 if (bitmap_bit_p (insns, DF_REF_INSN_UID (ref)))
3364 if (bitmap_bit_p (conv, DF_REF_INSN_UID (ref)))
3366 rtx def_set = single_set (DF_REF_INSN (ref));
3367 if (!MEM_P (SET_DEST (def_set))
3368 || !REG_P (SET_SRC (def_set)))
3369 replace_with_subreg_in_insn (DF_REF_INSN (ref), reg, reg);
3370 bitmap_clear_bit (conv, DF_REF_INSN_UID (ref));
3373 else if (NONDEBUG_INSN_P (DF_REF_INSN (ref)))
3375 replace_rtx (DF_REF_INSN (ref), reg, scopy);
3376 df_insn_rescan (DF_REF_INSN (ref));
3382 /* Convert operand OP in INSN. All register uses
3383 are converted during registers conversion.
3384 Therefore we should just handle memory operands. */
3387 scalar_chain::convert_op (rtx *op, rtx_insn *insn)
3389 *op = copy_rtx_if_shared (*op);
3391 if (GET_CODE (*op) == NOT)
/* NOT: convert its operand recursively, then retag the NOT as V2DI.  */
3393 convert_op (&XEXP (*op, 0), insn);
3394 PUT_MODE (*op, V2DImode);
3396 else if (MEM_P (*op))
/* Memory operand: preload into a fresh DImode pseudo before INSN and use
   its V2DI subreg, since the vector insn cannot take this mem directly.  */
3398 rtx tmp = gen_reg_rtx (DImode);
3400 emit_insn_before (gen_move_insn (tmp, *op), insn);
3401 *op = gen_rtx_SUBREG (V2DImode, tmp, 0);
3404 fprintf (dump_file, " Preloading operand for insn %d into r%d\n",
3405 INSN_UID (insn), REGNO (tmp));
/* Anything else must already be a converted V2DI subreg.  */
3409 gcc_assert (SUBREG_P (*op));
3410 gcc_assert (GET_MODE (*op) == V2DImode);
3414 /* Convert INSN to vector mode. */
3417 scalar_chain::convert_insn (rtx_insn *insn)
3419 rtx def_set = single_set (insn);
3420 rtx src = SET_SRC (def_set);
3421 rtx dst = SET_DEST (def_set);
3424 if (MEM_P (dst) && !REG_P (src))
3426 /* There are no scalar integer instructions and therefore
3427 temporary register usage is required. */
3428 rtx tmp = gen_reg_rtx (DImode);
3429 emit_conversion_insns (gen_move_insn (dst, tmp), insn);
3430 dst = gen_rtx_SUBREG (V2DImode, tmp, 0);
/* NOTE(review): the case labels of this switch (binary ops, NOT, MEM/REG,
   COMPARE) are elided in this chunk.  */
3433 switch (GET_CODE (src))
/* Binary operation: convert both operands, retag the op as V2DI.  */
3440 convert_op (&XEXP (src, 0), insn);
3441 convert_op (&XEXP (src, 1), insn);
3442 PUT_MODE (src, V2DImode);
3447 convert_op (&src, insn);
3454 gcc_assert (GET_MODE (src) == V2DImode);
/* COMPARE case: rebuild the zero-test as ptest-style UNSPEC on FLAGS.  */
3458 src = SUBREG_REG (XEXP (XEXP (src, 0), 0));
3460 gcc_assert ((REG_P (src) && GET_MODE (src) == DImode)
3461 || (SUBREG_P (src) && GET_MODE (src) == V2DImode));
3464 subreg = gen_rtx_SUBREG (V2DImode, src, 0);
3466 subreg = copy_rtx_if_shared (src);
3467 emit_insn_before (gen_vec_interleave_lowv2di (copy_rtx_if_shared (subreg),
3468 copy_rtx_if_shared (subreg),
3469 copy_rtx_if_shared (subreg)),
3471 dst = gen_rtx_REG (CCmode, FLAGS_REG);
3472 src = gen_rtx_UNSPEC (CCmode, gen_rtvec (2, copy_rtx_if_shared (src),
3473 copy_rtx_if_shared (src)),
3481 SET_SRC (def_set) = src;
3482 SET_DEST (def_set) = dst;
3484 /* Drop possible dead definitions. */
3485 PATTERN (insn) = def_set;
/* Force re-recognition of the rewritten pattern.  */
3487 INSN_CODE (insn) = -1;
3488 recog_memoized (insn);
3489 df_insn_rescan (insn);
3492 /* Convert whole chain creating required register
3493 conversions and copies. */
/* Returns the number of insns converted (used by the caller's total).  */
3496 scalar_chain::convert ()
3500 int converted_insns = 0;
3502 if (!dbg_cnt (stv_conversion))
3506 fprintf (dump_file, "Converting chain #%d...\n", chain_id);
/* First convert regs defined inside the chain, then make vector copies
   for dual-mode regs defined outside it, then rewrite each insn.  */
3508 EXECUTE_IF_SET_IN_BITMAP (defs, 0, id, bi)
3511 EXECUTE_IF_AND_COMPL_IN_BITMAP (defs_conv, defs, 0, id, bi)
3512 make_vector_copies (id);
3514 EXECUTE_IF_SET_IN_BITMAP (insns, 0, id, bi)
3516 convert_insn (DF_INSN_UID_GET (id)->insn);
3520 return converted_insns;
3523 /* Main STV pass function. Find and convert scalar
3524 instructions into vector mode when profitable. */
3527 convert_scalars_to_vector ()
3531 int converted_insns = 0;
3533 bitmap_obstack_initialize (NULL);
3534 candidates = BITMAP_ALLOC (NULL);
/* Set up dominance and def-use/use-def chains for the analysis below.  */
3536 calculate_dominance_info (CDI_DOMINATORS);
3537 df_set_flags (DF_DEFER_INSN_RESCAN);
3538 df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
3539 df_md_add_problem ();
3542 /* Find all instructions we want to convert into vector mode. */
3544 fprintf (dump_file, "Searching for mode conversion candidates...\n");
3546 FOR_EACH_BB_FN (bb, cfun)
3549 FOR_BB_INSNS (bb, insn)
3550 if (scalar_to_vector_candidate_p (insn))
3553 fprintf (dump_file, " insn %d is marked as a candidate\n",
3556 bitmap_set_bit (candidates, INSN_UID (insn));
3560 remove_non_convertible_regs (candidates);
3562 if (bitmap_empty_p (candidates))
3564 fprintf (dump_file, "There are no candidates for optimization.\n");
/* Greedily build a chain from each remaining candidate; convert it only
   when compute_convert_gain estimates a positive payoff.  */
3566 while (!bitmap_empty_p (candidates))
3568 unsigned uid = bitmap_first_set_bit (candidates);
3571 /* Find instructions chain we want to convert to vector mode.
3572 Check all uses and definitions to estimate all required
3574 chain.build (candidates, uid);
3576 if (chain.compute_convert_gain () > 0)
3577 converted_insns += chain.convert ();
3580 fprintf (dump_file, "Chain #%d conversion is not profitable\n",
3585 fprintf (dump_file, "Total insns converted: %d\n", converted_insns);
3587 BITMAP_FREE (candidates);
3588 bitmap_obstack_release (NULL);
3589 df_process_deferred_rescans ();
3591 /* Conversion means we may have 128bit register spills/fills
3592 which require aligned stack. */
3593 if (converted_insns)
3595 if (crtl->stack_alignment_needed < 128)
3596 crtl->stack_alignment_needed = 128;
3597 if (crtl->stack_alignment_estimated < 128)
3598 crtl->stack_alignment_estimated = 128;
/* Pass metadata for the vzeroupper-insertion RTL pass.  */
3606 const pass_data pass_data_insert_vzeroupper =
3608 RTL_PASS, /* type */
3609 "vzeroupper", /* name */
3610 OPTGROUP_NONE, /* optinfo_flags */
3611 TV_NONE, /* tv_id */
3612 0, /* properties_required */
3613 0, /* properties_provided */
3614 0, /* properties_destroyed */
3615 0, /* todo_flags_start */
3616 TODO_df_finish, /* todo_flags_finish */
3619 class pass_insert_vzeroupper : public rtl_opt_pass
3622 pass_insert_vzeroupper(gcc::context *ctxt)
3623 : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt)
3626 /* opt_pass methods: */
/* Run only for AVX (but not AVX-512) when -mvzeroupper is on; the last
   conjunct of the gate is on an elided line.  */
3627 virtual bool gate (function *)
3629 return TARGET_AVX && !TARGET_AVX512F
3630 && TARGET_VZEROUPPER && flag_expensive_optimizations
3634 virtual unsigned int execute (function *)
3636 return rest_of_handle_insert_vzeroupper ();
3639 }; // class pass_insert_vzeroupper
/* Pass metadata for the scalar-to-vector (STV) RTL pass; the name-string
   line of the initializer is elided here.  */
3641 const pass_data pass_data_stv =
3643 RTL_PASS, /* type */
3645 OPTGROUP_NONE, /* optinfo_flags */
3646 TV_NONE, /* tv_id */
3647 0, /* properties_required */
3648 0, /* properties_provided */
3649 0, /* properties_destroyed */
3650 0, /* todo_flags_start */
3651 TODO_df_finish, /* todo_flags_finish */
3654 class pass_stv : public rtl_opt_pass
3657 pass_stv (gcc::context *ctxt)
3658 : rtl_opt_pass (pass_data_stv, ctxt)
3661 /* opt_pass methods: */
/* STV targets 32-bit SSE2 code at -O2 and above.  */
3662 virtual bool gate (function *)
3664 return !TARGET_64BIT && TARGET_STV && TARGET_SSE2 && optimize > 1;
3667 virtual unsigned int execute (function *)
3669 return convert_scalars_to_vector ();
3672 }; // class pass_stv
/* Factory functions used by the pass manager to instantiate the passes;
   the `rtl_opt_pass *` return-type lines are elided in this chunk.  */
3677 make_pass_insert_vzeroupper (gcc::context *ctxt)
3679 return new pass_insert_vzeroupper (ctxt);
3683 make_pass_stv (gcc::context *ctxt)
3685 return new pass_stv (ctxt);
3688 /* Return true if a red-zone is in use. */
/* The red zone is available except under the 64-bit MS ABI.  */
3691 ix86_using_red_zone (void)
3693 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
3696 /* Return a string that documents the current -m options. The caller is
3697 responsible for freeing the string. */
3700 ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
3701 const char *tune, enum fpmath_unit fpmath,
3704 struct ix86_target_opts
3706 const char *option; /* option string */
3707 HOST_WIDE_INT mask; /* isa mask options */
3710 /* This table is ordered so that options like -msse4.2 that imply
3711 preceding options while match those first. */
3712 static struct ix86_target_opts isa_opts[] =
3714 { "-mfma4", OPTION_MASK_ISA_FMA4 },
3715 { "-mfma", OPTION_MASK_ISA_FMA },
3716 { "-mxop", OPTION_MASK_ISA_XOP },
3717 { "-mlwp", OPTION_MASK_ISA_LWP },
3718 { "-mavx512f", OPTION_MASK_ISA_AVX512F },
3719 { "-mavx512er", OPTION_MASK_ISA_AVX512ER },
3720 { "-mavx512cd", OPTION_MASK_ISA_AVX512CD },
3721 { "-mavx512pf", OPTION_MASK_ISA_AVX512PF },
3722 { "-mavx512dq", OPTION_MASK_ISA_AVX512DQ },
3723 { "-mavx512bw", OPTION_MASK_ISA_AVX512BW },
3724 { "-mavx512vl", OPTION_MASK_ISA_AVX512VL },
3725 { "-mavx512ifma", OPTION_MASK_ISA_AVX512IFMA },
3726 { "-mavx512vbmi", OPTION_MASK_ISA_AVX512VBMI },
3727 { "-msse4a", OPTION_MASK_ISA_SSE4A },
3728 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
3729 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
3730 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
3731 { "-msse3", OPTION_MASK_ISA_SSE3 },
3732 { "-msse2", OPTION_MASK_ISA_SSE2 },
3733 { "-msse", OPTION_MASK_ISA_SSE },
3734 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
3735 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
3736 { "-mmmx", OPTION_MASK_ISA_MMX },
3737 { "-mabm", OPTION_MASK_ISA_ABM },
3738 { "-mbmi", OPTION_MASK_ISA_BMI },
3739 { "-mbmi2", OPTION_MASK_ISA_BMI2 },
3740 { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
3741 { "-mhle", OPTION_MASK_ISA_HLE },
3742 { "-mfxsr", OPTION_MASK_ISA_FXSR },
3743 { "-mrdseed", OPTION_MASK_ISA_RDSEED },
3744 { "-mprfchw", OPTION_MASK_ISA_PRFCHW },
3745 { "-madx", OPTION_MASK_ISA_ADX },
3746 { "-mtbm", OPTION_MASK_ISA_TBM },
3747 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
3748 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
3749 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
3750 { "-maes", OPTION_MASK_ISA_AES },
3751 { "-msha", OPTION_MASK_ISA_SHA },
3752 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
3753 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
3754 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
3755 { "-mf16c", OPTION_MASK_ISA_F16C },
3756 { "-mrtm", OPTION_MASK_ISA_RTM },
3757 { "-mxsave", OPTION_MASK_ISA_XSAVE },
3758 { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT },
3759 { "-mprefetchwt1", OPTION_MASK_ISA_PREFETCHWT1 },
3760 { "-mclflushopt", OPTION_MASK_ISA_CLFLUSHOPT },
3761 { "-mxsavec", OPTION_MASK_ISA_XSAVEC },
3762 { "-mxsaves", OPTION_MASK_ISA_XSAVES },
3763 { "-mmpx", OPTION_MASK_ISA_MPX },
3764 { "-mclwb", OPTION_MASK_ISA_CLWB },
3765 { "-mpcommit", OPTION_MASK_ISA_PCOMMIT },
3766 { "-mmwaitx", OPTION_MASK_ISA_MWAITX },
3767 { "-mclzero", OPTION_MASK_ISA_CLZERO },
3768 { "-mpku", OPTION_MASK_ISA_PKU },
3772 static struct ix86_target_opts flag_opts[] =
3774 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
3775 { "-mlong-double-128", MASK_LONG_DOUBLE_128 },
3776 { "-mlong-double-64", MASK_LONG_DOUBLE_64 },
3777 { "-m80387", MASK_80387 },
3778 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
3779 { "-malign-double", MASK_ALIGN_DOUBLE },
3780 { "-mcld", MASK_CLD },
3781 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
3782 { "-mieee-fp", MASK_IEEE_FP },
3783 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
3784 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
3785 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
3786 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
3787 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
3788 { "-mno-push-args", MASK_NO_PUSH_ARGS },
3789 { "-mno-red-zone", MASK_NO_RED_ZONE },
3790 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
3791 { "-mrecip", MASK_RECIP },
3792 { "-mrtd", MASK_RTD },
3793 { "-msseregparm", MASK_SSEREGPARM },
3794 { "-mstack-arg-probe", MASK_STACK_PROBE },
3795 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
3796 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
3797 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
3798 { "-mvzeroupper", MASK_VZEROUPPER },
3799 { "-mstv", MASK_STV},
3800 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
3801 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
3802 { "-mprefer-avx128", MASK_PREFER_AVX128},
3805 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
3808 char target_other[40];
3818 memset (opts, '\0', sizeof (opts));
3820 /* Add -march= option. */
3823 opts[num][0] = "-march=";
3824 opts[num++][1] = arch;
3827 /* Add -mtune= option. */
3830 opts[num][0] = "-mtune=";
3831 opts[num++][1] = tune;
3834 /* Add -m32/-m64/-mx32. */
3835 if ((isa & OPTION_MASK_ISA_64BIT) != 0)
3837 if ((isa & OPTION_MASK_ABI_64) != 0)
3841 isa &= ~ (OPTION_MASK_ISA_64BIT
3842 | OPTION_MASK_ABI_64
3843 | OPTION_MASK_ABI_X32);
3847 opts[num++][0] = abi;
3849 /* Pick out the options in isa options. */
3850 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
3852 if ((isa & isa_opts[i].mask) != 0)
3854 opts[num++][0] = isa_opts[i].option;
3855 isa &= ~ isa_opts[i].mask;
3859 if (isa && add_nl_p)
3861 opts[num++][0] = isa_other;
3862 sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
3866 /* Add flag options. */
3867 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
3869 if ((flags & flag_opts[i].mask) != 0)
3871 opts[num++][0] = flag_opts[i].option;
3872 flags &= ~ flag_opts[i].mask;
3876 if (flags && add_nl_p)
3878 opts[num++][0] = target_other;
3879 sprintf (target_other, "(other flags: %#x)", flags);
3882 /* Add -fpmath= option. */
3885 opts[num][0] = "-mfpmath=";
3886 switch ((int) fpmath)
3889 opts[num++][1] = "387";
3893 opts[num++][1] = "sse";
3896 case FPMATH_387 | FPMATH_SSE:
3897 opts[num++][1] = "sse+387";
3909 gcc_assert (num < ARRAY_SIZE (opts));
3911 /* Size the string. */
3913 sep_len = (add_nl_p) ? 3 : 1;
3914 for (i = 0; i < num; i++)
3917 for (j = 0; j < 2; j++)
3919 len += strlen (opts[i][j]);
3922 /* Build the string. */
3923 ret = ptr = (char *) xmalloc (len);
3926 for (i = 0; i < num; i++)
3930 for (j = 0; j < 2; j++)
3931 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
3938 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
3946 for (j = 0; j < 2; j++)
3949 memcpy (ptr, opts[i][j], len2[j]);
3951 line_len += len2[j];
3956 gcc_assert (ret + len >= ptr);
/* Return true if the profiling call should be emitted before the
   function prologue, false if it should be emitted after it.  With
   -mfentry (flag_fentry) the call must precede the prologue so the
   bytes at the function entry remain patchable — the "hotfix"
   hot-patching scheme relies on that layout.  */
3965 ix86_profile_before_prologue (void)
3967 return flag_fentry != 0;
/* Debug helper, intended to be called by hand from a debugger, that
   prints the currently selected target options (ISA flags, target
   flags, -march/-mtune strings) to stderr.  ATTRIBUTE_UNUSED keeps
   the compiler quiet since nothing in the source calls it.  */
3972 void ATTRIBUTE_UNUSED
3973 ix86_debug_options (void)
3975 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
3976 ix86_arch_string, ix86_tune_string,
  /* Print the assembled option string; the "<no options>" fallback is
     presumably taken when ix86_target_string returned NULL — confirm
     against that function's contract.  */
3981 fprintf (stderr, "%s\n\n", opts);
3985 fputs ("<no options>\n\n", stderr);
/* Return true if byte value T is one of the instruction bytes we try
   to avoid emitting when mitigating return-oriented-programming (ROP)
   attacks: 0xc2/0xc3 are the near RET opcodes (with/without an
   immediate pop count) and 0xca/0xcb the far RET opcodes — exactly
   the bytes an attacker needs to terminate a gadget.  */
3994 ix86_rop_should_change_byte_p (int t)
3996 return t == 0xc2 || t == 0xc3 || t == 0xca || t == 0xcb;
/* Printable names of the string-operation (memcpy/memset) strategy
   algorithms, generated from stringop.def so the table stays in sync
   with enum stringop_alg, which indexes it.  */
3999 static const char *stringop_alg_names[] = {
4001 #define DEF_ALG(alg, name) #name,
4002 #include "stringop.def"
4007 /* Parse parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=.
4008 The string is of the following form (or comma separated list of it):
4010 strategy_alg:max_size:[align|noalign]
4012 where the full size range for the strategy is either [0, max_size] or
4013 [min_size, max_size], in which min_size is the max_size + 1 of the
4014 preceding range. The last size range must have max_size == -1.
4019 -mmemcpy-strategy=libcall:-1:noalign
4021 this is equivalent to (for known size memcpy) -mstringop-strategy=libcall
4025 -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign
4027 This is to tell the compiler to use the following strategy for memset
4028 1) when the expected size is between [1, 16], use rep_8byte strategy;
4029 2) when the size is between [17, 2048], use vector_loop;
4030 3) when the size is > 2048, use libcall. */
/* One parsed entry of the user's strategy list: use ALG for sizes up
   to MAX, with or without the alignment prologue (NOALIGN).  */
4032 struct stringop_size_range
/* Parse STRATEGY_STR (the writable argument of -mmemcpy-strategy= or
   -mmemset-strategy=; IS_MEMSET selects which) and, on success,
   overwrite the size/algorithm table in the current cost structure.
   Parsing is destructive: commas in STRATEGY_STR are replaced by
   NULs.  Errors are reported via error () and leave the defaults
   partially or wholly unchanged.  */
4040 ix86_parse_stringop_strategy_string (char *strategy_str, bool is_memset)
4042 const struct stringop_algs *default_algs;
4043 stringop_size_range input_ranges[MAX_STRINGOP_ALGS];
4044 char *curr_range_str, *next_range_str;
  /* Pick the 32-bit or 64-bit default table for the selected op.  */
4048 default_algs = &ix86_cost->memset[TARGET_64BIT != 0];
4050 default_algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
4052 curr_range_str = strategy_str;
  /* Split off the next comma-separated range in place.  */
4059 next_range_str = strchr (curr_range_str, ',');
4061 *next_range_str++ = '\0';
  /* Each range is "name:max:align" — field widths bound the reads
     into the fixed-size alg_name/align buffers.  */
4063 if (3 != sscanf (curr_range_str, "%20[^:]:%d:%10s",
4064 alg_name, &maxs, align))
4066 error ("wrong arg %s to option %s", curr_range_str,
4067 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
  /* Ranges must be strictly increasing, except -1 which means
     "unbounded" and is only valid for the final entry.  */
4071 if (n > 0 && (maxs < (input_ranges[n - 1].max + 1) && maxs != -1))
4073 error ("size ranges of option %s should be increasing",
4074 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
  /* Map the textual algorithm name to its enum stringop_alg value.  */
4078 for (i = 0; i < last_alg; i++)
4079 if (!strcmp (alg_name, stringop_alg_names[i]))
4084 error ("wrong stringop strategy name %s specified for option %s",
4086 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
4090 if ((stringop_alg) i == rep_prefix_8_byte
4093 /* rep; movq isn't available in 32-bit code. */
4094 error ("stringop strategy name %s specified for option %s "
4095 "not supported for 32-bit code",
4097 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
  /* NOTE(review): input_ranges[n] is written here, but the
     n > MAX_STRINGOP_ALGS diagnostic only runs after the loop —
     confirm n is bounded before this store, otherwise a long
     strategy list can overrun the local array.  */
4101 input_ranges[n].max = maxs;
4102 input_ranges[n].alg = (stringop_alg) i;
4103 if (!strcmp (align, "align"))
4104 input_ranges[n].noalign = false;
4105 else if (!strcmp (align, "noalign"))
4106 input_ranges[n].noalign = true;
4109 error ("unknown alignment %s specified for option %s",
4110 align, is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
4114 curr_range_str = next_range_str;
4116 while (curr_range_str);
  /* The final range must be open-ended (-1) so every size is
     covered.  */
4118 if (input_ranges[n - 1].max != -1)
4120 error ("the max value for the last size range should be -1"
4122 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
4126 if (n > MAX_STRINGOP_ALGS)
4128 error ("too many size ranges specified in option %s",
4129 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
4133 /* Now override the default algs array.  The cost tables are
   declared const, so const_cast is used to patch the selected
   entries in place with the user's ranges.  */
4134 for (i = 0; i < n; i++)
4136 *const_cast<int *>(&default_algs->size[i].max) = input_ranges[i].max;
4137 *const_cast<stringop_alg *>(&default_algs->size[i].alg)
4138 = input_ranges[i].alg;
4139 *const_cast<int *>(&default_algs->size[i].noalign)
4140 = input_ranges[i].noalign;
4145 /* parse -mtune-ctrl= option. When DUMP is true,
4146 print the features that are explicitly set.
   The argument is a comma-separated list of tuning-feature names; a
   leading '^' on a name clears the feature instead of setting it.
   Unrecognized names are reported with error ().  */
4149 parse_mtune_ctrl_str (bool dump)
  /* Nothing to do unless the user gave -mtune-ctrl=.  */
4151 if (!ix86_tune_ctrl_string)
4154 char *next_feature_string = NULL;
  /* Work on a private copy — parsing punches NULs into it.  ORIG
     remembers the start of the copy so it can be freed afterwards.  */
4155 char *curr_feature_string = xstrdup (ix86_tune_ctrl_string);
4156 char *orig = curr_feature_string;
4162 next_feature_string = strchr (curr_feature_string, ',');
4163 if (next_feature_string)
4164 *next_feature_string++ = '\0';
  /* '^name' means "turn the feature off".  */
4165 if (*curr_feature_string == '^')
4167 curr_feature_string++;
4170 for (i = 0; i < X86_TUNE_LAST; i++)
4172 if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
4174 ix86_tune_features[i] = !clear;
4176 fprintf (stderr, "Explicitly %s feature %s\n",
4177 clear ? "clear" : "set", ix86_tune_feature_names[i]);
4181 if (i == X86_TUNE_LAST)
  /* Lowercase per GCC diagnostics conventions; report the element
     including the '^' prefix when one was given.  */
4182 error ("unknown parameter to option -mtune-ctrl: %s",
4183 clear ? curr_feature_string - 1 : curr_feature_string);
4184 curr_feature_string = next_feature_string;
4186 while (curr_feature_string);
4190 /* Helper function to set ix86_tune_features. IX86_TUNE is the
   processor being tuned for; DUMP requests a listing of the resulting
   feature settings on stderr.  */
4194 set_ix86_tune_features (enum processor_type ix86_tune, bool dump)
  /* NOTE(review): this shift assumes processor_type has fewer than 32
     values (and that the initial_ix86_tune_features entries are wide
     enough) — confirm, since 1u << ix86_tune is undefined for
     ix86_tune >= 32.  */
4196 unsigned int ix86_tune_mask = 1u << ix86_tune;
4199 for (i = 0; i < X86_TUNE_LAST; ++i)
  /* -mtune-ctrl with no defaults: start every feature off.  */
4201 if (ix86_tune_no_default)
4202 ix86_tune_features[i] = 0;
4204 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
4209 fprintf (stderr, "List of x86 specific tuning parameter names:\n");
4210 for (i = 0; i < X86_TUNE_LAST; i++)
4211 fprintf (stderr, "%s : %s\n", ix86_tune_feature_names[i],
4212 ix86_tune_features[i] ? "on" : "off");
  /* Apply explicit -mtune-ctrl= overrides last so they win over the
     per-processor defaults set above.  */
4215 parse_mtune_ctrl_str (dump);
4219 /* Default align_* from the processor table.  Only values the user
   left at 0 (i.e. unset) are filled in, so explicit -falign-loops,
   -falign-jumps and -falign-functions always take precedence over
   the per-processor tuning defaults.  */
4222 ix86_default_align (struct gcc_options *opts)
4224 if (opts->x_align_loops == 0)
4226 opts->x_align_loops = processor_target_table[ix86_tune].align_loop;
4227 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
4229 if (opts->x_align_jumps == 0)
4231 opts->x_align_jumps = processor_target_table[ix86_tune].align_jump;
4232 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
4234 if (opts->x_align_functions == 0)
4236 opts->x_align_functions = processor_target_table[ix86_tune].align_func;
4240 /* Implement TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE hook: re-derive
   the alignment defaults for the global option set whenever options
   change (e.g. via optimize attributes or pragmas).  */
4243 ix86_override_options_after_change (void)
4245 ix86_default_align (&global_options);
4248 /* Override various settings based on options. If MAIN_ARGS_P, the
4249 options are from the command line, otherwise they are from
4253 ix86_option_override_internal (bool main_args_p,
4254 struct gcc_options *opts,
4255 struct gcc_options *opts_set)
4258 unsigned int ix86_arch_mask;
4259 const bool ix86_tune_specified = (opts->x_ix86_tune_string != NULL);
4264 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
4265 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
4266 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
4267 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
4268 #define PTA_AES (HOST_WIDE_INT_1 << 4)
4269 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
4270 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
4271 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
4272 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
4273 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
4274 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
4275 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
4276 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
4277 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
4278 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
4279 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
4280 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
4281 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
4282 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
4283 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
4284 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
4285 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
4286 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
4287 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
4288 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
4289 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
4290 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
4291 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
4292 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
4293 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
4294 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
4295 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
4296 #define PTA_RTM (HOST_WIDE_INT_1 << 32)
4297 #define PTA_HLE (HOST_WIDE_INT_1 << 33)
4298 #define PTA_PRFCHW (HOST_WIDE_INT_1 << 34)
4299 #define PTA_RDSEED (HOST_WIDE_INT_1 << 35)
4300 #define PTA_ADX (HOST_WIDE_INT_1 << 36)
4301 #define PTA_FXSR (HOST_WIDE_INT_1 << 37)
4302 #define PTA_XSAVE (HOST_WIDE_INT_1 << 38)
4303 #define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39)
4304 #define PTA_AVX512F (HOST_WIDE_INT_1 << 40)
4305 #define PTA_AVX512ER (HOST_WIDE_INT_1 << 41)
4306 #define PTA_AVX512PF (HOST_WIDE_INT_1 << 42)
4307 #define PTA_AVX512CD (HOST_WIDE_INT_1 << 43)
4308 #define PTA_MPX (HOST_WIDE_INT_1 << 44)
4309 #define PTA_SHA (HOST_WIDE_INT_1 << 45)
4310 #define PTA_PREFETCHWT1 (HOST_WIDE_INT_1 << 46)
4311 #define PTA_CLFLUSHOPT (HOST_WIDE_INT_1 << 47)
4312 #define PTA_XSAVEC (HOST_WIDE_INT_1 << 48)
4313 #define PTA_XSAVES (HOST_WIDE_INT_1 << 49)
4314 #define PTA_AVX512DQ (HOST_WIDE_INT_1 << 50)
4315 #define PTA_AVX512BW (HOST_WIDE_INT_1 << 51)
4316 #define PTA_AVX512VL (HOST_WIDE_INT_1 << 52)
4317 #define PTA_AVX512IFMA (HOST_WIDE_INT_1 << 53)
4318 #define PTA_AVX512VBMI (HOST_WIDE_INT_1 << 54)
4319 #define PTA_CLWB (HOST_WIDE_INT_1 << 55)
4320 #define PTA_PCOMMIT (HOST_WIDE_INT_1 << 56)
4321 #define PTA_MWAITX (HOST_WIDE_INT_1 << 57)
4322 #define PTA_CLZERO (HOST_WIDE_INT_1 << 58)
4323 #define PTA_NO_80387 (HOST_WIDE_INT_1 << 59)
4324 #define PTA_PKU (HOST_WIDE_INT_1 << 60)
4327 (PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 \
4328 | PTA_CX16 | PTA_FXSR)
4329 #define PTA_NEHALEM \
4330 (PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_POPCNT)
4331 #define PTA_WESTMERE \
4332 (PTA_NEHALEM | PTA_AES | PTA_PCLMUL)
4333 #define PTA_SANDYBRIDGE \
4334 (PTA_WESTMERE | PTA_AVX | PTA_XSAVE | PTA_XSAVEOPT)
4335 #define PTA_IVYBRIDGE \
4336 (PTA_SANDYBRIDGE | PTA_FSGSBASE | PTA_RDRND | PTA_F16C)
4337 #define PTA_HASWELL \
4338 (PTA_IVYBRIDGE | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_LZCNT \
4339 | PTA_FMA | PTA_MOVBE | PTA_HLE)
4340 #define PTA_BROADWELL \
4341 (PTA_HASWELL | PTA_ADX | PTA_PRFCHW | PTA_RDSEED)
4342 #define PTA_SKYLAKE \
4343 (PTA_BROADWELL | PTA_CLFLUSHOPT | PTA_XSAVEC | PTA_XSAVES)
4344 #define PTA_SKYLAKE_AVX512 \
4345 (PTA_SKYLAKE | PTA_AVX512F | PTA_AVX512CD | PTA_AVX512VL \
4346 | PTA_AVX512BW | PTA_AVX512DQ | PTA_PKU)
4348 (PTA_BROADWELL | PTA_AVX512PF | PTA_AVX512ER | PTA_AVX512F | PTA_AVX512CD)
4349 #define PTA_BONNELL \
4350 (PTA_CORE2 | PTA_MOVBE)
4351 #define PTA_SILVERMONT \
4352 (PTA_WESTMERE | PTA_MOVBE)
4354 /* if this reaches 64, need to widen struct pta flags below */
4358 const char *const name; /* processor name or nickname. */
4359 const enum processor_type processor;
4360 const enum attr_cpu schedule;
4361 const unsigned HOST_WIDE_INT flags;
4363 const processor_alias_table[] =
4365 {"i386", PROCESSOR_I386, CPU_NONE, 0},
4366 {"i486", PROCESSOR_I486, CPU_NONE, 0},
4367 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
4368 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
4369 {"lakemont", PROCESSOR_LAKEMONT, CPU_PENTIUM, PTA_NO_80387},
4370 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
4371 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
4372 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
4373 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
4374 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
4375 PTA_MMX | PTA_SSE | PTA_FXSR},
4376 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
4377 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
4378 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_FXSR},
4379 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
4380 PTA_MMX | PTA_SSE | PTA_FXSR},
4381 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
4382 PTA_MMX | PTA_SSE | PTA_FXSR},
4383 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
4384 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
4385 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
4386 PTA_MMX |PTA_SSE | PTA_SSE2 | PTA_FXSR},
4387 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
4388 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
4389 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
4390 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
4391 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
4392 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
4393 | PTA_CX16 | PTA_NO_SAHF | PTA_FXSR},
4394 {"core2", PROCESSOR_CORE2, CPU_CORE2, PTA_CORE2},
4395 {"nehalem", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
4396 {"corei7", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
4397 {"westmere", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_WESTMERE},
4398 {"sandybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
4400 {"corei7-avx", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
4402 {"ivybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
4404 {"core-avx-i", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
4406 {"haswell", PROCESSOR_HASWELL, CPU_HASWELL, PTA_HASWELL},
4407 {"core-avx2", PROCESSOR_HASWELL, CPU_HASWELL, PTA_HASWELL},
4408 {"broadwell", PROCESSOR_HASWELL, CPU_HASWELL, PTA_BROADWELL},
4409 {"skylake", PROCESSOR_HASWELL, CPU_HASWELL, PTA_SKYLAKE},
4410 {"skylake-avx512", PROCESSOR_HASWELL, CPU_HASWELL, PTA_SKYLAKE_AVX512},
4411 {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
4412 {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
4413 {"silvermont", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
4414 {"slm", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
4415 {"knl", PROCESSOR_KNL, CPU_SLM, PTA_KNL},
4416 {"intel", PROCESSOR_INTEL, CPU_SLM, PTA_NEHALEM},
4417 {"geode", PROCESSOR_GEODE, CPU_GEODE,
4418 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
4419 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
4420 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
4421 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
4422 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
4423 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
4424 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
4425 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
4426 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
4427 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
4428 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
4429 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
4430 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
4431 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
4432 {"x86-64", PROCESSOR_K8, CPU_K8,
4433 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
4434 {"k8", PROCESSOR_K8, CPU_K8,
4435 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
4436 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
4437 {"k8-sse3", PROCESSOR_K8, CPU_K8,
4438 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
4439 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
4440 {"opteron", PROCESSOR_K8, CPU_K8,
4441 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
4442 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
4443 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
4444 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
4445 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
4446 {"athlon64", PROCESSOR_K8, CPU_K8,
4447 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
4448 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
4449 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
4450 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
4451 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
4452 {"athlon-fx", PROCESSOR_K8, CPU_K8,
4453 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
4454 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
4455 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
4456 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
4457 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
4458 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
4459 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
4460 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
4461 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
4462 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
4463 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
4464 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
4465 | PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
4466 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
4467 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
4468 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
4469 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
4470 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
4471 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
4472 {"bdver3", PROCESSOR_BDVER3, CPU_BDVER3,
4473 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
4474 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
4475 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
4476 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
4477 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE
4478 | PTA_XSAVEOPT | PTA_FSGSBASE},
4479 {"bdver4", PROCESSOR_BDVER4, CPU_BDVER4,
4480 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
4481 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
4482 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
4483 | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2
4484 | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR
4485 | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE | PTA_RDRND
4486 | PTA_MOVBE | PTA_MWAITX},
4487 {"znver1", PROCESSOR_ZNVER1, CPU_ZNVER1,
4488 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
4489 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
4490 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
4491 | PTA_BMI | PTA_BMI2 | PTA_F16C | PTA_FMA | PTA_PRFCHW
4492 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE
4493 | PTA_RDRND | PTA_MOVBE | PTA_MWAITX | PTA_ADX | PTA_RDSEED
4494 | PTA_CLZERO | PTA_CLFLUSHOPT | PTA_XSAVEC | PTA_XSAVES
4495 | PTA_SHA | PTA_LZCNT | PTA_POPCNT},
4496 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
4497 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
4498 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_PRFCHW
4499 | PTA_FXSR | PTA_XSAVE},
4500 {"btver2", PROCESSOR_BTVER2, CPU_BTVER2,
4501 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
4502 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_SSE4_1
4503 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
4504 | PTA_BMI | PTA_F16C | PTA_MOVBE | PTA_PRFCHW
4505 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT},
4507 {"generic", PROCESSOR_GENERIC, CPU_GENERIC,
4509 | PTA_HLE /* flags are only used for -march switch. */ },
4512 /* -mrecip options. */
4515 const char *string; /* option name */
4516 unsigned int mask; /* mask bits to set */
4518 const recip_options[] =
4520 { "all", RECIP_MASK_ALL },
4521 { "none", RECIP_MASK_NONE },
4522 { "div", RECIP_MASK_DIV },
4523 { "sqrt", RECIP_MASK_SQRT },
4524 { "vec-div", RECIP_MASK_VEC_DIV },
4525 { "vec-sqrt", RECIP_MASK_VEC_SQRT },
4528 int const pta_size = ARRAY_SIZE (processor_alias_table);
4530 /* Set up prefix/suffix so the error messages refer to either the command
4531 line argument, or the attribute(target). */
4540 prefix = "option(\"";
4545 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
4546 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
4547 if (TARGET_64BIT_DEFAULT && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
4548 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32);
4549 #ifdef TARGET_BI_ARCH
4552 #if TARGET_BI_ARCH == 1
4553 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
4554 is on and OPTION_MASK_ABI_X32 is off. We turn off
4555 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
4557 if (TARGET_X32_P (opts->x_ix86_isa_flags))
4558 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
4560 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
4561 on and OPTION_MASK_ABI_64 is off. We turn off
4562 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
4563 -m64 or OPTION_MASK_CODE16 is turned on by -m16. */
4564 if (TARGET_LP64_P (opts->x_ix86_isa_flags)
4565 || TARGET_16BIT_P (opts->x_ix86_isa_flags))
4566 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
4568 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
4569 && TARGET_IAMCU_P (opts->x_target_flags))
4570 sorry ("Intel MCU psABI isn%'t supported in %s mode",
4571 TARGET_X32_P (opts->x_ix86_isa_flags) ? "x32" : "64-bit");
4575 if (TARGET_X32_P (opts->x_ix86_isa_flags))
4577 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
4578 OPTION_MASK_ABI_64 for TARGET_X32. */
4579 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
4580 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
4582 else if (TARGET_16BIT_P (opts->x_ix86_isa_flags))
4583 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ISA_64BIT
4584 | OPTION_MASK_ABI_X32
4585 | OPTION_MASK_ABI_64);
4586 else if (TARGET_LP64_P (opts->x_ix86_isa_flags))
4588 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
4589 OPTION_MASK_ABI_X32 for TARGET_LP64. */
4590 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
4591 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
4594 #ifdef SUBTARGET_OVERRIDE_OPTIONS
4595 SUBTARGET_OVERRIDE_OPTIONS;
4598 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
4599 SUBSUBTARGET_OVERRIDE_OPTIONS;
4602 /* -fPIC is the default for x86_64. */
4603 if (TARGET_MACHO && TARGET_64BIT_P (opts->x_ix86_isa_flags))
4604 opts->x_flag_pic = 2;
4606 /* Need to check -mtune=generic first. */
4607 if (opts->x_ix86_tune_string)
4609 /* As special support for cross compilers we read -mtune=native
4610 as -mtune=generic. With native compilers we won't see the
4611 -mtune=native, as it was changed by the driver. */
4612 if (!strcmp (opts->x_ix86_tune_string, "native"))
4614 opts->x_ix86_tune_string = "generic";
4616 else if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
4617 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
4618 "%stune=k8%s or %stune=generic%s instead as appropriate",
4619 prefix, suffix, prefix, suffix, prefix, suffix);
4623 if (opts->x_ix86_arch_string)
4624 opts->x_ix86_tune_string = opts->x_ix86_arch_string;
4625 if (!opts->x_ix86_tune_string)
4627 opts->x_ix86_tune_string
4628 = processor_target_table[TARGET_CPU_DEFAULT].name;
4629 ix86_tune_defaulted = 1;
4632 /* opts->x_ix86_tune_string is set to opts->x_ix86_arch_string
4633 or defaulted. We need to use a sensible tune option. */
4634 if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
4636 opts->x_ix86_tune_string = "generic";
4640 if (opts->x_ix86_stringop_alg == rep_prefix_8_byte
4641 && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
4643 /* rep; movq isn't available in 32-bit code. */
4644 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
4645 opts->x_ix86_stringop_alg = no_stringop;
4648 if (!opts->x_ix86_arch_string)
4649 opts->x_ix86_arch_string
4650 = TARGET_64BIT_P (opts->x_ix86_isa_flags)
4651 ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
4653 ix86_arch_specified = 1;
4655 if (opts_set->x_ix86_pmode)
4657 if ((TARGET_LP64_P (opts->x_ix86_isa_flags)
4658 && opts->x_ix86_pmode == PMODE_SI)
4659 || (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
4660 && opts->x_ix86_pmode == PMODE_DI))
4661 error ("address mode %qs not supported in the %s bit mode",
4662 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "short" : "long",
4663 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "64" : "32");
4666 opts->x_ix86_pmode = TARGET_LP64_P (opts->x_ix86_isa_flags)
4667 ? PMODE_DI : PMODE_SI;
4669 if (!opts_set->x_ix86_abi)
4670 opts->x_ix86_abi = DEFAULT_ABI;
4672 /* For targets using ms ABI enable ms-extensions, if not
4673 explicit turned off. For non-ms ABI we turn off this
4675 if (!opts_set->x_flag_ms_extensions)
4676 opts->x_flag_ms_extensions = (MS_ABI == DEFAULT_ABI);
4678 if (opts_set->x_ix86_cmodel)
4680 switch (opts->x_ix86_cmodel)
4684 if (opts->x_flag_pic)
4685 opts->x_ix86_cmodel = CM_SMALL_PIC;
4686 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4687 error ("code model %qs not supported in the %s bit mode",
4693 if (opts->x_flag_pic)
4694 opts->x_ix86_cmodel = CM_MEDIUM_PIC;
4695 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4696 error ("code model %qs not supported in the %s bit mode",
4698 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
4699 error ("code model %qs not supported in x32 mode",
4705 if (opts->x_flag_pic)
4706 opts->x_ix86_cmodel = CM_LARGE_PIC;
4707 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4708 error ("code model %qs not supported in the %s bit mode",
4710 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
4711 error ("code model %qs not supported in x32 mode",
4716 if (opts->x_flag_pic)
4717 error ("code model %s does not support PIC mode", "32");
4718 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4719 error ("code model %qs not supported in the %s bit mode",
4724 if (opts->x_flag_pic)
4726 error ("code model %s does not support PIC mode", "kernel");
4727 opts->x_ix86_cmodel = CM_32;
4729 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4730 error ("code model %qs not supported in the %s bit mode",
4740 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
4741 use of rip-relative addressing. This eliminates fixups that
4742 would otherwise be needed if this object is to be placed in a
4743 DLL, and is essentially just as efficient as direct addressing. */
4744 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
4745 && (TARGET_RDOS || TARGET_PECOFF))
4746 opts->x_ix86_cmodel = CM_MEDIUM_PIC, opts->x_flag_pic = 1;
4747 else if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4748 opts->x_ix86_cmodel = opts->x_flag_pic ? CM_SMALL_PIC : CM_SMALL;
4750 opts->x_ix86_cmodel = CM_32;
4752 if (TARGET_MACHO && opts->x_ix86_asm_dialect == ASM_INTEL)
4754 error ("-masm=intel not supported in this configuration");
4755 opts->x_ix86_asm_dialect = ASM_ATT;
4757 if ((TARGET_64BIT_P (opts->x_ix86_isa_flags) != 0)
4758 != ((opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
4759 sorry ("%i-bit mode not compiled in",
4760 (opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
4762 for (i = 0; i < pta_size; i++)
4763 if (! strcmp (opts->x_ix86_arch_string, processor_alias_table[i].name))
4765 ix86_schedule = processor_alias_table[i].schedule;
4766 ix86_arch = processor_alias_table[i].processor;
4767 /* Default cpu tuning to the architecture. */
4768 ix86_tune = ix86_arch;
4770 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
4771 && !(processor_alias_table[i].flags & PTA_64BIT))
4772 error ("CPU you selected does not support x86-64 "
4775 if (processor_alias_table[i].flags & PTA_MMX
4776 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
4777 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX;
4778 if (processor_alias_table[i].flags & PTA_3DNOW
4779 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
4780 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
4781 if (processor_alias_table[i].flags & PTA_3DNOW_A
4782 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
4783 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
4784 if (processor_alias_table[i].flags & PTA_SSE
4785 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
4786 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE;
4787 if (processor_alias_table[i].flags & PTA_SSE2
4788 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
4789 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
4790 if (processor_alias_table[i].flags & PTA_SSE3
4791 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
4792 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
4793 if (processor_alias_table[i].flags & PTA_SSSE3
4794 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
4795 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
4796 if (processor_alias_table[i].flags & PTA_SSE4_1
4797 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
4798 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
4799 if (processor_alias_table[i].flags & PTA_SSE4_2
4800 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
4801 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
4802 if (processor_alias_table[i].flags & PTA_AVX
4803 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
4804 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX;
4805 if (processor_alias_table[i].flags & PTA_AVX2
4806 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
4807 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
4808 if (processor_alias_table[i].flags & PTA_FMA
4809 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
4810 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA;
4811 if (processor_alias_table[i].flags & PTA_SSE4A
4812 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
4813 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
4814 if (processor_alias_table[i].flags & PTA_FMA4
4815 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
4816 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
4817 if (processor_alias_table[i].flags & PTA_XOP
4818 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
4819 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP;
4820 if (processor_alias_table[i].flags & PTA_LWP
4821 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
4822 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP;
4823 if (processor_alias_table[i].flags & PTA_ABM
4824 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
4825 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM;
4826 if (processor_alias_table[i].flags & PTA_BMI
4827 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
4828 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI;
4829 if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
4830 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
4831 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
4832 if (processor_alias_table[i].flags & PTA_TBM
4833 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
4834 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM;
4835 if (processor_alias_table[i].flags & PTA_BMI2
4836 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
4837 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
4838 if (processor_alias_table[i].flags & PTA_CX16
4839 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
4840 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CX16;
4841 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
4842 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
4843 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
4844 if (!(TARGET_64BIT_P (opts->x_ix86_isa_flags)
4845 && (processor_alias_table[i].flags & PTA_NO_SAHF))
4846 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
4847 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
4848 if (processor_alias_table[i].flags & PTA_MOVBE
4849 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
4850 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
4851 if (processor_alias_table[i].flags & PTA_AES
4852 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
4853 ix86_isa_flags |= OPTION_MASK_ISA_AES;
4854 if (processor_alias_table[i].flags & PTA_SHA
4855 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SHA))
4856 ix86_isa_flags |= OPTION_MASK_ISA_SHA;
4857 if (processor_alias_table[i].flags & PTA_PCLMUL
4858 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
4859 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
4860 if (processor_alias_table[i].flags & PTA_FSGSBASE
4861 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
4862 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
4863 if (processor_alias_table[i].flags & PTA_RDRND
4864 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
4865 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
4866 if (processor_alias_table[i].flags & PTA_F16C
4867 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
4868 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C;
4869 if (processor_alias_table[i].flags & PTA_RTM
4870 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
4871 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RTM;
4872 if (processor_alias_table[i].flags & PTA_HLE
4873 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_HLE))
4874 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_HLE;
4875 if (processor_alias_table[i].flags & PTA_PRFCHW
4876 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW))
4877 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW;
4878 if (processor_alias_table[i].flags & PTA_RDSEED
4879 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDSEED))
4880 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDSEED;
4881 if (processor_alias_table[i].flags & PTA_ADX
4882 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ADX))
4883 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ADX;
4884 if (processor_alias_table[i].flags & PTA_FXSR
4885 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FXSR))
4886 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FXSR;
4887 if (processor_alias_table[i].flags & PTA_XSAVE
4888 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVE))
4889 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVE;
4890 if (processor_alias_table[i].flags & PTA_XSAVEOPT
4891 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT))
4892 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT;
4893 if (processor_alias_table[i].flags & PTA_AVX512F
4894 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F))
4895 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F;
4896 if (processor_alias_table[i].flags & PTA_AVX512ER
4897 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512ER))
4898 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER;
4899 if (processor_alias_table[i].flags & PTA_AVX512PF
4900 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512PF))
4901 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512PF;
4902 if (processor_alias_table[i].flags & PTA_AVX512CD
4903 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512CD))
4904 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD;
4905 if (processor_alias_table[i].flags & PTA_PREFETCHWT1
4906 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PREFETCHWT1))
4907 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PREFETCHWT1;
4908 if (processor_alias_table[i].flags & PTA_PCOMMIT
4909 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCOMMIT))
4910 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCOMMIT;
4911 if (processor_alias_table[i].flags & PTA_CLWB
4912 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLWB))
4913 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLWB;
4914 if (processor_alias_table[i].flags & PTA_CLFLUSHOPT
4915 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLFLUSHOPT))
4916 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLFLUSHOPT;
4917 if (processor_alias_table[i].flags & PTA_CLZERO
4918 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLZERO))
4919 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLZERO;
4920 if (processor_alias_table[i].flags & PTA_XSAVEC
4921 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEC))
4922 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEC;
4923 if (processor_alias_table[i].flags & PTA_XSAVES
4924 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVES))
4925 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVES;
4926 if (processor_alias_table[i].flags & PTA_AVX512DQ
4927 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512DQ))
4928 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512DQ;
4929 if (processor_alias_table[i].flags & PTA_AVX512BW
4930 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512BW))
4931 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512BW;
4932 if (processor_alias_table[i].flags & PTA_AVX512VL
4933 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VL))
4934 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VL;
4935 if (processor_alias_table[i].flags & PTA_MPX
4936 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MPX))
4937 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MPX;
4938 if (processor_alias_table[i].flags & PTA_AVX512VBMI
4939 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VBMI))
4940 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VBMI;
4941 if (processor_alias_table[i].flags & PTA_AVX512IFMA
4942 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512IFMA))
4943 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512IFMA;
4944 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
4945 x86_prefetch_sse = true;
4946 if (processor_alias_table[i].flags & PTA_MWAITX
4947 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MWAITX))
4948 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MWAITX;
4949 if (processor_alias_table[i].flags & PTA_PKU
4950 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PKU))
4951 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PKU;
4953 if (!(opts_set->x_target_flags & MASK_80387))
4955 if (processor_alias_table[i].flags & PTA_NO_80387)
4956 opts->x_target_flags &= ~MASK_80387;
4958 opts->x_target_flags |= MASK_80387;
4963 if (TARGET_X32 && (opts->x_ix86_isa_flags & OPTION_MASK_ISA_MPX))
4964 error ("Intel MPX does not support x32");
4966 if (TARGET_X32 && (ix86_isa_flags & OPTION_MASK_ISA_MPX))
4967 error ("Intel MPX does not support x32");
4969 if (!strcmp (opts->x_ix86_arch_string, "generic"))
4970 error ("generic CPU can be used only for %stune=%s %s",
4971 prefix, suffix, sw);
4972 else if (!strcmp (opts->x_ix86_arch_string, "intel"))
4973 error ("intel CPU can be used only for %stune=%s %s",
4974 prefix, suffix, sw);
4975 else if (i == pta_size)
4976 error ("bad value (%s) for %sarch=%s %s",
4977 opts->x_ix86_arch_string, prefix, suffix, sw);
4979 ix86_arch_mask = 1u << ix86_arch;
4980 for (i = 0; i < X86_ARCH_LAST; ++i)
4981 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4983 for (i = 0; i < pta_size; i++)
4984 if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name))
4986 ix86_schedule = processor_alias_table[i].schedule;
4987 ix86_tune = processor_alias_table[i].processor;
4988 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4990 if (!(processor_alias_table[i].flags & PTA_64BIT))
4992 if (ix86_tune_defaulted)
4994 opts->x_ix86_tune_string = "x86-64";
4995 for (i = 0; i < pta_size; i++)
4996 if (! strcmp (opts->x_ix86_tune_string,
4997 processor_alias_table[i].name))
4999 ix86_schedule = processor_alias_table[i].schedule;
5000 ix86_tune = processor_alias_table[i].processor;
5003 error ("CPU you selected does not support x86-64 "
5007 /* Intel CPUs have always interpreted SSE prefetch instructions as
5008 NOPs; so, we can enable SSE prefetch instructions even when
5009 -mtune (rather than -march) points us to a processor that has them.
5010 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
5011 higher processors. */
5013 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
5014 x86_prefetch_sse = true;
5018 if (ix86_tune_specified && i == pta_size)
5019 error ("bad value (%s) for %stune=%s %s",
5020 opts->x_ix86_tune_string, prefix, suffix, sw);
5022 set_ix86_tune_features (ix86_tune, opts->x_ix86_dump_tunes);
5024 #ifndef USE_IX86_FRAME_POINTER
5025 #define USE_IX86_FRAME_POINTER 0
5028 #ifndef USE_X86_64_FRAME_POINTER
5029 #define USE_X86_64_FRAME_POINTER 0
5032 /* Set the default values for switches whose default depends on TARGET_64BIT
5033 in case they weren't overwritten by command line options. */
5034 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
5036 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
5037 opts->x_flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
5038 if (opts->x_flag_asynchronous_unwind_tables
5039 && !opts_set->x_flag_unwind_tables
5040 && TARGET_64BIT_MS_ABI)
5041 opts->x_flag_unwind_tables = 1;
5042 if (opts->x_flag_asynchronous_unwind_tables == 2)
5043 opts->x_flag_unwind_tables
5044 = opts->x_flag_asynchronous_unwind_tables = 1;
5045 if (opts->x_flag_pcc_struct_return == 2)
5046 opts->x_flag_pcc_struct_return = 0;
5050 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
5051 opts->x_flag_omit_frame_pointer
5052 = !(USE_IX86_FRAME_POINTER || opts->x_optimize_size);
5053 if (opts->x_flag_asynchronous_unwind_tables == 2)
5054 opts->x_flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
5055 if (opts->x_flag_pcc_struct_return == 2)
5057 /* Intel MCU psABI specifies that -freg-struct-return should
5058 be on. Instead of setting DEFAULT_PCC_STRUCT_RETURN to 1,
5059 we check -miamcu so that -freg-struct-return is always
5060 turned on if -miamcu is used. */
5061 if (TARGET_IAMCU_P (opts->x_target_flags))
5062 opts->x_flag_pcc_struct_return = 0;
5064 opts->x_flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
5068 ix86_tune_cost = processor_target_table[ix86_tune].cost;
5069 /* TODO: ix86_cost should be chosen at instruction or function granuality
5070 so for cold code we use size_cost even in !optimize_size compilation. */
5071 if (opts->x_optimize_size)
5072 ix86_cost = &ix86_size_cost;
5074 ix86_cost = ix86_tune_cost;
5076 /* Arrange to set up i386_stack_locals for all functions. */
5077 init_machine_status = ix86_init_machine_status;
5079 /* Validate -mregparm= value. */
5080 if (opts_set->x_ix86_regparm)
5082 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
5083 warning (0, "-mregparm is ignored in 64-bit mode");
5084 else if (TARGET_IAMCU_P (opts->x_target_flags))
5085 warning (0, "-mregparm is ignored for Intel MCU psABI");
5086 if (opts->x_ix86_regparm > REGPARM_MAX)
5088 error ("-mregparm=%d is not between 0 and %d",
5089 opts->x_ix86_regparm, REGPARM_MAX);
5090 opts->x_ix86_regparm = 0;
5093 if (TARGET_IAMCU_P (opts->x_target_flags)
5094 || TARGET_64BIT_P (opts->x_ix86_isa_flags))
5095 opts->x_ix86_regparm = REGPARM_MAX;
5097 /* Default align_* from the processor table. */
5098 ix86_default_align (opts);
5100 /* Provide default for -mbranch-cost= value. */
5101 if (!opts_set->x_ix86_branch_cost)
5102 opts->x_ix86_branch_cost = ix86_tune_cost->branch_cost;
5104 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
5106 opts->x_target_flags
5107 |= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags;
5109 /* Enable by default the SSE and MMX builtins. Do allow the user to
5110 explicitly disable any of these. In particular, disabling SSE and
5111 MMX for kernel code is extremely useful. */
5112 if (!ix86_arch_specified)
5113 opts->x_ix86_isa_flags
5114 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
5115 | TARGET_SUBTARGET64_ISA_DEFAULT)
5116 & ~opts->x_ix86_isa_flags_explicit);
5118 if (TARGET_RTD_P (opts->x_target_flags))
5119 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
5123 opts->x_target_flags
5124 |= TARGET_SUBTARGET32_DEFAULT & ~opts_set->x_target_flags;
5126 if (!ix86_arch_specified)
5127 opts->x_ix86_isa_flags
5128 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit;
5130 /* i386 ABI does not specify red zone. It still makes sense to use it
5131 when programmer takes care to stack from being destroyed. */
5132 if (!(opts_set->x_target_flags & MASK_NO_RED_ZONE))
5133 opts->x_target_flags |= MASK_NO_RED_ZONE;
5136 /* Keep nonleaf frame pointers. */
5137 if (opts->x_flag_omit_frame_pointer)
5138 opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
5139 else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags))
5140 opts->x_flag_omit_frame_pointer = 1;
5142 /* If we're doing fast math, we don't care about comparison order
5143 wrt NaNs. This lets us use a shorter comparison sequence. */
5144 if (opts->x_flag_finite_math_only)
5145 opts->x_target_flags &= ~MASK_IEEE_FP;
5147 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
5148 since the insns won't need emulation. */
5149 if (ix86_tune_features [X86_TUNE_ALWAYS_FANCY_MATH_387])
5150 opts->x_target_flags &= ~MASK_NO_FANCY_MATH_387;
5152 /* Likewise, if the target doesn't have a 387, or we've specified
5153 software floating point, don't use 387 inline intrinsics. */
5154 if (!TARGET_80387_P (opts->x_target_flags))
5155 opts->x_target_flags |= MASK_NO_FANCY_MATH_387;
5157 /* Turn on MMX builtins for -msse. */
5158 if (TARGET_SSE_P (opts->x_ix86_isa_flags))
5159 opts->x_ix86_isa_flags
5160 |= OPTION_MASK_ISA_MMX & ~opts->x_ix86_isa_flags_explicit;
5162 /* Enable SSE prefetch. */
5163 if (TARGET_SSE_P (opts->x_ix86_isa_flags)
5164 || (TARGET_PRFCHW && !TARGET_3DNOW_P (opts->x_ix86_isa_flags)))
5165 x86_prefetch_sse = true;
5167 /* Enable prefetch{,w} instructions for -m3dnow and -mprefetchwt1. */
5168 if (TARGET_3DNOW_P (opts->x_ix86_isa_flags)
5169 || TARGET_PREFETCHWT1_P (opts->x_ix86_isa_flags))
5170 opts->x_ix86_isa_flags
5171 |= OPTION_MASK_ISA_PRFCHW & ~opts->x_ix86_isa_flags_explicit;
5173 /* Enable popcnt instruction for -msse4.2 or -mabm. */
5174 if (TARGET_SSE4_2_P (opts->x_ix86_isa_flags)
5175 || TARGET_ABM_P (opts->x_ix86_isa_flags))
5176 opts->x_ix86_isa_flags
5177 |= OPTION_MASK_ISA_POPCNT & ~opts->x_ix86_isa_flags_explicit;
5179 /* Enable lzcnt instruction for -mabm. */
5180 if (TARGET_ABM_P(opts->x_ix86_isa_flags))
5181 opts->x_ix86_isa_flags
5182 |= OPTION_MASK_ISA_LZCNT & ~opts->x_ix86_isa_flags_explicit;
5184 /* Validate -mpreferred-stack-boundary= value or default it to
5185 PREFERRED_STACK_BOUNDARY_DEFAULT. */
5186 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
5187 if (opts_set->x_ix86_preferred_stack_boundary_arg)
5189 int min = (TARGET_64BIT_P (opts->x_ix86_isa_flags)
5190 ? (TARGET_SSE_P (opts->x_ix86_isa_flags) ? 4 : 3) : 2);
5191 int max = (TARGET_SEH ? 4 : 12);
5193 if (opts->x_ix86_preferred_stack_boundary_arg < min
5194 || opts->x_ix86_preferred_stack_boundary_arg > max)
5197 error ("-mpreferred-stack-boundary is not supported "
5200 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
5201 opts->x_ix86_preferred_stack_boundary_arg, min, max);
5204 ix86_preferred_stack_boundary
5205 = (1 << opts->x_ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
5208 /* Set the default value for -mstackrealign. */
5209 if (opts->x_ix86_force_align_arg_pointer == -1)
5210 opts->x_ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
5212 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
5214 /* Validate -mincoming-stack-boundary= value or default it to
5215 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
5216 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
5217 if (opts_set->x_ix86_incoming_stack_boundary_arg)
5219 int min = TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 3 : 2;
5221 if (opts->x_ix86_incoming_stack_boundary_arg < min
5222 || opts->x_ix86_incoming_stack_boundary_arg > 12)
5223 error ("-mincoming-stack-boundary=%d is not between %d and 12",
5224 opts->x_ix86_incoming_stack_boundary_arg, min);
5227 ix86_user_incoming_stack_boundary
5228 = (1 << opts->x_ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
5229 ix86_incoming_stack_boundary
5230 = ix86_user_incoming_stack_boundary;
5234 #ifndef NO_PROFILE_COUNTERS
5235 if (flag_nop_mcount)
5236 error ("-mnop-mcount is not compatible with this target");
5238 if (flag_nop_mcount && flag_pic)
5239 error ("-mnop-mcount is not implemented for -fPIC");
5241 /* Accept -msseregparm only if at least SSE support is enabled. */
5242 if (TARGET_SSEREGPARM_P (opts->x_target_flags)
5243 && ! TARGET_SSE_P (opts->x_ix86_isa_flags))
5244 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
5246 if (opts_set->x_ix86_fpmath)
5248 if (opts->x_ix86_fpmath & FPMATH_SSE)
5250 if (!TARGET_SSE_P (opts->x_ix86_isa_flags))
5252 if (TARGET_80387_P (opts->x_target_flags))
5254 warning (0, "SSE instruction set disabled, using 387 arithmetics");
5255 opts->x_ix86_fpmath = FPMATH_387;
5258 else if ((opts->x_ix86_fpmath & FPMATH_387)
5259 && !TARGET_80387_P (opts->x_target_flags))
5261 warning (0, "387 instruction set disabled, using SSE arithmetics");
5262 opts->x_ix86_fpmath = FPMATH_SSE;
5266 /* For all chips supporting SSE2, -mfpmath=sse performs better than
5267 fpmath=387. The second is however default at many targets since the
5268 extra 80bit precision of temporaries is considered to be part of ABI.
5269 Overwrite the default at least for -ffast-math.
5270 TODO: -mfpmath=both seems to produce same performing code with bit
5271 smaller binaries. It is however not clear if register allocation is
5272 ready for this setting.
5273 Also -mfpmath=387 is overall a lot more compact (bout 4-5%) than SSE
5274 codegen. We may switch to 387 with -ffast-math for size optimized
5276 else if (fast_math_flags_set_p (&global_options)
5277 && TARGET_SSE2_P (opts->x_ix86_isa_flags))
5278 opts->x_ix86_fpmath = FPMATH_SSE;
5280 opts->x_ix86_fpmath = TARGET_FPMATH_DEFAULT_P (opts->x_ix86_isa_flags);
5282 /* Use external vectorized library in vectorizing intrinsics. */
5283 if (opts_set->x_ix86_veclibabi_type)
5284 switch (opts->x_ix86_veclibabi_type)
5286 case ix86_veclibabi_type_svml:
5287 ix86_veclib_handler = ix86_veclibabi_svml;
5290 case ix86_veclibabi_type_acml:
5291 ix86_veclib_handler = ix86_veclibabi_acml;
5298 if (ix86_tune_features [X86_TUNE_ACCUMULATE_OUTGOING_ARGS]
5299 && !(opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
5300 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
5302 /* If stack probes are required, the space used for large function
5303 arguments on the stack must also be probed, so enable
5304 -maccumulate-outgoing-args so this happens in the prologue. */
5305 if (TARGET_STACK_PROBE_P (opts->x_target_flags)
5306 && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
5308 if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
5309 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
5310 "for correctness", prefix, suffix);
5311 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
5314 /* Stack realignment without -maccumulate-outgoing-args requires %ebp,
5315 so enable -maccumulate-outgoing-args when %ebp is fixed. */
5316 if (fixed_regs[BP_REG]
5317 && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
5319 if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
5320 warning (0, "fixed ebp register requires %saccumulate-outgoing-args%s",
5322 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
5325 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
5328 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
5329 p = strchr (internal_label_prefix, 'X');
5330 internal_label_prefix_len = p - internal_label_prefix;
5334 /* When scheduling description is not available, disable scheduler pass
5335 so it won't slow down the compilation and make x87 code slower. */
5336 if (!TARGET_SCHEDULE)
5337 opts->x_flag_schedule_insns_after_reload = opts->x_flag_schedule_insns = 0;
5339 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
5340 ix86_tune_cost->simultaneous_prefetches,
5341 opts->x_param_values,
5342 opts_set->x_param_values);
5343 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
5344 ix86_tune_cost->prefetch_block,
5345 opts->x_param_values,
5346 opts_set->x_param_values);
5347 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
5348 ix86_tune_cost->l1_cache_size,
5349 opts->x_param_values,
5350 opts_set->x_param_values);
5351 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
5352 ix86_tune_cost->l2_cache_size,
5353 opts->x_param_values,
5354 opts_set->x_param_values);
5356 /* Restrict number of if-converted SET insns to 1. */
5357 if (TARGET_ONE_IF_CONV_INSN)
5358 maybe_set_param_value (PARAM_MAX_RTL_IF_CONVERSION_INSNS,
5360 opts->x_param_values,
5361 opts_set->x_param_values);
5363 /* Enable sw prefetching at -O3 for CPUS that prefetching is helpful. */
5364 if (opts->x_flag_prefetch_loop_arrays < 0
5366 && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
5367 && !opts->x_optimize_size
5368 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
5369 opts->x_flag_prefetch_loop_arrays = 1;
5371 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
5372 can be opts->x_optimized to ap = __builtin_next_arg (0). */
5373 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && !opts->x_flag_split_stack)
5374 targetm.expand_builtin_va_start = NULL;
5376 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
5378 ix86_gen_leave = gen_leave_rex64;
5379 if (Pmode == DImode)
5381 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
5382 ix86_gen_tls_local_dynamic_base_64
5383 = gen_tls_local_dynamic_base_64_di;
5387 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
5388 ix86_gen_tls_local_dynamic_base_64
5389 = gen_tls_local_dynamic_base_64_si;
5393 ix86_gen_leave = gen_leave;
5395 if (Pmode == DImode)
5397 ix86_gen_add3 = gen_adddi3;
5398 ix86_gen_sub3 = gen_subdi3;
5399 ix86_gen_sub3_carry = gen_subdi3_carry;
5400 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
5401 ix86_gen_andsp = gen_anddi3;
5402 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
5403 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
5404 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
5405 ix86_gen_monitor = gen_sse3_monitor_di;
5406 ix86_gen_monitorx = gen_monitorx_di;
5407 ix86_gen_clzero = gen_clzero_di;
5411 ix86_gen_add3 = gen_addsi3;
5412 ix86_gen_sub3 = gen_subsi3;
5413 ix86_gen_sub3_carry = gen_subsi3_carry;
5414 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
5415 ix86_gen_andsp = gen_andsi3;
5416 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
5417 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
5418 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
5419 ix86_gen_monitor = gen_sse3_monitor_si;
5420 ix86_gen_monitorx = gen_monitorx_si;
5421 ix86_gen_clzero = gen_clzero_si;
5425 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
5426 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
5427 opts->x_target_flags |= MASK_CLD & ~opts_set->x_target_flags;
5430 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && opts->x_flag_pic)
5432 if (opts->x_flag_fentry > 0)
5433 sorry ("-mfentry isn%'t supported for 32-bit in combination "
5435 opts->x_flag_fentry = 0;
5437 else if (TARGET_SEH)
5439 if (opts->x_flag_fentry == 0)
5440 sorry ("-mno-fentry isn%'t compatible with SEH");
5441 opts->x_flag_fentry = 1;
5443 else if (opts->x_flag_fentry < 0)
5445 #if defined(PROFILE_BEFORE_PROLOGUE)
5446 opts->x_flag_fentry = 1;
5448 opts->x_flag_fentry = 0;
5452 if (!(opts_set->x_target_flags & MASK_VZEROUPPER))
5453 opts->x_target_flags |= MASK_VZEROUPPER;
5454 if (!(opts_set->x_target_flags & MASK_STV))
5455 opts->x_target_flags |= MASK_STV;
5456 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL]
5457 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
5458 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
5459 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL]
5460 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE))
5461 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
5462 /* Enable 128-bit AVX instruction generation
5463 for the auto-vectorizer. */
5464 if (TARGET_AVX128_OPTIMAL
5465 && !(opts_set->x_target_flags & MASK_PREFER_AVX128))
5466 opts->x_target_flags |= MASK_PREFER_AVX128;
5468 if (opts->x_ix86_recip_name)
5470 char *p = ASTRDUP (opts->x_ix86_recip_name);
5472 unsigned int mask, i;
5475 while ((q = strtok (p, ",")) != NULL)
5486 if (!strcmp (q, "default"))
5487 mask = RECIP_MASK_ALL;
5490 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
5491 if (!strcmp (q, recip_options[i].string))
5493 mask = recip_options[i].mask;
5497 if (i == ARRAY_SIZE (recip_options))
5499 error ("unknown option for -mrecip=%s", q);
5501 mask = RECIP_MASK_NONE;
5505 opts->x_recip_mask_explicit |= mask;
5507 opts->x_recip_mask &= ~mask;
5509 opts->x_recip_mask |= mask;
5513 if (TARGET_RECIP_P (opts->x_target_flags))
5514 opts->x_recip_mask |= RECIP_MASK_ALL & ~opts->x_recip_mask_explicit;
5515 else if (opts_set->x_target_flags & MASK_RECIP)
5516 opts->x_recip_mask &= ~(RECIP_MASK_ALL & ~opts->x_recip_mask_explicit);
5518 /* Default long double to 64-bit for 32-bit Bionic and to __float128
5519 for 64-bit Bionic. Also default long double to 64-bit for Intel
5521 if ((TARGET_HAS_BIONIC || TARGET_IAMCU)
5522 && !(opts_set->x_target_flags
5523 & (MASK_LONG_DOUBLE_64 | MASK_LONG_DOUBLE_128)))
5524 opts->x_target_flags |= (TARGET_64BIT
5525 ? MASK_LONG_DOUBLE_128
5526 : MASK_LONG_DOUBLE_64);
5528 /* Only one of them can be active. */
5529 gcc_assert ((opts->x_target_flags & MASK_LONG_DOUBLE_64) == 0
5530 || (opts->x_target_flags & MASK_LONG_DOUBLE_128) == 0);
5532 /* Save the initial options in case the user does function specific
5535 target_option_default_node = target_option_current_node
5536 = build_target_option_node (opts);
5538 /* Handle stack protector */
5539 if (!opts_set->x_ix86_stack_protector_guard)
5540 opts->x_ix86_stack_protector_guard
5541 = TARGET_HAS_BIONIC ? SSP_GLOBAL : SSP_TLS;
5543 /* Handle -mmemcpy-strategy= and -mmemset-strategy= */
5544 if (opts->x_ix86_tune_memcpy_strategy)
5546 char *str = xstrdup (opts->x_ix86_tune_memcpy_strategy);
5547 ix86_parse_stringop_strategy_string (str, false);
5551 if (opts->x_ix86_tune_memset_strategy)
5553 char *str = xstrdup (opts->x_ix86_tune_memset_strategy);
5554 ix86_parse_stringop_strategy_string (str, true);
5559 /* Implement the TARGET_OPTION_OVERRIDE hook. */
5562 ix86_option_override (void)
5564 opt_pass *pass_insert_vzeroupper = make_pass_insert_vzeroupper (g);
5565 struct register_pass_info insert_vzeroupper_info
5566 = { pass_insert_vzeroupper, "reload",
5567 1, PASS_POS_INSERT_AFTER
5569 opt_pass *pass_stv = make_pass_stv (g);
5570 struct register_pass_info stv_info
5571 = { pass_stv, "combine",
5572 1, PASS_POS_INSERT_AFTER
5575 ix86_option_override_internal (true, &global_options, &global_options_set);
5578 /* This needs to be done at start up. It's convenient to do it here. */
5579 register_pass (&insert_vzeroupper_info);
5580 register_pass (&stv_info);
5583 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
5585 ix86_offload_options (void)
5588 return xstrdup ("-foffload-abi=lp64");
5589 return xstrdup ("-foffload-abi=ilp32");
5592 /* Update register usage after having seen the compiler flags. */
5595 ix86_conditional_register_usage (void)
5599 /* For 32-bit targets, squash the REX registers. */
5602 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
5603 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
5604 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
5605 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
5606 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
5607 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
5610 /* See the definition of CALL_USED_REGISTERS in i386.h. */
5611 c_mask = CALL_USED_REGISTERS_MASK (TARGET_64BIT_MS_ABI);
5613 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
5615 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5617 /* Set/reset conditionally defined registers from
5618 CALL_USED_REGISTERS initializer. */
5619 if (call_used_regs[i] > 1)
5620 call_used_regs[i] = !!(call_used_regs[i] & c_mask);
5622 /* Calculate registers of CLOBBERED_REGS register set
5623 as call used registers from GENERAL_REGS register set. */
5624 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
5625 && call_used_regs[i])
5626 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
5629 /* If MMX is disabled, squash the registers. */
5631 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5632 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
5633 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
5635 /* If SSE is disabled, squash the registers. */
5637 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5638 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
5639 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
5641 /* If the FPU is disabled, squash the registers. */
5642 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
5643 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5644 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
5645 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
5647 /* If AVX512F is disabled, squash the registers. */
5648 if (! TARGET_AVX512F)
5650 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
5651 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
5653 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
5654 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
5657 /* If MPX is disabled, squash the registers. */
5659 for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
5660 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
5664 /* Save the current options */
/* Implements TARGET_OPTION_SAVE: copy the i386 target-option state
   (arch/tune/schedule selection, ISA-flag bookkeeping and each saved
   x_ix86_* option variable) from the globals and from OPTS into the
   streamable cl_target_option record PTR.
   NOTE(review): the return-type line and surrounding braces of this
   definition appear to be elided in this extraction of the file.  */
5667 ix86_function_specific_save (struct cl_target_option *ptr,
5668 struct gcc_options *opts)
5670 ptr->arch = ix86_arch;
5671 ptr->schedule = ix86_schedule;
5672 ptr->prefetch_sse = x86_prefetch_sse;
5673 ptr->tune = ix86_tune;
5674 ptr->branch_cost = ix86_branch_cost;
5675 ptr->tune_defaulted = ix86_tune_defaulted;
5676 ptr->arch_specified = ix86_arch_specified;
/* One field-for-field copy per saved option variable; the restore
   function below mirrors this list exactly.  */
5677 ptr->x_ix86_isa_flags_explicit = opts->x_ix86_isa_flags_explicit;
5678 ptr->x_ix86_target_flags_explicit = opts->x_ix86_target_flags_explicit;
5679 ptr->x_recip_mask_explicit = opts->x_recip_mask_explicit;
5680 ptr->x_ix86_arch_string = opts->x_ix86_arch_string;
5681 ptr->x_ix86_tune_string = opts->x_ix86_tune_string;
5682 ptr->x_ix86_cmodel = opts->x_ix86_cmodel;
5683 ptr->x_ix86_abi = opts->x_ix86_abi;
5684 ptr->x_ix86_asm_dialect = opts->x_ix86_asm_dialect;
5685 ptr->x_ix86_branch_cost = opts->x_ix86_branch_cost;
5686 ptr->x_ix86_dump_tunes = opts->x_ix86_dump_tunes;
5687 ptr->x_ix86_force_align_arg_pointer = opts->x_ix86_force_align_arg_pointer;
5688 ptr->x_ix86_force_drap = opts->x_ix86_force_drap;
5689 ptr->x_ix86_incoming_stack_boundary_arg = opts->x_ix86_incoming_stack_boundary_arg;
5690 ptr->x_ix86_pmode = opts->x_ix86_pmode;
5691 ptr->x_ix86_preferred_stack_boundary_arg = opts->x_ix86_preferred_stack_boundary_arg;
5692 ptr->x_ix86_recip_name = opts->x_ix86_recip_name;
5693 ptr->x_ix86_regparm = opts->x_ix86_regparm;
5694 ptr->x_ix86_section_threshold = opts->x_ix86_section_threshold;
5695 ptr->x_ix86_sse2avx = opts->x_ix86_sse2avx;
5696 ptr->x_ix86_stack_protector_guard = opts->x_ix86_stack_protector_guard;
5697 ptr->x_ix86_stringop_alg = opts->x_ix86_stringop_alg;
5698 ptr->x_ix86_tls_dialect = opts->x_ix86_tls_dialect;
5699 ptr->x_ix86_tune_ctrl_string = opts->x_ix86_tune_ctrl_string;
5700 ptr->x_ix86_tune_memcpy_strategy = opts->x_ix86_tune_memcpy_strategy;
5701 ptr->x_ix86_tune_memset_strategy = opts->x_ix86_tune_memset_strategy;
5702 ptr->x_ix86_tune_no_default = opts->x_ix86_tune_no_default;
5703 ptr->x_ix86_veclibabi_type = opts->x_ix86_veclibabi_type;
5705 /* The fields are char but the variables are not; make sure the
5706 values fit in the fields. */
/* These asserts catch silent truncation if a processor/attr enum ever
   grows past the char-sized fields in cl_target_option.  */
5707 gcc_assert (ptr->arch == ix86_arch);
5708 gcc_assert (ptr->schedule == ix86_schedule);
5709 gcc_assert (ptr->tune == ix86_tune);
5710 gcc_assert (ptr->branch_cost == ix86_branch_cost);
5713 /* Restore the current options */
/* Implements TARGET_OPTION_RESTORE: the inverse of
   ix86_function_specific_save.  Copies every saved field from PTR back
   into OPTS and the backend globals, then rebuilds the derived cost
   tables and feature bitmaps that depend on arch/tune.
   NOTE(review): braces and some lines (e.g. the `else` before line
   5767) appear elided in this extraction.  */
5716 ix86_function_specific_restore (struct gcc_options *opts,
5717 struct cl_target_option *ptr)
5719 enum processor_type old_tune = ix86_tune;
5720 enum processor_type old_arch = ix86_arch;
5721 unsigned int ix86_arch_mask;
5724 /* We don't change -fPIC. */
5725 opts->x_flag_pic = flag_pic;
5727 ix86_arch = (enum processor_type) ptr->arch;
5728 ix86_schedule = (enum attr_cpu) ptr->schedule;
5729 ix86_tune = (enum processor_type) ptr->tune;
5730 x86_prefetch_sse = ptr->prefetch_sse;
5731 opts->x_ix86_branch_cost = ptr->branch_cost;
5732 ix86_tune_defaulted = ptr->tune_defaulted;
5733 ix86_arch_specified = ptr->arch_specified;
/* Field-for-field restore, mirroring the save function above.  */
5734 opts->x_ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
5735 opts->x_ix86_target_flags_explicit = ptr->x_ix86_target_flags_explicit;
5736 opts->x_recip_mask_explicit = ptr->x_recip_mask_explicit;
5737 opts->x_ix86_arch_string = ptr->x_ix86_arch_string;
5738 opts->x_ix86_tune_string = ptr->x_ix86_tune_string;
5739 opts->x_ix86_cmodel = ptr->x_ix86_cmodel;
5740 opts->x_ix86_abi = ptr->x_ix86_abi;
5741 opts->x_ix86_asm_dialect = ptr->x_ix86_asm_dialect;
5742 opts->x_ix86_branch_cost = ptr->x_ix86_branch_cost;
5743 opts->x_ix86_dump_tunes = ptr->x_ix86_dump_tunes;
5744 opts->x_ix86_force_align_arg_pointer = ptr->x_ix86_force_align_arg_pointer;
5745 opts->x_ix86_force_drap = ptr->x_ix86_force_drap;
5746 opts->x_ix86_incoming_stack_boundary_arg = ptr->x_ix86_incoming_stack_boundary_arg;
5747 opts->x_ix86_pmode = ptr->x_ix86_pmode;
5748 opts->x_ix86_preferred_stack_boundary_arg = ptr->x_ix86_preferred_stack_boundary_arg;
5749 opts->x_ix86_recip_name = ptr->x_ix86_recip_name;
5750 opts->x_ix86_regparm = ptr->x_ix86_regparm;
5751 opts->x_ix86_section_threshold = ptr->x_ix86_section_threshold;
5752 opts->x_ix86_sse2avx = ptr->x_ix86_sse2avx;
5753 opts->x_ix86_stack_protector_guard = ptr->x_ix86_stack_protector_guard;
5754 opts->x_ix86_stringop_alg = ptr->x_ix86_stringop_alg;
5755 opts->x_ix86_tls_dialect = ptr->x_ix86_tls_dialect;
5756 opts->x_ix86_tune_ctrl_string = ptr->x_ix86_tune_ctrl_string;
5757 opts->x_ix86_tune_memcpy_strategy = ptr->x_ix86_tune_memcpy_strategy;
5758 opts->x_ix86_tune_memset_strategy = ptr->x_ix86_tune_memset_strategy;
5759 opts->x_ix86_tune_no_default = ptr->x_ix86_tune_no_default;
5760 opts->x_ix86_veclibabi_type = ptr->x_ix86_veclibabi_type;
5761 ix86_tune_cost = processor_target_table[ix86_tune].cost;
5762 /* TODO: ix86_cost should be chosen at instruction or function granuality
5763 so for cold code we use size_cost even in !optimize_size compilation. */
5764 if (opts->x_optimize_size)
5765 ix86_cost = &ix86_size_cost;
/* NOTE(review): an `else` (original line 5766) is elided here.  */
5767 ix86_cost = ix86_tune_cost;
5769 /* Recreate the arch feature tests if the arch changed */
5770 if (old_arch != ix86_arch)
5772 ix86_arch_mask = 1u << ix86_arch;
5773 for (i = 0; i < X86_ARCH_LAST; ++i)
5774 ix86_arch_features[i]
5775 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
5778 /* Recreate the tune optimization tests */
5779 if (old_tune != ix86_tune)
5780 set_ix86_tune_features (ix86_tune, false);
5783 /* Adjust target options after streaming them in. This is mainly about
5784 reconciling them with global options. */
/* Implements TARGET_OPTION_POST_STREAM_IN (used by LTO): the streamed
   x_ix86_cmodel may have been computed under a different flag_pic, so
   map it to the PIC variant when flag_pic is on and back to the
   non-PIC variant otherwise.
   NOTE(review): the `case CM_*:`/`break;` labels of both switches and
   the enclosing if/else on flag_pic are elided in this extraction —
   only the assignment arms survive.  */
5787 ix86_function_specific_post_stream_in (struct cl_target_option *ptr)
5789 /* flag_pic is a global option, but ix86_cmodel is target saved option
5790 partly computed from flag_pic. If flag_pic is on, adjust x_ix86_cmodel
5791 for PIC, or error out. */
5793 switch (ptr->x_ix86_cmodel)
5796 ptr->x_ix86_cmodel = CM_SMALL_PIC;
5800 ptr->x_ix86_cmodel = CM_MEDIUM_PIC;
5804 ptr->x_ix86_cmodel = CM_LARGE_PIC;
/* The kernel code model has no PIC variant, so it is a hard error.  */
5808 error ("code model %s does not support PIC mode", "kernel");
/* Non-PIC direction: strip the _PIC suffix from the code model.  */
5815 switch (ptr->x_ix86_cmodel)
5818 ptr->x_ix86_cmodel = CM_SMALL;
5822 ptr->x_ix86_cmodel = CM_MEDIUM;
5826 ptr->x_ix86_cmodel = CM_LARGE;
5834 /* Print the current options */
/* Implements TARGET_OPTION_PRINT: dump a human-readable description of
   the saved options in PTR to FILE, each line indented by INDENT
   columns (the "%*s" with an empty string produces the padding).
   The string from ix86_target_string is heap-allocated and freed at
   the end.  */
5837 ix86_function_specific_print (FILE *file, int indent,
5838 struct cl_target_option *ptr)
5841 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
5842 NULL, NULL, ptr->x_ix86_fpmath, false);
/* Guard against out-of-range enum values before indexing the table.  */
5844 gcc_assert (ptr->arch < PROCESSOR_max);
5845 fprintf (file, "%*sarch = %d (%s)\n",
5847 ptr->arch, processor_target_table[ptr->arch].name);
5849 gcc_assert (ptr->tune < PROCESSOR_max);
5850 fprintf (file, "%*stune = %d (%s)\n",
5852 ptr->tune, processor_target_table[ptr->tune].name);
5854 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
5858 fprintf (file, "%*s%s\n", indent, "", target_string);
5859 free (target_string);
5864 /* Inner function to process the attribute((target(...))), take an argument and
5865 set the current options from the argument. If we have a list, recursively go
/* Worker for attribute((target("..."))): parse the comma-separated
   option string(s) in ARGS and apply each recognized option to
   OPTS/OPTS_SET (enum options are also recorded in ENUM_OPTS_SET;
   string options are stashed in P_STRINGS for the caller).  Returns
   false (after emitting an error) on any unknown/invalid option.
   NOTE(review): many interior lines of this function (declarations,
   braces, break/continue statements) are elided in this extraction.  */
5869 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
5870 struct gcc_options *opts,
5871 struct gcc_options *opts_set,
5872 struct gcc_options *enum_opts_set)
/* Helper macros building entries of the attrs[] table below:
   option name, name length, option kind, option index, mask.  */
5877 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
5878 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
5879 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
5880 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
5881 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
5897 enum ix86_opt_type type;
/* Table of every option name accepted inside attribute((target)):
   first the ISA toggles, then enum/string options, then the
   MASK_* flag options.  */
5902 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
5903 IX86_ATTR_ISA ("abm", OPT_mabm),
5904 IX86_ATTR_ISA ("bmi", OPT_mbmi),
5905 IX86_ATTR_ISA ("bmi2", OPT_mbmi2),
5906 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
5907 IX86_ATTR_ISA ("tbm", OPT_mtbm),
5908 IX86_ATTR_ISA ("aes", OPT_maes),
5909 IX86_ATTR_ISA ("sha", OPT_msha),
5910 IX86_ATTR_ISA ("avx", OPT_mavx),
5911 IX86_ATTR_ISA ("avx2", OPT_mavx2),
5912 IX86_ATTR_ISA ("avx512f", OPT_mavx512f),
5913 IX86_ATTR_ISA ("avx512pf", OPT_mavx512pf),
5914 IX86_ATTR_ISA ("avx512er", OPT_mavx512er),
5915 IX86_ATTR_ISA ("avx512cd", OPT_mavx512cd),
5916 IX86_ATTR_ISA ("avx512dq", OPT_mavx512dq),
5917 IX86_ATTR_ISA ("avx512bw", OPT_mavx512bw),
5918 IX86_ATTR_ISA ("avx512vl", OPT_mavx512vl),
5919 IX86_ATTR_ISA ("mmx", OPT_mmmx),
5920 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
5921 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
5922 IX86_ATTR_ISA ("sse", OPT_msse),
5923 IX86_ATTR_ISA ("sse2", OPT_msse2),
5924 IX86_ATTR_ISA ("sse3", OPT_msse3),
5925 IX86_ATTR_ISA ("sse4", OPT_msse4),
5926 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
5927 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
5928 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
5929 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
5930 IX86_ATTR_ISA ("fma4", OPT_mfma4),
5931 IX86_ATTR_ISA ("fma", OPT_mfma),
5932 IX86_ATTR_ISA ("xop", OPT_mxop),
5933 IX86_ATTR_ISA ("lwp", OPT_mlwp),
5934 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
5935 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
5936 IX86_ATTR_ISA ("f16c", OPT_mf16c),
5937 IX86_ATTR_ISA ("rtm", OPT_mrtm),
5938 IX86_ATTR_ISA ("hle", OPT_mhle),
5939 IX86_ATTR_ISA ("prfchw", OPT_mprfchw),
5940 IX86_ATTR_ISA ("rdseed", OPT_mrdseed),
5941 IX86_ATTR_ISA ("adx", OPT_madx),
5942 IX86_ATTR_ISA ("fxsr", OPT_mfxsr),
5943 IX86_ATTR_ISA ("xsave", OPT_mxsave),
5944 IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt),
5945 IX86_ATTR_ISA ("prefetchwt1", OPT_mprefetchwt1),
5946 IX86_ATTR_ISA ("clflushopt", OPT_mclflushopt),
5947 IX86_ATTR_ISA ("xsavec", OPT_mxsavec),
5948 IX86_ATTR_ISA ("xsaves", OPT_mxsaves),
5949 IX86_ATTR_ISA ("avx512vbmi", OPT_mavx512vbmi),
5950 IX86_ATTR_ISA ("avx512ifma", OPT_mavx512ifma),
5951 IX86_ATTR_ISA ("clwb", OPT_mclwb),
5952 IX86_ATTR_ISA ("pcommit", OPT_mpcommit),
5953 IX86_ATTR_ISA ("mwaitx", OPT_mmwaitx),
5954 IX86_ATTR_ISA ("clzero", OPT_mclzero),
5955 IX86_ATTR_ISA ("pku", OPT_mpku),
5958 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
5960 /* string options */
5961 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
5962 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
5965 IX86_ATTR_YES ("cld",
5969 IX86_ATTR_NO ("fancy-math-387",
5970 OPT_mfancy_math_387,
5971 MASK_NO_FANCY_MATH_387),
5973 IX86_ATTR_YES ("ieee-fp",
5977 IX86_ATTR_YES ("inline-all-stringops",
5978 OPT_minline_all_stringops,
5979 MASK_INLINE_ALL_STRINGOPS),
5981 IX86_ATTR_YES ("inline-stringops-dynamically",
5982 OPT_minline_stringops_dynamically,
5983 MASK_INLINE_STRINGOPS_DYNAMICALLY),
5985 IX86_ATTR_NO ("align-stringops",
5986 OPT_mno_align_stringops,
5987 MASK_NO_ALIGN_STRINGOPS),
5989 IX86_ATTR_YES ("recip",
5995 /* If this is a list, recurse to get the options. */
5996 if (TREE_CODE (args) == TREE_LIST)
6000 for (; args; args = TREE_CHAIN (args))
6001 if (TREE_VALUE (args)
6002 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
6003 p_strings, opts, opts_set,
/* Only a string constant is valid once we are not in a list.  */
6010 else if (TREE_CODE (args) != STRING_CST)
6012 error ("attribute %<target%> argument not a string");
6016 /* Handle multiple arguments separated by commas. */
6017 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
6019 while (next_optstr && *next_optstr != '\0')
6021 char *p = next_optstr;
6023 char *comma = strchr (next_optstr, ',');
6024 const char *opt_string;
6025 size_t len, opt_len;
6030 enum ix86_opt_type type = ix86_opt_unknown;
/* Split off the next comma-separated token.  */
6036 len = comma - next_optstr;
6037 next_optstr = comma + 1;
6045 /* Recognize no-xxx. */
6046 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
6055 /* Find the option. */
6058 for (i = 0; i < ARRAY_SIZE (attrs); i++)
6060 type = attrs[i].type;
6061 opt_len = attrs[i].len;
6062 if (ch == attrs[i].string[0]
6063 && ((type != ix86_opt_str && type != ix86_opt_enum)
6066 && memcmp (p, attrs[i].string, opt_len) == 0)
6069 mask = attrs[i].mask;
6070 opt_string = attrs[i].string;
6075 /* Process the option. */
6078 error ("attribute(target(\"%s\")) is unknown", orig_p);
/* ISA options go through the normal -m option machinery so the
   implied ISA dependencies are handled consistently.  */
6082 else if (type == ix86_opt_isa)
6084 struct cl_decoded_option decoded;
6086 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
6087 ix86_handle_option (opts, opts_set,
6088 &decoded, input_location);
/* Flag options simply set or clear a MASK_* bit in target_flags.  */
6091 else if (type == ix86_opt_yes || type == ix86_opt_no)
6093 if (type == ix86_opt_no)
6094 opt_set_p = !opt_set_p;
6097 opts->x_target_flags |= mask;
6099 opts->x_target_flags &= ~mask;
/* String options (arch=/tune=) may only be given once; the value
   is duplicated into P_STRINGS for the caller to consume.  */
6102 else if (type == ix86_opt_str)
6106 error ("option(\"%s\") was already specified", opt_string);
6110 p_strings[opt] = xstrdup (p + opt_len);
6113 else if (type == ix86_opt_enum)
6118 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
6120 set_option (opts, enum_opts_set, opt, value,
6121 p + opt_len, DK_UNSPECIFIED, input_location,
6125 error ("attribute(target(\"%s\")) is unknown", orig_p);
6137 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
/* Parse attribute((target(...))) ARGS into OPTS/OPTS_SET, rerun the
   option-override machinery if anything differs from the defaults,
   and return the resulting target-option tree node.  Returns
   error_mark_node if parsing failed.  The original arch/tune/fpmath
   settings are restored on exit so only the returned node carries the
   attribute's effect.  */
6140 ix86_valid_target_attribute_tree (tree args,
6141 struct gcc_options *opts,
6142 struct gcc_options *opts_set)
/* Remember the pre-attribute state so it can be restored below.  */
6144 const char *orig_arch_string = opts->x_ix86_arch_string;
6145 const char *orig_tune_string = opts->x_ix86_tune_string;
6146 enum fpmath_unit orig_fpmath_set = opts_set->x_ix86_fpmath;
6147 int orig_tune_defaulted = ix86_tune_defaulted;
6148 int orig_arch_specified = ix86_arch_specified;
6149 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
6152 struct cl_target_option *def
6153 = TREE_TARGET_OPTION (target_option_default_node);
6154 struct gcc_options enum_opts_set;
6156 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
6158 /* Process each of the options on the chain. */
6159 if (! ix86_valid_target_attribute_inner_p (args, option_strings, opts,
6160 opts_set, &enum_opts_set))
6161 return error_mark_node;
6163 /* If the changed options are different from the default, rerun
6164 ix86_option_override_internal, and then save the options away.
6165 The string options are attribute options, and will be undone
6166 when we copy the save structure. */
6167 if (opts->x_ix86_isa_flags != def->x_ix86_isa_flags
6168 || opts->x_target_flags != def->x_target_flags
6169 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
6170 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
6171 || enum_opts_set.x_ix86_fpmath)
6173 /* If we are using the default tune= or arch=, undo the string assigned,
6174 and use the default. */
6175 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
6177 opts->x_ix86_arch_string
6178 = ggc_strdup (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
6180 /* If arch= is set, clear all bits in x_ix86_isa_flags,
6181 except for ISA_64BIT, ABI_64, ABI_X32, and CODE16. */
6182 opts->x_ix86_isa_flags &= (OPTION_MASK_ISA_64BIT
6183 | OPTION_MASK_ABI_64
6184 | OPTION_MASK_ABI_X32
6185 | OPTION_MASK_CODE16);
6188 else if (!orig_arch_specified)
6189 opts->x_ix86_arch_string = NULL;
6191 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
6192 opts->x_ix86_tune_string
6193 = ggc_strdup (option_strings[IX86_FUNCTION_SPECIFIC_TUNE]);
6194 else if (orig_tune_defaulted)
6195 opts->x_ix86_tune_string = NULL;
6197 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
6198 if (enum_opts_set.x_ix86_fpmath)
6199 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
6200 else if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
6201 && TARGET_SSE_P (opts->x_ix86_isa_flags))
6203 if (TARGET_80387_P (opts->x_target_flags))
6204 opts->x_ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE
6207 opts->x_ix86_fpmath = (enum fpmath_unit) FPMATH_SSE;
6208 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
6211 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
6212 ix86_option_override_internal (false, opts, opts_set);
6214 /* Add any builtin functions with the new isa if any. */
6215 ix86_add_new_builtins (opts->x_ix86_isa_flags);
6217 /* Save the current options unless we are validating options for
6219 t = build_target_option_node (opts);
/* Restore the caller-visible option state saved on entry.  */
6221 opts->x_ix86_arch_string = orig_arch_string;
6222 opts->x_ix86_tune_string = orig_tune_string;
6223 opts_set->x_ix86_fpmath = orig_fpmath_set;
6225 /* Free up memory allocated to hold the strings */
6226 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
6227 free (option_strings[i]);
6233 /* Hook to validate attribute((target("string"))). */
/* Implements TARGET_ATTRIBUTE_TABLE handling for attribute((target)):
   build a per-function gcc_options from the current optimization node
   plus the default target node, parse ARGS into it, and attach the
   resulting target/optimize nodes to FNDECL.  Returns false (via the
   error_mark_node path) when the attribute string is invalid.  */
6236 ix86_valid_target_attribute_p (tree fndecl,
6237 tree ARG_UNUSED (name),
6239 int ARG_UNUSED (flags))
6241 struct gcc_options func_options;
6242 tree new_target, new_optimize;
6245 /* attribute((target("default"))) does nothing, beyond
6246 affecting multi-versioning. */
6247 if (TREE_VALUE (args)
6248 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
6249 && TREE_CHAIN (args) == NULL_TREE
6250 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
6253 tree old_optimize = build_optimization_node (&global_options);
6255 /* Get the optimization options of the current function. */
6256 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
/* Fall back to the global optimization node if the function has
   no optimization attribute of its own.  */
6259 func_optimize = old_optimize;
6261 /* Init func_options. */
6262 memset (&func_options, 0, sizeof (func_options));
6263 init_options_struct (&func_options, NULL);
6264 lang_hooks.init_options_struct (&func_options);
6266 cl_optimization_restore (&func_options,
6267 TREE_OPTIMIZATION (func_optimize));
6269 /* Initialize func_options to the default before its target options can
6271 cl_target_option_restore (&func_options,
6272 TREE_TARGET_OPTION (target_option_default_node));
6274 new_target = ix86_valid_target_attribute_tree (args, &func_options,
6275 &global_options_set);
6277 new_optimize = build_optimization_node (&func_options);
6279 if (new_target == error_mark_node)
6282 else if (fndecl && new_target)
6284 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
/* Only attach a new optimization node if it actually differs.  */
6286 if (old_optimize != new_optimize)
6287 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
6290 finalize_options_struct (&func_options);
6296 /* Hook to determine if one function can safely inline another. */
/* Implements TARGET_CAN_INLINE_P: CALLEE may be inlined into CALLER
   only when the callee's ISA flags are a subset of the caller's and
   the non-ISA target flags, arch, tune, fpmath and branch-cost all
   match — otherwise the inlined code could use instructions or
   assumptions the caller's context does not guarantee.  */
6299 ix86_can_inline_p (tree caller, tree callee)
6302 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
6303 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
6305 /* If callee has no option attributes, then it is ok to inline. */
6309 /* If caller has no option attributes, but callee does then it is not ok to
6311 else if (!caller_tree)
6316 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
6317 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
6319 /* Callee's isa options should a subset of the caller's, i.e. a SSE4 function
6320 can inline a SSE2 function but a SSE2 function can't inline a SSE4
6322 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
6323 != callee_opts->x_ix86_isa_flags)
6326 /* See if we have the same non-isa options. */
6327 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
6330 /* See if arch, tune, etc. are the same. */
6331 else if (caller_opts->arch != callee_opts->arch)
6334 else if (caller_opts->tune != callee_opts->tune)
6337 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
6340 else if (caller_opts->branch_cost != callee_opts->branch_cost)
6351 /* Remember the last target of ix86_set_current_function. */
/* One-element cache so ix86_set_current_function can early-out when
   re-entered for the same function; GTY so the GC tracks the tree.  */
6352 static GTY(()) tree ix86_previous_fndecl;
6354 /* Set targets globals to the default (or current #pragma GCC target
6355 if active). Invalidate ix86_previous_fndecl cache. */
6358 ix86_reset_previous_fndecl (void)
6360 tree new_tree = target_option_current_node;
6361 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
6362 if (TREE_TARGET_GLOBALS (new_tree))
6363 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
6364 else if (new_tree == target_option_default_node)
6365 restore_target_globals (&default_target_globals);
/* NOTE(review): the `else` arm before this line (original 6366)
   appears elided; this saves fresh target globals into the node.  */
6367 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
6368 ix86_previous_fndecl = NULL_TREE;
6371 /* Establish appropriate back-end context for processing the function
6372 FNDECL. The argument might be NULL to indicate processing at top
6373 level, outside of any function scope. */
/* Implements TARGET_SET_CURRENT_FUNCTION.  Restores the target option
   globals matching FNDECL's target attribute (or the defaults) and
   caches FNDECL in ix86_previous_fndecl to avoid repeated expensive
   reinitialization.  */
6375 ix86_set_current_function (tree fndecl)
6377 /* Only change the context if the function changes. This hook is called
6378 several times in the course of compiling a function, and we don't want to
6379 slow things down too much or call target_reinit when it isn't safe. */
6380 if (fndecl == ix86_previous_fndecl)
/* Work out which option node was active for the previous function.  */
6384 if (ix86_previous_fndecl == NULL_TREE)
6385 old_tree = target_option_current_node;
6386 else if (DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl))
6387 old_tree = DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl);
6389 old_tree = target_option_default_node;
6391 if (fndecl == NULL_TREE)
6393 if (old_tree != target_option_current_node)
6394 ix86_reset_previous_fndecl ();
6398 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
6399 if (new_tree == NULL_TREE)
6400 new_tree = target_option_default_node;
6402 if (old_tree != new_tree)
6404 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
6405 if (TREE_TARGET_GLOBALS (new_tree))
6406 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
6407 else if (new_tree == target_option_default_node)
6408 restore_target_globals (&default_target_globals);
6410 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
6412 ix86_previous_fndecl = fndecl;
6414 /* 64-bit MS and SYSV ABI have different set of call used registers.
6415 Avoid expensive re-initialization of init_regs each time we switch
6416 function context. */
/* NOTE(review): the enclosing condition for this ABI check (original
   line 6417) is elided in this extraction.  */
6418 && (call_used_regs[SI_REG]
6419 == (cfun->machine->call_abi == MS_ABI)))
6424 /* Return true if this goes in large data/bss. */
/* Predicate used by the x86-64 medium code model: decide whether EXP
   (a decl or type) must live in the "large" .ldata/.lbss sections.
   Only the medium models split data this way; functions and local
   variables never qualify; explicit .ldata/.lbss section names force
   it; otherwise the decision is by size against -mlarge-data-threshold
   (ix86_section_threshold).  */
6427 ix86_in_large_data_p (tree exp)
6429 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
6432 /* Functions are never large data. */
6433 if (TREE_CODE (exp) == FUNCTION_DECL)
6436 /* Automatic variables are never large data. */
6437 if (TREE_CODE (exp) == VAR_DECL && !is_global_var (exp))
6440 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
6442 const char *section = DECL_SECTION_NAME (exp);
6443 if (strcmp (section, ".ldata") == 0
6444 || strcmp (section, ".lbss") == 0)
6450 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
6452 /* If this is an incomplete type with size 0, then we can't put it
6453 in data because it might be too big when completed. Also,
6454 int_size_in_bytes returns -1 if size can vary or is larger than
6455 an integer in which case also it is safer to assume that it goes in
6457 if (size <= 0 || size > ix86_section_threshold)
6464 /* Switch to the appropriate section for output of DECL.
6465 DECL is either a `VAR_DECL' node or a constant of some sort.
6466 RELOC indicates whether forming the initial value of DECL requires
6467 link-time relocations. */
/* Medium-model variant of TARGET_ASM_SELECT_SECTION: large data gets
   an ".ldata*"/".lbss" section chosen by its relocation category;
   everything else falls through to the default ELF selection.
   NOTE(review): the `case SECCAT_*:` labels and `break`s of the switch
   are elided in this extraction — only the sname assignments remain.  */
6469 ATTRIBUTE_UNUSED static section *
6470 x86_64_elf_select_section (tree decl, int reloc,
6471 unsigned HOST_WIDE_INT align)
6473 if (ix86_in_large_data_p (decl))
6475 const char *sname = NULL;
6476 unsigned int flags = SECTION_WRITE;
6477 switch (categorize_decl_for_section (decl, reloc))
6482 case SECCAT_DATA_REL:
6483 sname = ".ldata.rel";
6485 case SECCAT_DATA_REL_LOCAL:
6486 sname = ".ldata.rel.local";
6488 case SECCAT_DATA_REL_RO:
6489 sname = ".ldata.rel.ro";
6491 case SECCAT_DATA_REL_RO_LOCAL:
6492 sname = ".ldata.rel.ro.local";
6496 flags |= SECTION_BSS;
6499 case SECCAT_RODATA_MERGE_STR:
6500 case SECCAT_RODATA_MERGE_STR_INIT:
6501 case SECCAT_RODATA_MERGE_CONST:
6505 case SECCAT_SRODATA:
6512 /* We don't split these for medium model. Place them into
6513 default sections and hope for best. */
6518 /* We might get called with string constants, but get_named_section
6519 doesn't like them as they are not DECLs. Also, we need to set
6520 flags in that case. */
6522 return get_section (sname, flags, NULL);
6523 return get_named_section (decl, sname, reloc);
6526 return default_elf_select_section (decl, reloc, align);
6529 /* Select a set of attributes for section NAME based on the properties
6530 of DECL and whether or not RELOC indicates that DECL's initializer
6531 might contain runtime relocations. */
6533 static unsigned int ATTRIBUTE_UNUSED
6534 x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
6536 unsigned int flags = default_section_type_flags (decl, name, reloc);
6538 if (decl == NULL_TREE
6539 && (strcmp (name, ".ldata.rel.ro") == 0
6540 || strcmp (name, ".ldata.rel.ro.local") == 0))
6541 flags |= SECTION_RELRO;
6543 if (strcmp (name, ".lbss") == 0
6544 || strncmp (name, ".lbss.", 5) == 0
6545 || strncmp (name, ".gnu.linkonce.lb.", 16) == 0)
6546 flags |= SECTION_BSS;
6551 /* Build up a unique section name, expressed as a
6552 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
6553 RELOC indicates whether the initial value of EXP requires
6554 link-time relocations. */
/* Medium-model variant of TARGET_ASM_UNIQUE_SECTION: large-data decls
   get a per-decl section named <prefix>.<decl-name>, using the ".l*"
   large-section prefixes (and a ".gnu.linkonce" prefix when COMDAT
   groups are unavailable).  Other decls use default_unique_section.
   NOTE(review): several `case` labels and `break`s of the switch are
   elided in this extraction.  */
6556 static void ATTRIBUTE_UNUSED
6557 x86_64_elf_unique_section (tree decl, int reloc)
6559 if (ix86_in_large_data_p (decl))
6561 const char *prefix = NULL;
6562 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
6563 bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;
6565 switch (categorize_decl_for_section (decl, reloc))
6568 case SECCAT_DATA_REL:
6569 case SECCAT_DATA_REL_LOCAL:
6570 case SECCAT_DATA_REL_RO:
6571 case SECCAT_DATA_REL_RO_LOCAL:
6572 prefix = one_only ? ".ld" : ".ldata";
6575 prefix = one_only ? ".lb" : ".lbss";
6578 case SECCAT_RODATA_MERGE_STR:
6579 case SECCAT_RODATA_MERGE_STR_INIT:
6580 case SECCAT_RODATA_MERGE_CONST:
6581 prefix = one_only ? ".lr" : ".lrodata";
6583 case SECCAT_SRODATA:
6590 /* We don't split these for medium model. Place them into
6591 default sections and hope for best. */
6596 const char *name, *linkonce;
6599 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
6600 name = targetm.strip_name_encoding (name);
6602 /* If we're using one_only, then there needs to be a .gnu.linkonce
6603 prefix to the section name. */
6604 linkonce = one_only ? ".gnu.linkonce" : "";
6606 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
6608 set_decl_section_name (decl, string);
/* Fallback for everything that is not medium-model large data.  */
6612 default_unique_section (decl, reloc);
6615 #ifdef COMMON_ASM_OP
6616 /* This says how to output assembler code to declare an
6617 uninitialized external linkage data object.
6619 For medium model x86-64 we need to use .largecomm opcode for
/* Emit a common symbol: ".largecomm" for medium-model objects larger
   than -mlarge-data-threshold, plain COMMON_ASM_OP otherwise, followed
   by "name,size,alignment-in-bytes".  */
6622 x86_elf_aligned_common (FILE *file,
6623 const char *name, unsigned HOST_WIDE_INT size,
6626 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
6627 && size > (unsigned int)ix86_section_threshold)
6628 fputs ("\t.largecomm\t", file);
6630 fputs (COMMON_ASM_OP, file);
6631 assemble_name (file, name);
6632 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
6633 size, align / BITS_PER_UNIT);
6637 /* Utility function for targets to use in implementing
6638 ASM_OUTPUT_ALIGNED_BSS. */
/* Emit an aligned BSS object: pick ".lbss" for medium-model objects
   over the large-data threshold, the normal bss_section otherwise,
   then output alignment, label (or full object declaration) and a skip
   covering SIZE bytes (at least 1 so the label has distinct address).  */
6641 x86_output_aligned_bss (FILE *file, tree decl, const char *name,
6642 unsigned HOST_WIDE_INT size, int align)
6644 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
6645 && size > (unsigned int)ix86_section_threshold)
6646 switch_to_section (get_named_section (decl, ".lbss", 0));
6648 switch_to_section (bss_section);
6649 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
6650 #ifdef ASM_DECLARE_OBJECT_NAME
6651 last_assemble_variable_decl = decl;
6652 ASM_DECLARE_OBJECT_NAME (file, name, decl);
6654 /* Standard thing is just output label for the object. */
6655 ASM_OUTPUT_LABEL (file, name);
6656 #endif /* ASM_DECLARE_OBJECT_NAME */
6657 ASM_OUTPUT_SKIP (file, size ? size : 1);
6660 /* Decide whether we must probe the stack before any space allocation
6661 on this target. It's essentially TARGET_STACK_PROBE except when
6662 -fstack-check causes the stack to be already probed differently. */
/* NOTE(review): the `return false;` body of the if (original lines
   6669-6670) is elided in this extraction.  */
6665 ix86_target_stack_probe (void)
6667 /* Do not probe the stack twice if static stack checking is enabled. */
6668 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
6671 return TARGET_STACK_PROBE;
6674 /* Decide whether we can make a sibling call to a function. DECL is the
6675 declaration of the function being targeted by the call and EXP is the
6676 CALL_EXPR representing the call. */
/* Implements TARGET_FUNCTION_OK_FOR_SIBCALL.  A sibcall is rejected
   when: the stack would need realignment; the return-value registers
   of caller and callee disagree (notably x87 stack registers); an
   MS-ABI caller targets a SYSV-ABI callee; or an indirect/GOT/DLL
   call would need a call-clobbered register that regparm has used up.
   NOTE(review): several interior lines (early returns, `if (!decl)`
   branching, parts of conditions) are elided in this extraction.  */
6679 ix86_function_ok_for_sibcall (tree decl, tree exp)
6681 tree type, decl_or_type;
6683 bool bind_global = decl && !targetm.binds_local_p (decl);
6685 /* If we are generating position-independent code, we cannot sibcall
6686 optimize direct calls to global functions, as the PLT requires
6687 %ebx be live. (Darwin does not have a PLT.) */
6695 /* If we need to align the outgoing stack, then sibcalling would
6696 unalign the stack, which may break the called function. */
6697 if (ix86_minimum_incoming_stack_boundary (true)
6698 < PREFERRED_STACK_BOUNDARY)
/* Direct call: look at the callee's declared type.  */
6703 decl_or_type = decl;
6704 type = TREE_TYPE (decl);
6708 /* We're looking at the CALL_EXPR, we need the type of the function. */
6709 type = CALL_EXPR_FN (exp); /* pointer expression */
6710 type = TREE_TYPE (type); /* pointer type */
6711 type = TREE_TYPE (type); /* function type */
6712 decl_or_type = type;
6715 /* Check that the return value locations are the same. Like
6716 if we are returning floats on the 80387 register stack, we cannot
6717 make a sibcall from a function that doesn't return a float to a
6718 function that does or, conversely, from a function that does return
6719 a float to a function that doesn't; the necessary stack adjustment
6720 would not be executed. This is also the place we notice
6721 differences in the return value ABI. Note that it is ok for one
6722 of the functions to have void return type as long as the return
6723 value of the other is passed in a register. */
6724 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
6725 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
6727 if (STACK_REG_P (a) || STACK_REG_P (b))
6729 if (!rtx_equal_p (a, b))
6732 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
6734 else if (!rtx_equal_p (a, b))
6739 /* The SYSV ABI has more call-clobbered registers;
6740 disallow sibcalls from MS to SYSV. */
6741 if (cfun->machine->call_abi == MS_ABI
6742 && ix86_function_type_abi (type) == SYSV_ABI)
6747 /* If this call is indirect, we'll need to be able to use a
6748 call-clobbered register for the address of the target function.
6749 Make sure that all such registers are not used for passing
6750 parameters. Note that DLLIMPORT functions and call to global
6751 function via GOT slot are indirect. */
6753 || (bind_global && flag_pic && !flag_plt)
6754 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
6756 /* Check if regparm >= 3 since arg_reg_available is set to
6757 false if regparm == 0. If regparm is 1 or 2, there is
6758 always a call-clobbered register available.
6760 ??? The symbol indirect call doesn't need a call-clobbered
6761 register. But we don't know if this is a symbol indirect
6762 call or not here. */
6763 if (ix86_function_regparm (type, NULL) >= 3
6764 && !cfun->machine->arg_reg_available)
6769 /* Otherwise okay. That also includes certain types of indirect calls. */
6773 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
6774 and "sseregparm" calling convention attributes;
6775 arguments as in struct attribute_spec.handler. */
/* Validates the IA-32 calling-convention attributes: rejects invalid
   attachment targets, checks the regparm argument, and diagnoses
   mutually-exclusive attribute combinations.  Sets *no_add_attrs when
   the attribute should be dropped.  (Intermediate lines are elided in
   this listing.)  */
6778 ix86_handle_cconv_attribute (tree *node, tree name,
/* Only function/method types (or decls that carry them) may take
   calling-convention attributes.  */
6783 if (TREE_CODE (*node) != FUNCTION_TYPE
6784 && TREE_CODE (*node) != METHOD_TYPE
6785 && TREE_CODE (*node) != FIELD_DECL
6786 && TREE_CODE (*node) != TYPE_DECL)
6788 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6790 *no_add_attrs = true;
6794 /* Can combine regparm with all attributes but fastcall, and thiscall. */
6795 if (is_attribute_p ("regparm", name))
6799 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
6801 error ("fastcall and regparm attributes are not compatible");
6804 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
/* NOTE(review): "regparam" in this user-visible message looks like a
   typo for "regparm" -- confirm before changing the string, since it
   may be matched by testsuite expectations.  */
6806 error ("regparam and thiscall attributes are not compatible");
/* regparm takes a single integer-constant argument, capped at
   REGPARM_MAX.  */
6809 cst = TREE_VALUE (args);
6810 if (TREE_CODE (cst) != INTEGER_CST)
6812 warning (OPT_Wattributes,
6813 "%qE attribute requires an integer constant argument",
6815 *no_add_attrs = true;
6817 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
6819 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
6821 *no_add_attrs = true;
/* 64-bit path (lines elided): the attribute is ignored, but no
   warning is emitted when the type uses the MS ABI.  */
6829 /* Do not warn when emulating the MS ABI. */
6830 if ((TREE_CODE (*node) != FUNCTION_TYPE
6831 && TREE_CODE (*node) != METHOD_TYPE)
6832 || ix86_function_type_abi (*node) != MS_ABI)
6833 warning (OPT_Wattributes, "%qE attribute ignored",
6835 *no_add_attrs = true;
6839 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
6840 if (is_attribute_p ("fastcall", name))
6842 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
6844 error ("fastcall and cdecl attributes are not compatible");
6846 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
6848 error ("fastcall and stdcall attributes are not compatible");
6850 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
6852 error ("fastcall and regparm attributes are not compatible");
6854 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
6856 error ("fastcall and thiscall attributes are not compatible");
6860 /* Can combine stdcall with fastcall (redundant), regparm and
6862 else if (is_attribute_p ("stdcall", name))
6864 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
6866 error ("stdcall and cdecl attributes are not compatible");
6868 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
6870 error ("stdcall and fastcall attributes are not compatible");
6872 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
6874 error ("stdcall and thiscall attributes are not compatible");
6878 /* Can combine cdecl with regparm and sseregparm. */
6879 else if (is_attribute_p ("cdecl", name))
6881 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
6883 error ("stdcall and cdecl attributes are not compatible");
6885 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
6887 error ("fastcall and cdecl attributes are not compatible");
6889 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
6891 error ("cdecl and thiscall attributes are not compatible");
/* thiscall is meant for class methods; its use elsewhere only draws a
   pedantic warning.  */
6894 else if (is_attribute_p ("thiscall", name))
6896 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
6897 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
6899 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
6901 error ("stdcall and thiscall attributes are not compatible");
6903 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
6905 error ("fastcall and thiscall attributes are not compatible");
6907 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
6909 error ("cdecl and thiscall attributes are not compatible");
6913 /* Can combine sseregparm with all attributes. */
6918 /* The transactional memory builtins are implicitly regparm or fastcall
6919 depending on the ABI. Override the generic do-nothing attribute that
6920 these builtins were declared with, and replace it with one of the two
6921 attributes that we expect elsewhere. */
/* Attribute handler: never keeps the placeholder itself; instead it
   attaches either "fastcall" or "regparm(2)" to *NODE via
   decl_attributes.  */
6924 ix86_handle_tm_regparm_attribute (tree *node, tree, tree,
6925 int flags, bool *no_add_attrs)
6929 /* In no case do we want to add the placeholder attribute. */
6930 *no_add_attrs = true;
6932 /* The 64-bit ABI is unchanged for transactional memory. */
6936 /* ??? Is there a better way to validate 32-bit windows? We have
6937 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
/* CHECK_STACK_LIMIT > 0 is used here as a proxy for 32-bit Windows
   targets, which get fastcall ...  */
6938 if (CHECK_STACK_LIMIT > 0)
6939 alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
/* ... while everything else gets regparm with an argument of 2.  */
6942 alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
6943 alt = tree_cons (get_identifier ("regparm"), alt, NULL);
6945 decl_attributes (node, alt, flags);
6950 /* This function determines from TYPE the calling-convention. */
/* Returns a bitmask of IX86_CALLCVT_* flags: at most one base
   convention (cdecl/stdcall/fastcall/thiscall) OR-ed with the
   regparm/sseregparm modifier flags.  (Intermediate lines, including
   the early 64-bit return, are elided in this listing.)  */
6953 ix86_get_callcvt (const_tree type)
6955 unsigned int ret = 0;
6960 return IX86_CALLCVT_CDECL;
/* Explicit attributes take priority; the base conventions are
   mutually exclusive, hence the else-if chain.  */
6962 attrs = TYPE_ATTRIBUTES (type);
6963 if (attrs != NULL_TREE)
6965 if (lookup_attribute ("cdecl", attrs))
6966 ret |= IX86_CALLCVT_CDECL;
6967 else if (lookup_attribute ("stdcall", attrs))
6968 ret |= IX86_CALLCVT_STDCALL;
6969 else if (lookup_attribute ("fastcall", attrs))
6970 ret |= IX86_CALLCVT_FASTCALL;
6971 else if (lookup_attribute ("thiscall", attrs))
6972 ret |= IX86_CALLCVT_THISCALL;
6974 /* Regparm isn't allowed for thiscall and fastcall. */
6975 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
6977 if (lookup_attribute ("regparm", attrs))
6978 ret |= IX86_CALLCVT_REGPARM;
6979 if (lookup_attribute ("sseregparm", attrs))
6980 ret |= IX86_CALLCVT_SSEREGPARM;
/* A base convention was given explicitly; nothing more to infer.  */
6983 if (IX86_BASE_CALLCVT(ret) != 0)
/* No explicit base convention: -mrtd makes non-varargs functions
   stdcall by default.  */
6987 is_stdarg = stdarg_p (type);
6988 if (TARGET_RTD && !is_stdarg)
6989 return IX86_CALLCVT_STDCALL | ret;
/* Otherwise default to cdecl, except MS-ABI class methods which
   default to thiscall.  */
6993 || TREE_CODE (type) != METHOD_TYPE
6994 || ix86_function_type_abi (type) != MS_ABI)
6995 return IX86_CALLCVT_CDECL | ret;
6997 return IX86_CALLCVT_THISCALL;
7000 /* Return 0 if the attributes for two types are incompatible, 1 if they
7001 are compatible, and 2 if they are nearly compatible (which causes a
7002 warning to be generated). */
/* TARGET_COMP_TYPE_ATTRIBUTES hook: non-function types (elided early
   return) are trivially compatible; function types must agree on both
   the calling convention and the effective regparm count.  */
6953 ix86_comp_type_attributes (const_tree type1, const_tree type2)
7007 unsigned int ccvt1, ccvt2;
7009 if (TREE_CODE (type1) != FUNCTION_TYPE
7010 && TREE_CODE (type1) != METHOD_TYPE)
7013 ccvt1 = ix86_get_callcvt (type1);
7014 ccvt2 = ix86_get_callcvt (type2);
/* Conventions match (comparison elided); regparm counts must too.  */
7017 if (ix86_function_regparm (type1, NULL)
7018 != ix86_function_regparm (type2, NULL))
7024 /* Return the regparm value for a function with the indicated TYPE and DECL.
7025 DECL may be NULL when calling function indirectly
7026 or considering a libcall. */
7029 ix86_function_regparm (const_tree type, const_tree decl)
/* 64-bit: the regparm count is fixed by the ABI in use.  */
7036 return (ix86_function_type_abi (type) == SYSV_ABI
7037 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
7038 ccvt = ix86_get_callcvt (type);
7039 regparm = ix86_regparm;
/* An explicit regparm attribute overrides the -mregparm default.  */
7041 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
7043 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
7046 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
/* fastcall/thiscall imply fixed counts (values elided here).  */
7050 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
7052 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
7055 /* Use register calling convention for local functions when possible. */
7057 && TREE_CODE (decl) == FUNCTION_DECL)
7059 cgraph_node *target = cgraph_node::get (decl);
7061 target = target->function_symbol ();
7063 /* Caller and callee must agree on the calling convention, so
7064 checking here just optimize means that with
7065 __attribute__((optimize (...))) caller could use regparm convention
7066 and callee not, or vice versa. Instead look at whether the callee
7067 is optimized or not. */
7068 if (target && opt_for_fn (target->decl, optimize)
7069 && !(profile_flag && !flag_fentry))
7071 cgraph_local_info *i = &target->local;
7072 if (i && i->local && i->can_change_signature)
7074 int local_regparm, globals = 0, regno;
7076 /* Make sure no regparm register is taken by a
7077 fixed register variable. */
7078 for (local_regparm = 0; local_regparm < REGPARM_MAX;
7080 if (fixed_regs[local_regparm])
7083 /* We don't want to use regparm(3) for nested functions as
7084 these use a static chain pointer in the third argument. */
7085 if (local_regparm == 3 && DECL_STATIC_CHAIN (target->decl))
7088 /* Save a register for the split stack. */
7089 if (local_regparm == 3 && flag_split_stack)
7092 /* Each fixed register usage increases register pressure,
7093 so less registers should be used for argument passing.
7094 This functionality can be overridden by an explicit
7096 for (regno = AX_REG; regno <= DI_REG; regno++)
7097 if (fixed_regs[regno])
/* Reduce the local regparm count by the number of globally fixed
   registers counted above.  */
7101 = globals < local_regparm ? local_regparm - globals : 0;
7103 if (local_regparm > regparm)
7104 regparm = local_regparm;
7112 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
7113 DFmode (2) arguments in SSE registers for a function with the
7114 indicated TYPE and DECL. DECL may be NULL when calling function
7115 indirectly or considering a libcall. Return -1 if any FP parameter
7116 should be rejected by error. This is used in situations where we imply SSE
7117 calling convention but the function is called from another function with
7118 SSE disabled. Otherwise return 0. */
7121 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
/* Only meaningful for 32-bit code paths.  */
7123 gcc_assert (!TARGET_64BIT);
7125 /* Use SSE registers to pass SFmode and DFmode arguments if requested
7126 by the sseregparm attribute. */
7127 if (TARGET_SSEREGPARM
7128 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
/* sseregparm without SSE enabled is a hard error (guard elided).  */
7135 error ("calling %qD with attribute sseregparm without "
7136 "SSE/SSE2 enabled", decl);
7138 error ("calling %qT with attribute sseregparm without "
7139 "SSE/SSE2 enabled", type);
/* Local-function fast path (guards elided in this listing).  */
7150 cgraph_node *target = cgraph_node::get (decl);
7152 target = target->function_symbol ();
7154 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
7155 (and DFmode for SSE2) arguments in SSE registers. */
7157 /* TARGET_SSE_MATH */
7158 && (target_opts_for_fn (target->decl)->x_ix86_fpmath & FPMATH_SSE)
7159 && opt_for_fn (target->decl, optimize)
7160 && !(profile_flag && !flag_fentry))
7162 cgraph_local_info *i = &target->local;
7163 if (i && i->local && i->can_change_signature)
7165 /* Refuse to produce wrong code when local function with SSE enabled
7166 is called from SSE disabled function.
7167 FIXME: We need a way to detect these cases cross-ltrans partition
7168 and avoid using SSE calling conventions on local functions called
7169 from function with SSE disabled. For now at least delay the
7170 warning until we know we are going to produce wrong code.
7172 if (!TARGET_SSE && warn)
/* 2 when the callee has SSE2 (SFmode and DFmode), else 1 (SFmode
   only).  */
7174 return TARGET_SSE2_P (target_opts_for_fn (target->decl)
7175 ->x_ix86_isa_flags) ? 2 : 1;
7182 /* Return true if EAX is live at the start of the function. Used by
7183 ix86_expand_prologue to determine if we need special help before
7184 calling allocate_stack_worker. */
7187 ix86_eax_live_at_start_p (void)
7189 /* Cheat. Don't bother working forward from ix86_function_regparm
7190 to the function type to whether an actual argument is located in
7191 eax. Instead just look at cfg info, which is still close enough
7192 to correct at this point. This gives false positives for broken
7193 functions that might use uninitialized data that happens to be
7194 allocated in eax, but who cares? */
/* Register 0 is AX; query the entry block's live-out set.  */
7195 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
/* Whether the caller keeps (true) or the callee pops (false) the
   hidden aggregate-return pointer for FNTYPE.  An explicit
   callee_pop_aggregate_return attribute wins; otherwise 32-bit MS-ABI
   and the KEEP_AGGREGATE_RETURN_POINTER default decide.  */
7199 ix86_keep_aggregate_return_pointer (tree fntype)
7205 attr = lookup_attribute ("callee_pop_aggregate_return",
7206 TYPE_ATTRIBUTES (fntype));
/* Attribute argument 0 means "keep" (caller pops).  */
7208 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
7210 /* For 32-bit MS-ABI the default is to keep aggregate
7212 if (ix86_function_type_abi (fntype) == MS_ABI)
7215 return KEEP_AGGREGATE_RETURN_POINTER != 0;
7218 /* Value is the number of bytes of arguments automatically
7219 popped when returning from a subroutine call.
7220 FUNDECL is the declaration node of the function (as a tree),
7221 FUNTYPE is the data type of the function (as a tree),
7222 or for a library call it is an identifier node for the subroutine name.
7223 SIZE is the number of bytes of arguments passed on the stack.
7225 On the 80386, the RTD insn may be used to pop them if the number
7226 of args is fixed, but if the number is variable then the caller
7227 must pop them all. RTD can't be used for library calls now
7228 because the library is compiled with the Unix compiler.
7229 Use of RTD is a selectable option, since it is incompatible with
7230 standard Unix calling sequences. If the option is not selected,
7231 the caller must always pop the args.
7233 The attribute stdcall is equivalent to RTD on a per module basis. */
7236 ix86_return_pops_args (tree fundecl, tree funtype, int size)
7240 /* None of the 64-bit ABIs pop arguments. */
7244 ccvt = ix86_get_callcvt (funtype);
/* Callee-pops conventions (stdcall/fastcall/thiscall) pop SIZE, but
   only for non-varargs functions (return elided).  */
7246 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
7247 | IX86_CALLCVT_THISCALL)) != 0
7248 && ! stdarg_p (funtype))
7251 /* Lose any fake structure return argument if it is passed on the stack. */
7252 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
7253 && !ix86_keep_aggregate_return_pointer (funtype))
7255 int nregs = ix86_function_regparm (funtype, fundecl);
/* The hidden pointer is one word; pop exactly that.  */
7257 return GET_MODE_SIZE (Pmode);
7263 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
/* Rejects combined insns whose propagated hard-register operands
   would not satisfy any preferred alternative's constraints.  */
7266 ix86_legitimate_combined_insn (rtx_insn *insn)
7268 /* Check operand constraints in case hard registers were propagated
7269 into insn pattern. This check prevents combine pass from
7270 generating insn patterns with invalid hard register operands.
7271 These invalid insns can eventually confuse reload to error out
7272 with a spill failure. See also PRs 46829 and 46843. */
/* NOTE(review): the assignment inside this condition re-runs recog
   unconditionally and overwrites any cached INSN_CODE; upstream GCC
   tests a previously computed `INSN_CODE (insn) >= 0` here -- confirm
   this deviation is intentional.  */
7273 if ((INSN_CODE (insn) = recog (PATTERN (insn), insn, 0)) >= 0)
7277 extract_insn (insn);
7278 preprocess_constraints (insn);
7280 int n_operands = recog_data.n_operands;
7281 int n_alternatives = recog_data.n_alternatives;
7282 for (i = 0; i < n_operands; i++)
7284 rtx op = recog_data.operand[i];
7285 machine_mode mode = GET_MODE (op);
7286 const operand_alternative *op_alt;
7291 /* For pre-AVX disallow unaligned loads/stores where the
7292 instructions don't support it. */
7294 && VECTOR_MODE_P (mode)
7295 && misaligned_operand (op, mode))
7297 unsigned int min_align = get_attr_ssememalign (insn);
7299 || MEM_ALIGN (op) < min_align)
7303 /* A unary operator may be accepted by the predicate, but it
7304 is irrelevant for matching constraints. */
/* Look through SUBREGs of hard registers, tracking the byte offset
   so constraint checking sees the right register.  */
7310 if (REG_P (SUBREG_REG (op))
7311 && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
7312 offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
7313 GET_MODE (SUBREG_REG (op)),
7316 op = SUBREG_REG (op);
/* Only hard-register operands need constraint validation here.  */
7319 if (!(REG_P (op) && HARD_REGISTER_P (op)))
7322 op_alt = recog_op_alt;
7324 /* Operand has no constraints, anything is OK. */
7325 win = !n_alternatives;
/* Accept if any preferred alternative matches this operand.  */
7327 alternative_mask preferred = get_preferred_alternatives (insn);
7328 for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
7330 if (!TEST_BIT (preferred, j))
7332 if (op_alt[i].anything_ok
7333 || (op_alt[i].matches != -1
7335 (recog_data.operand[i],
7336 recog_data.operand[op_alt[i].matches]))
7337 || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
7352 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
/* AddressSanitizer shadow-memory base: per-target constants for
   LP64 (Mach-O vs. other) and 32-bit modes.  */
7354 static unsigned HOST_WIDE_INT
7355 ix86_asan_shadow_offset (void)
7357 return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44)
7358 : HOST_WIDE_INT_C (0x7fff8000))
7359 : (HOST_WIDE_INT_1 << 29);
7362 /* Argument support functions. */
7364 /* Return true when register may be used to pass function parameters. */
7366 ix86_function_arg_regno_p (int regno)
7369 enum calling_abi call_abi;
7370 const int *parm_regs;
/* Bound registers pass pointer bounds when MPX is enabled.  */
7372 if (TARGET_MPX && BND_REGNO_P (regno))
/* 32-bit cases (some guards elided): integer regparm registers, plus
   SSE/MMX registers up to their respective regparm maxima.  */
7378 return (regno < REGPARM_MAX
7379 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
7381 return (regno < REGPARM_MAX
7382 || (TARGET_MMX && MMX_REGNO_P (regno)
7383 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
7384 || (TARGET_SSE && SSE_REGNO_P (regno)
7385 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
7388 if (TARGET_SSE && SSE_REGNO_P (regno)
7389 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
7392 /* TODO: The function should depend on current function ABI but
7393 builtins.c would need updating then. Therefore we use the
7395 call_abi = ix86_cfun_abi ();
7397 /* RAX is used as hidden argument to va_arg functions. */
7398 if (call_abi == SYSV_ABI && regno == AX_REG)
/* Otherwise check the ABI's integer parameter-register table.  */
7401 if (call_abi == MS_ABI)
7402 parm_regs = x86_64_ms_abi_int_parameter_registers;
7404 parm_regs = x86_64_int_parameter_registers;
7406 for (i = 0; i < (call_abi == MS_ABI
7407 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
7408 if (regno == parm_regs[i])
7413 /* Return if we do not know how to pass TYPE solely in registers. */
7416 ix86_must_pass_in_stack (machine_mode mode, const_tree type)
/* Defer to the generic variable-size / padding test first.  */
7418 if (must_pass_in_stack_var_size_or_pad (mode, type))
7421 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
7422 The layout_type routine is crafty and tries to trick us into passing
7423 currently unsupported vector types on the stack by using TImode. */
7424 return (!TARGET_64BIT && mode == TImode
7425 && type && TREE_CODE (type) != VECTOR_TYPE);
7428 /* It returns the size, in bytes, of the area reserved for arguments passed
7429 in registers for the function represented by fndecl dependent to the used
/* ABI.  FNDECL may be a FUNCTION_DECL or a function type; only 64-bit
   MS-ABI reserves shadow space (size constant elided here).  */
7432 ix86_reg_parm_stack_space (const_tree fndecl)
7434 enum calling_abi call_abi = SYSV_ABI;
7435 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
7436 call_abi = ix86_function_abi (fndecl);
7438 call_abi = ix86_function_type_abi (fndecl);
7439 if (TARGET_64BIT && call_abi == MS_ABI)
7444 /* We add this as a workaround in order to use libc_has_function
/* Thin forwarding wrapper around the target hook.  */
7447 ix86_libc_has_function (enum function_class fn_class)
7449 return targetm.libc_has_function (fn_class);
7452 /* Returns value SYSV_ABI, MS_ABI dependent on fntype,
7453 specifying the call abi used. */
7455 ix86_function_type_abi (const_tree fntype)
/* Start from the command-line/target default ABI.  */
7457 enum calling_abi abi = ix86_abi;
7459 if (fntype == NULL_TREE || TYPE_ATTRIBUTES (fntype) == NULL_TREE)
/* ms_abi / sysv_abi attributes flip the default; ms_abi is rejected
   on x32 (flip assignments elided in this listing).  */
7463 && lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
7466 error ("X32 does not support ms_abi attribute");
7470 else if (abi == MS_ABI
7471 && lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
/* ABI of FNDECL, or the default ix86_abi when FNDECL is null.  */
7477 static enum calling_abi
7478 ix86_function_abi (const_tree fndecl)
7480 return fndecl ? ix86_function_type_abi (TREE_TYPE (fndecl)) : ix86_abi;
7483 /* Returns value SYSV_ABI, MS_ABI dependent on cfun,
7484 specifying the call abi used. */
7486 ix86_cfun_abi (void)
7488 return cfun ? cfun->machine->call_abi : ix86_abi;
/* True when FN carries the ms_hook_prologue attribute; the attribute
   is rejected (with an error) on nested functions.  */
7492 ix86_function_ms_hook_prologue (const_tree fn)
7494 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
7496 if (decl_function_context (fn) != NULL_TREE)
7497 error_at (DECL_SOURCE_LOCATION (fn),
7498 "ms_hook_prologue is not compatible with nested function")
7505 /* Write the extra assembler code needed to declare a function properly. */
7508 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
7511 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
/* Hot-patch support: pad the bytes before the label with 0xCC
   (int3) filler -- 32 bytes for 64-bit, 16 for 32-bit.  */
7515 int i, filler_count = (TARGET_64BIT ? 32 : 16);
7516 unsigned int filler_cc = 0xcccccccc;
7518 for (i = 0; i < filler_count; i += 4)
7519 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
7522 #ifdef SUBTARGET_ASM_UNWIND_INIT
7523 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
7526 ASM_OUTPUT_LABEL (asm_out_file, fname);
7528 /* Output magic byte marker, if hot-patch attribute is set. */
7533 /* leaq [%rsp + 0], %rsp */
/* 64-bit marker: an 8-byte no-op lea that a patcher can overwrite.  */
7534 asm_fprintf (asm_out_file, ASM_BYTE
7535 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
7539 /* movl.s %edi, %edi
7541 movl.s %esp, %ebp */
/* 32-bit marker: the canonical mov edi,edi / push ebp / mov ebp,esp
   hot-patch prologue bytes.  */
7542 asm_fprintf (asm_out_file, ASM_BYTE
7543 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
7549 extern void init_regs (void);
7551 /* Implementation of call abi switching target hook. Specific to FNDECL
7552 the specific call register sets are set. See also
7553 ix86_conditional_register_usage for more details. */
7555 ix86_call_abi_override (const_tree fndecl)
7557 cfun->machine->call_abi = ix86_function_abi (fndecl);
7560 /* Return 1 if pseudo register should be created and used to hold
7561 GOT address for PIC code. */
7563 ix86_use_pseudo_pic_reg (void)
/* Condition continues on elided lines; small-PIC code model is one
   of the qualifying cases visible here.  */
7566 && (ix86_cmodel == CM_SMALL_PIC
7573 /* Initialize large model PIC register. */
/* Emits the set_rip/set_got_offset pair around a preserved label and
   adds the offset into pic_offset_table_rtx, using TMP_REGNO as the
   scratch register.  Only valid for 64-bit (Pmode == DImode).  */
7576 ix86_init_large_pic_reg (unsigned int tmp_regno)
7578 rtx_code_label *label;
7581 gcc_assert (Pmode == DImode);
7582 label = gen_label_rtx ();
/* Keep the label: the set_rip insn references it after emission.  */
7584 LABEL_PRESERVE_P (label) = 1;
7585 tmp_reg = gen_rtx_REG (Pmode, tmp_regno);
7586 gcc_assert (REGNO (pic_offset_table_rtx) != tmp_regno);
7587 emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
7589 emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
7590 emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
7591 pic_offset_table_rtx, tmp_reg));
7594 /* Create and initialize PIC register if required. */
/* Emits the GOT-pointer setup sequence on the entry edge when a
   pseudo PIC register is in use.  */
7596 ix86_init_pic_reg (void)
7601 if (!ix86_use_pseudo_pic_reg ())
/* 64-bit: large model needs the multi-insn sequence; otherwise a
   single set_got_rex64 suffices.  */
7608 if (ix86_cmodel == CM_LARGE_PIC)
7609 ix86_init_large_pic_reg (R11_REG);
7611 emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
7615 /* If there is future mcount call in the function it is more profitable
7616 to emit SET_GOT into ABI defined REAL_PIC_OFFSET_TABLE_REGNUM. */
7617 rtx reg = crtl->profile
7618 ? gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM)
7619 : pic_offset_table_rtx;
7620 rtx_insn *insn = emit_insn (gen_set_got (reg));
7621 RTX_FRAME_RELATED_P (insn) = 1;
7623 emit_move_insn (pic_offset_table_rtx, reg);
7624 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
/* Splice the generated sequence onto the entry edge so it runs
   before any user code.  */
7630 entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
7631 insert_insn_on_edge (seq, entry_edge);
7632 commit_one_edge_insertion (entry_edge);
7635 /* Initialize a variable CUMULATIVE_ARGS
7636 for a call to a function whose data type is FNTYPE.
7637 For a library call, FNTYPE is 0. */
7640 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
7641 tree fntype, /* tree ptr for function decl */
7642 rtx libname, /* SYMBOL_REF of library name or 0 */
7646 struct cgraph_local_info *i = NULL;
7647 struct cgraph_node *target = NULL;
7649 memset (cum, 0, sizeof (*cum));
/* Resolve the call ABI, preferring the cgraph target (so aliases and
   thunks resolve to the real function), then the decl, then the type.  */
7653 target = cgraph_node::get (fndecl);
7656 target = target->function_symbol ();
7657 i = cgraph_node::local_info (target->decl);
7658 cum->call_abi = ix86_function_abi (target->decl);
7661 cum->call_abi = ix86_function_abi (fndecl);
7664 cum->call_abi = ix86_function_type_abi (fntype);
7666 cum->caller = caller;
7668 /* Set up the number of registers to use for passing arguments. */
7669 cum->nregs = ix86_regparm;
/* 64-bit register counts depend on the resolved ABI.  */
7672 cum->nregs = (cum->call_abi == SYSV_ABI
7673 ? X86_64_REGPARM_MAX
7674 : X86_64_MS_REGPARM_MAX);
7678 cum->sse_nregs = SSE_REGPARM_MAX;
7681 cum->sse_nregs = (cum->call_abi == SYSV_ABI
7682 ? X86_64_SSE_REGPARM_MAX
7683 : X86_64_MS_SSE_REGPARM_MAX);
7687 cum->mmx_nregs = MMX_REGPARM_MAX;
/* Default to warning about every vector-ABI mismatch; some are
   suppressed below for stdarg functions.  */
7688 cum->warn_avx512f = true;
7689 cum->warn_avx = true;
7690 cum->warn_sse = true;
7691 cum->warn_mmx = true;
7693 /* Because type might mismatch in between caller and callee, we need to
7694 use actual type of function for local calls.
7695 FIXME: cgraph_analyze can be told to actually record if function uses
7696 va_start so for local functions maybe_vaarg can be made aggressive
7698 FIXME: once typesystem is fixed, we won't need this code anymore. */
7699 if (i && i->local && i->can_change_signature)
7700 fntype = TREE_TYPE (target->decl);
7701 cum->stdarg = stdarg_p (fntype);
7702 cum->maybe_vaarg = (fntype
7703 ? (!prototype_p (fntype) || stdarg_p (fntype))
/* MPX bound-register bookkeeping starts from the first bound reg.  */
7706 cum->bnd_regno = FIRST_BND_REG;
7707 cum->bnds_in_bt = 0;
7708 cum->force_bnd_pass = 0;
7713 /* If there are variable arguments, then we won't pass anything
7714 in registers in 32-bit mode. */
7715 if (stdarg_p (fntype))
7718 /* Since in 32-bit, variable arguments are always passed on
7719 stack, there is scratch register available for indirect
7721 cfun->machine->arg_reg_available = true;
/* Vector-ABI warnings are moot when nothing goes in registers.  */
7724 cum->warn_avx512f = false;
7725 cum->warn_avx = false;
7726 cum->warn_sse = false;
7727 cum->warn_mmx = false;
7731 /* Use ecx and edx registers if function has fastcall attribute,
7732 else look for regparm information. */
7735 unsigned int ccvt = ix86_get_callcvt (fntype);
7736 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
7739 cum->fastcall = 1; /* Same first register as in fastcall. */
7741 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
7747 cum->nregs = ix86_function_regparm (fntype, fndecl);
7750 /* Set up the number of SSE registers used for passing SFmode
7751 and DFmode arguments. Warn for mismatching ABI. */
7752 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
/* Record whether any integer register remains for an indirect-call
   scratch (used by the sibcall legality check).  */
7755 cfun->machine->arg_reg_available = (cum->nregs > 0);
7758 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
7759 But in the case of vector types, it is some vector mode.
7761 When we have only some of our vector isa extensions enabled, then there
7762 are some modes for which vector_mode_supported_p is false. For these
7763 modes, the generic vector support in gcc will choose some non-vector mode
7764 in order to implement the type. By computing the natural mode, we'll
7765 select the proper ABI location for the operand and not depend on whatever
7766 the middle-end decides to do with these vector types.
7768 The middle-end can't deal with the vector types > 16 bytes. In this
7769 case, we return the original mode and warn ABI change if CUM isn't
7772 If INT_RETURN is true, warn ABI change if the vector mode isn't
7773 available for function return value. */
7776 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
7779 machine_mode mode = TYPE_MODE (type);
/* Only vector types whose TYPE_MODE degraded to a non-vector mode
   need reconstruction.  */
7781 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
7783 HOST_WIDE_INT size = int_size_in_bytes (type);
7784 if ((size == 8 || size == 16 || size == 32 || size == 64)
7785 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
7786 && TYPE_VECTOR_SUBPARTS (type) > 1)
7788 machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
7790 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
7791 mode = MIN_MODE_VECTOR_FLOAT;
7793 mode = MIN_MODE_VECTOR_INT;
7795 /* Get the mode which has this inner mode and number of units. */
7796 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
7797 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
7798 && GET_MODE_INNER (mode) == innermode)
/* Per-ISA -Wpsabi diagnostics: each warning fires at most once per
   compilation (the static flags), separately for arguments vs.
   return values.  */
7800 if (size == 64 && !TARGET_AVX512F && !TARGET_IAMCU)
7802 static bool warnedavx512f;
7803 static bool warnedavx512f_ret;
7805 if (cum && cum->warn_avx512f && !warnedavx512f)
7807 if (warning (OPT_Wpsabi, "AVX512F vector argument "
7808 "without AVX512F enabled changes the ABI"))
7809 warnedavx512f = true;
7811 else if (in_return && !warnedavx512f_ret)
7813 if (warning (OPT_Wpsabi, "AVX512F vector return "
7814 "without AVX512F enabled changes the ABI"))
7815 warnedavx512f_ret = true;
7818 return TYPE_MODE (type);
7820 else if (size == 32 && !TARGET_AVX && !TARGET_IAMCU)
7822 static bool warnedavx;
7823 static bool warnedavx_ret;
7825 if (cum && cum->warn_avx && !warnedavx)
7827 if (warning (OPT_Wpsabi, "AVX vector argument "
7828 "without AVX enabled changes the ABI"))
7831 else if (in_return && !warnedavx_ret)
7833 if (warning (OPT_Wpsabi, "AVX vector return "
7834 "without AVX enabled changes the ABI"))
7835 warnedavx_ret = true;
7838 return TYPE_MODE (type);
7840 else if (((size == 8 && TARGET_64BIT) || size == 16)
7844 static bool warnedsse;
7845 static bool warnedsse_ret;
7847 if (cum && cum->warn_sse && !warnedsse)
7849 if (warning (OPT_Wpsabi, "SSE vector argument "
7850 "without SSE enabled changes the ABI"))
7853 else if (!TARGET_64BIT && in_return && !warnedsse_ret)
7855 if (warning (OPT_Wpsabi, "SSE vector return "
7856 "without SSE enabled changes the ABI"))
7857 warnedsse_ret = true;
7860 else if ((size == 8 && !TARGET_64BIT)
7864 static bool warnedmmx;
7865 static bool warnedmmx_ret;
7867 if (cum && cum->warn_mmx && !warnedmmx)
7869 if (warning (OPT_Wpsabi, "MMX vector argument "
7870 "without MMX enabled changes the ABI"))
7873 else if (in_return && !warnedmmx_ret)
7875 if (warning (OPT_Wpsabi, "MMX vector return "
7876 "without MMX enabled changes the ABI"))
7877 warnedmmx_ret = true;
7890 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
7891 this may not agree with the mode that the type system has chosen for the
7892 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
7893 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
7896 gen_reg_or_parallel (machine_mode mode, machine_mode orig_mode,
7901 if (orig_mode != BLKmode)
7902 tmp = gen_rtx_REG (orig_mode, regno);
/* BLKmode: wrap the natural-mode register in a one-entry PARALLEL
   at offset 0.  */
7905 tmp = gen_rtx_REG (mode, regno);
7906 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
7907 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
7913 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
7914 of this code is to classify each 8bytes of incoming argument by the register
7915 class and assign registers accordingly. */
7917 /* Return the union class of CLASS1 and CLASS2.
7918 See the x86-64 PS ABI for details. */
/* Implements the psABI "merge" step used when two fields share the
   same eightbyte; the numbered rules below follow the ABI document.  */
7920 static enum x86_64_reg_class
7921 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
7923 /* Rule #1: If both classes are equal, this is the resulting class. */
7924 if (class1 == class2)
7927 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
7929 if (class1 == X86_64_NO_CLASS)
7931 if (class2 == X86_64_NO_CLASS)
7934 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
7935 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
7936 return X86_64_MEMORY_CLASS;
7938 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
/* INTEGERSI + SSESF stays in the 32-bit integer subclass so a later
   pass can still use a 32-bit slot.  */
7939 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
7940 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
7941 return X86_64_INTEGERSI_CLASS;
7942 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
7943 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
7944 return X86_64_INTEGER_CLASS;
7946 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
7948 if (class1 == X86_64_X87_CLASS
7949 || class1 == X86_64_X87UP_CLASS
7950 || class1 == X86_64_COMPLEX_X87_CLASS
7951 || class2 == X86_64_X87_CLASS
7952 || class2 == X86_64_X87UP_CLASS
7953 || class2 == X86_64_COMPLEX_X87_CLASS)
7954 return X86_64_MEMORY_CLASS;
7956 /* Rule #6: Otherwise class SSE is used. */
7957 return X86_64_SSE_CLASS;
7960 /* Classify the argument of type TYPE and mode MODE.
7961 CLASSES will be filled by the register class used to pass each word
7962 of the operand. The number of words is returned. In case the parameter
7963 should be passed in memory, 0 is returned. As a special case for zero
7964 sized containers, classes[0] will be NO_CLASS and 1 is returned.
7966 BIT_OFFSET is used internally for handling records and specifies offset
7967 of the offset in bits modulo 512 to avoid overflow cases.
7969 See the x86-64 PS ABI for details.
/* Classify an argument of MODE and TYPE into the x86-64 psABI register
   classes CLASSES[0..MAX_CLASSES-1].  BIT_OFFSET is the bit position of
   the value within the enclosing object (nonzero when fields are
   classified recursively).  Returns the number of eightbytes consumed;
   a return of 0 means the argument is passed in memory.
   NOTE(review): several interior lines of this function are not visible
   in this view; comments below describe only the visible code.  */
7973 classify_argument (machine_mode mode, const_tree type,
7974 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
7976 HOST_WIDE_INT bytes =
7977 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
7978 int words = CEIL (bytes + (bit_offset % 64) / 8, UNITS_PER_WORD);
7980 /* Variable sized entities are always passed/returned in memory. */
7984 if (mode != VOIDmode
7985 && targetm.calls.must_pass_in_stack (mode, type))
7988 if (type && AGGREGATE_TYPE_P (type))
7992 enum x86_64_reg_class subclasses[MAX_CLASSES];
7994 /* On x86-64 we pass structures larger than 64 bytes on the stack. */
7998 for (i = 0; i < words; i++)
7999 classes[i] = X86_64_NO_CLASS;
8001 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
8002 signalize memory class, so handle it as special case. */
8005 classes[0] = X86_64_NO_CLASS;
8009 /* Classify each field of record and merge classes. */
8010 switch (TREE_CODE (type))
8013 /* And now merge the fields of structure. */
8014 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
8016 if (TREE_CODE (field) == FIELD_DECL)
8020 if (TREE_TYPE (field) == error_mark_node)
8023 /* Bitfields are always classified as integer. Handle them
8024 early, since later code would consider them to be
8025 misaligned integers. */
8026 if (DECL_BIT_FIELD (field))
8028 for (i = (int_bit_position (field)
8029 + (bit_offset % 64)) / 8 / 8;
8030 i < ((int_bit_position (field) + (bit_offset % 64))
8031 + tree_to_shwi (DECL_SIZE (field))
8034 merge_classes (X86_64_INTEGER_CLASS,
8041 type = TREE_TYPE (field);
8043 /* Flexible array member is ignored. */
8044 if (TYPE_MODE (type) == BLKmode
8045 && TREE_CODE (type) == ARRAY_TYPE
8046 && TYPE_SIZE (type) == NULL_TREE
8047 && TYPE_DOMAIN (type) != NULL_TREE
8048 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
8053 if (!warned && warn_psabi)
8056 inform (input_location,
8057 "the ABI of passing struct with"
8058 " a flexible array member has"
8059 " changed in GCC 4.4");
8063 num = classify_argument (TYPE_MODE (type), type,
8065 (int_bit_position (field)
8066 + bit_offset) % 512);
8069 pos = (int_bit_position (field)
8070 + (bit_offset % 64)) / 8 / 8;
8071 for (i = 0; i < num && (i + pos) < words; i++)
8073 merge_classes (subclasses[i], classes[i + pos]);
8080 /* Arrays are handled as small records. */
8083 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
8084 TREE_TYPE (type), subclasses, bit_offset);
8088 /* The partial classes are now full classes. */
8089 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
8090 subclasses[0] = X86_64_SSE_CLASS;
8091 if (subclasses[0] == X86_64_INTEGERSI_CLASS
8092 && !((bit_offset % 64) == 0 && bytes == 4))
8093 subclasses[0] = X86_64_INTEGER_CLASS;
8095 for (i = 0; i < words; i++)
8096 classes[i] = subclasses[i % num];
8101 case QUAL_UNION_TYPE:
8102 /* Unions are similar to RECORD_TYPE but offset is always 0.
8104 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
8106 if (TREE_CODE (field) == FIELD_DECL)
8110 if (TREE_TYPE (field) == error_mark_node)
8113 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
8114 TREE_TYPE (field), subclasses,
8118 for (i = 0; i < num && i < words; i++)
8119 classes[i] = merge_classes (subclasses[i], classes[i]);
8130 /* When size > 16 bytes, if the first one isn't
8131 X86_64_SSE_CLASS or any other ones aren't
8132 X86_64_SSEUP_CLASS, everything should be passed in
8134 if (classes[0] != X86_64_SSE_CLASS)
8137 for (i = 1; i < words; i++)
8138 if (classes[i] != X86_64_SSEUP_CLASS)
8142 /* Final merger cleanup. */
8143 for (i = 0; i < words; i++)
8145 /* If one class is MEMORY, everything should be passed in
8147 if (classes[i] == X86_64_MEMORY_CLASS)
8150 /* The X86_64_SSEUP_CLASS should be always preceded by
8151 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
8152 if (classes[i] == X86_64_SSEUP_CLASS
8153 && classes[i - 1] != X86_64_SSE_CLASS
8154 && classes[i - 1] != X86_64_SSEUP_CLASS)
8156 /* The first one should never be X86_64_SSEUP_CLASS. */
8157 gcc_assert (i != 0);
8158 classes[i] = X86_64_SSE_CLASS;
8161 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
8162 everything should be passed in memory. */
8163 if (classes[i] == X86_64_X87UP_CLASS
8164 && (classes[i - 1] != X86_64_X87_CLASS))
8168 /* The first one should never be X86_64_X87UP_CLASS. */
8169 gcc_assert (i != 0);
8170 if (!warned && warn_psabi)
8173 inform (input_location,
8174 "the ABI of passing union with long double"
8175 " has changed in GCC 4.4");
8183 /* Compute alignment needed. We align all types to natural boundaries with
8184 exception of XFmode that is aligned to 64bits. */
8185 if (mode != VOIDmode && mode != BLKmode)
8187 int mode_alignment = GET_MODE_BITSIZE (mode);
8190 mode_alignment = 128;
8191 else if (mode == XCmode)
8192 mode_alignment = 256;
8193 if (COMPLEX_MODE_P (mode))
8194 mode_alignment /= 2;
8195 /* Misaligned fields are always returned in memory. */
8196 if (bit_offset % mode_alignment)
8200 /* for V1xx modes, just use the base mode */
8201 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
8202 && GET_MODE_UNIT_SIZE (mode) == bytes)
8203 mode = GET_MODE_INNER (mode);
8205 /* Classification of atomic types. */
8210 classes[0] = X86_64_SSE_CLASS;
8213 classes[0] = X86_64_SSE_CLASS;
8214 classes[1] = X86_64_SSEUP_CLASS;
8224 int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
8226 /* Analyze last 128 bits only. */
8227 size = (size - 1) & 0x7f;
8231 classes[0] = X86_64_INTEGERSI_CLASS;
8236 classes[0] = X86_64_INTEGER_CLASS;
8239 else if (size < 64+32)
8241 classes[0] = X86_64_INTEGER_CLASS;
8242 classes[1] = X86_64_INTEGERSI_CLASS;
8245 else if (size < 64+64)
8247 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
8255 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
8259 /* OImode shouldn't be used directly. */
8264 if (!(bit_offset % 64))
8265 classes[0] = X86_64_SSESF_CLASS;
8267 classes[0] = X86_64_SSE_CLASS;
8270 classes[0] = X86_64_SSEDF_CLASS;
8273 classes[0] = X86_64_X87_CLASS;
8274 classes[1] = X86_64_X87UP_CLASS;
8277 classes[0] = X86_64_SSE_CLASS;
8278 classes[1] = X86_64_SSEUP_CLASS;
8281 classes[0] = X86_64_SSE_CLASS;
8282 if (!(bit_offset % 64))
8288 if (!warned && warn_psabi)
8291 inform (input_location,
8292 "the ABI of passing structure with complex float"
8293 " member has changed in GCC 4.4");
8295 classes[1] = X86_64_SSESF_CLASS;
8299 classes[0] = X86_64_SSEDF_CLASS;
8300 classes[1] = X86_64_SSEDF_CLASS;
8303 classes[0] = X86_64_COMPLEX_X87_CLASS;
/* This mode is larger than 16 bytes. */
8314 classes[0] = X86_64_SSE_CLASS;
8315 classes[1] = X86_64_SSEUP_CLASS;
8316 classes[2] = X86_64_SSEUP_CLASS;
8317 classes[3] = X86_64_SSEUP_CLASS;
8325 classes[0] = X86_64_SSE_CLASS;
8326 classes[1] = X86_64_SSEUP_CLASS;
8327 classes[2] = X86_64_SSEUP_CLASS;
8328 classes[3] = X86_64_SSEUP_CLASS;
8329 classes[4] = X86_64_SSEUP_CLASS;
8330 classes[5] = X86_64_SSEUP_CLASS;
8331 classes[6] = X86_64_SSEUP_CLASS;
8332 classes[7] = X86_64_SSEUP_CLASS;
8340 classes[0] = X86_64_SSE_CLASS;
8341 classes[1] = X86_64_SSEUP_CLASS;
8349 classes[0] = X86_64_SSE_CLASS;
8355 gcc_assert (VECTOR_MODE_P (mode));
8360 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
8362 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
8363 classes[0] = X86_64_INTEGERSI_CLASS;
8365 classes[0] = X86_64_INTEGER_CLASS;
8366 classes[1] = X86_64_INTEGER_CLASS;
8367 return 1 + (bytes > 8);
8371 /* Examine the argument and return set number of register required in each
8372 class. Return true iff parameter should be passed in memory. */
/* Classify MODE/TYPE via classify_argument and tally how many integer
   (*INT_NREGS) and SSE (*SSE_NREGS) registers the value occupies, by
   walking the resulting class array.  IN_RETURN distinguishes return
   values from arguments.  */
8375 examine_argument (machine_mode mode, const_tree type, int in_return,
8376 int *int_nregs, int *sse_nregs)
8378 enum x86_64_reg_class regclass[MAX_CLASSES];
8379 int n = classify_argument (mode, type, regclass, 0);
/* Walk the classes backwards, accumulating per-class register costs.  */
8386 for (n--; n >= 0; n--)
8387 switch (regclass[n])
8389 case X86_64_INTEGER_CLASS:
8390 case X86_64_INTEGERSI_CLASS:
8393 case X86_64_SSE_CLASS:
8394 case X86_64_SSESF_CLASS:
8395 case X86_64_SSEDF_CLASS:
8398 case X86_64_NO_CLASS:
8399 case X86_64_SSEUP_CLASS:
8401 case X86_64_X87_CLASS:
8402 case X86_64_X87UP_CLASS:
8403 case X86_64_COMPLEX_X87_CLASS:
8407 case X86_64_MEMORY_CLASS:
8414 /* Construct container for the argument used by GCC interface. See
8415 FUNCTION_ARG for the detailed description. */
/* Build the RTL (a REG or a PARALLEL of EXPR_LISTs) describing where an
   argument or return value of MODE/TYPE lives, given the available
   integer registers INTREG/NINTREGS and SSE registers starting at
   SSE_REGNO with NSSEREGS available.  IN_RETURN selects return-value
   handling.  Emits (once per compilation, via the static flags) errors
   when the ABI demands SSE or x87 registers that are disabled.  */
8418 construct_container (machine_mode mode, machine_mode orig_mode,
8419 const_tree type, int in_return, int nintregs, int nsseregs,
8420 const int *intreg, int sse_regno)
8422 /* The following variables hold the static issued_error state. */
8423 static bool issued_sse_arg_error;
8424 static bool issued_sse_ret_error;
8425 static bool issued_x87_ret_error;
8427 machine_mode tmpmode;
8429 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
8430 enum x86_64_reg_class regclass[MAX_CLASSES];
8434 int needed_sseregs, needed_intregs;
8435 rtx exp[MAX_CLASSES];
8438 n = classify_argument (mode, type, regclass, 0);
8441 if (examine_argument (mode, type, in_return, &needed_intregs,
8444 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
8447 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
8448 some less clueful developer tries to use floating-point anyway. */
8449 if (needed_sseregs && !TARGET_SSE)
8453 if (!issued_sse_ret_error)
8455 error ("SSE register return with SSE disabled");
8456 issued_sse_ret_error = true;
8459 else if (!issued_sse_arg_error)
8461 error ("SSE register argument with SSE disabled");
8462 issued_sse_arg_error = true;
8467 /* Likewise, error if the ABI requires us to return values in the
8468 x87 registers and the user specified -mno-80387. */
8469 if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
8470 for (i = 0; i < n; i++)
8471 if (regclass[i] == X86_64_X87_CLASS
8472 || regclass[i] == X86_64_X87UP_CLASS
8473 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
8475 if (!issued_x87_ret_error)
8477 error ("x87 register return with x87 disabled");
8478 issued_x87_ret_error = true;
8483 /* First construct simple cases. Avoid SCmode, since we want to use
8484 single register to pass this type. */
8485 if (n == 1 && mode != SCmode)
8486 switch (regclass[0])
8488 case X86_64_INTEGER_CLASS:
8489 case X86_64_INTEGERSI_CLASS:
8490 return gen_rtx_REG (mode, intreg[0]);
8491 case X86_64_SSE_CLASS:
8492 case X86_64_SSESF_CLASS:
8493 case X86_64_SSEDF_CLASS:
8494 if (mode != BLKmode)
8495 return gen_reg_or_parallel (mode, orig_mode,
8496 SSE_REGNO (sse_regno));
8498 case X86_64_X87_CLASS:
8499 case X86_64_COMPLEX_X87_CLASS:
8500 return gen_rtx_REG (mode, FIRST_STACK_REG);
8501 case X86_64_NO_CLASS:
8502 /* Zero sized array, struct or class. */
/* Two-eightbyte SSE values (e.g. 16-byte vectors) go in one SSE reg.  */
8508 && regclass[0] == X86_64_SSE_CLASS
8509 && regclass[1] == X86_64_SSEUP_CLASS
8511 return gen_reg_or_parallel (mode, orig_mode,
8512 SSE_REGNO (sse_regno));
8514 && regclass[0] == X86_64_SSE_CLASS
8515 && regclass[1] == X86_64_SSEUP_CLASS
8516 && regclass[2] == X86_64_SSEUP_CLASS
8517 && regclass[3] == X86_64_SSEUP_CLASS
8519 return gen_reg_or_parallel (mode, orig_mode,
8520 SSE_REGNO (sse_regno));
8522 && regclass[0] == X86_64_SSE_CLASS
8523 && regclass[1] == X86_64_SSEUP_CLASS
8524 && regclass[2] == X86_64_SSEUP_CLASS
8525 && regclass[3] == X86_64_SSEUP_CLASS
8526 && regclass[4] == X86_64_SSEUP_CLASS
8527 && regclass[5] == X86_64_SSEUP_CLASS
8528 && regclass[6] == X86_64_SSEUP_CLASS
8529 && regclass[7] == X86_64_SSEUP_CLASS
8531 return gen_reg_or_parallel (mode, orig_mode,
8532 SSE_REGNO (sse_regno));
8534 && regclass[0] == X86_64_X87_CLASS
8535 && regclass[1] == X86_64_X87UP_CLASS)
8536 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
8539 && regclass[0] == X86_64_INTEGER_CLASS
8540 && regclass[1] == X86_64_INTEGER_CLASS
8541 && (mode == CDImode || mode == TImode)
8542 && intreg[0] + 1 == intreg[1])
8543 return gen_rtx_REG (mode, intreg[0]);
8545 /* Otherwise figure out the entries of the PARALLEL. */
8546 for (i = 0; i < n; i++)
8550 switch (regclass[i])
8552 case X86_64_NO_CLASS:
8554 case X86_64_INTEGER_CLASS:
8555 case X86_64_INTEGERSI_CLASS:
8556 /* Merge TImodes on aligned occasions here too. */
8557 if (i * 8 + 8 > bytes)
8559 = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
8560 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
8564 /* We've requested 24 bytes we
8565 don't have mode for. Use DImode. */
8566 if (tmpmode == BLKmode)
8569 = gen_rtx_EXPR_LIST (VOIDmode,
8570 gen_rtx_REG (tmpmode, *intreg),
8574 case X86_64_SSESF_CLASS:
8576 = gen_rtx_EXPR_LIST (VOIDmode,
8577 gen_rtx_REG (SFmode,
8578 SSE_REGNO (sse_regno)),
8582 case X86_64_SSEDF_CLASS:
8584 = gen_rtx_EXPR_LIST (VOIDmode,
8585 gen_rtx_REG (DFmode,
8586 SSE_REGNO (sse_regno)),
8590 case X86_64_SSE_CLASS:
8598 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
8608 && regclass[1] == X86_64_SSEUP_CLASS
8609 && regclass[2] == X86_64_SSEUP_CLASS
8610 && regclass[3] == X86_64_SSEUP_CLASS);
8616 && regclass[1] == X86_64_SSEUP_CLASS
8617 && regclass[2] == X86_64_SSEUP_CLASS
8618 && regclass[3] == X86_64_SSEUP_CLASS
8619 && regclass[4] == X86_64_SSEUP_CLASS
8620 && regclass[5] == X86_64_SSEUP_CLASS
8621 && regclass[6] == X86_64_SSEUP_CLASS
8622 && regclass[7] == X86_64_SSEUP_CLASS);
8630 = gen_rtx_EXPR_LIST (VOIDmode,
8631 gen_rtx_REG (tmpmode,
8632 SSE_REGNO (sse_regno)),
8641 /* Empty aligned struct, union or class. */
8645 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
8646 for (i = 0; i < nexps; i++)
8647 XVECEXP (ret, 0, i) = exp [i];
8651 /* Update the data in CUM to advance over an argument of mode MODE
8652 and data type TYPE. (TYPE is null for libcalls where that information
8653 may not be available.)
8655 Return a number of integer registers advanced over. */
/* Advance the 32-bit argument cursor CUM past an argument of MODE/TYPE
   occupying BYTES bytes / WORDS words, updating the integer, SSE and
   MMX register counters as appropriate.  */
8658 function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
8659 const_tree type, HOST_WIDE_INT bytes,
8660 HOST_WIDE_INT words)
/* NOTE(review): NULL is used to initialize a bool; 'false' is the
   correct initializer here.  */
8663 bool error_p = NULL;
8667 /* Intel MCU psABI passes scalars and aggregates no larger than 8
8668 bytes in registers. */
8669 if (!VECTOR_MODE_P (mode) && bytes <= 8)
8689 cum->words += words;
8690 cum->nregs -= words;
8691 cum->regno += words;
8692 if (cum->nregs >= 0)
8694 if (cum->nregs <= 0)
8697 cfun->machine->arg_reg_available = false;
8703 /* OImode shouldn't be used directly. */
8707 if (cum->float_in_sse == -1)
8709 if (cum->float_in_sse < 2)
8712 if (cum->float_in_sse == -1)
8714 if (cum->float_in_sse < 1)
/* SSE-class arguments: scalars only; aggregates are not counted.  */
8737 if (!type || !AGGREGATE_TYPE_P (type))
8739 cum->sse_words += words;
8740 cum->sse_nregs -= 1;
8741 cum->sse_regno += 1;
8742 if (cum->sse_nregs <= 0)
/* MMX-class arguments: same pattern as the SSE case above.  */
8756 if (!type || !AGGREGATE_TYPE_P (type))
8758 cum->mmx_words += words;
8759 cum->mmx_nregs -= 1;
8760 cum->mmx_regno += 1;
8761 if (cum->mmx_nregs <= 0)
8771 cum->float_in_sse = 0;
8772 error ("calling %qD with SSE calling convention without "
8773 "SSE/SSE2 enabled", cum->decl);
8774 sorry ("this is a GCC bug that can be worked around by adding "
8775 "attribute used to function called");
/* Advance the 64-bit SysV argument cursor CUM past an argument of
   MODE/TYPE.  If the argument fits in the remaining integer/SSE
   registers, consume them; otherwise account for it on the stack,
   aligning cum->words to the argument's boundary first.  */
8782 function_arg_advance_64 (CUMULATIVE_ARGS *cum, machine_mode mode,
8783 const_tree type, HOST_WIDE_INT words, bool named)
8785 int int_nregs, sse_nregs;
8787 /* Unnamed 512 and 256bit vector mode parameters are passed on stack. */
8788 if (!named && (VALID_AVX512F_REG_MODE (mode)
8789 || VALID_AVX256_REG_MODE (mode)))
8792 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
8793 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
8795 cum->nregs -= int_nregs;
8796 cum->sse_nregs -= sse_nregs;
8797 cum->regno += int_nregs;
8798 cum->sse_regno += sse_nregs;
/* Stack-passed: round up to the argument boundary, then advance.  */
8803 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
8804 cum->words = ROUND_UP (cum->words, align);
8805 cum->words += words;
/* Advance the MS-ABI (Win64) argument cursor CUM by WORDS.  Win64 only
   passes 1/2/4/8-byte values directly; anything else goes indirect.  */
8811 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
8812 HOST_WIDE_INT words)
8814 /* Otherwise, this should be passed indirect. */
8815 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
8817 cum->words += words;
8827 /* Update the data in CUM to advance over an argument of mode MODE and
8828 data type TYPE. (TYPE is null for libcalls where that information
8829 may not be available.) */
/* TARGET_FUNCTION_ARG_ADVANCE hook: update CUM past an argument of
   MODE/TYPE, dispatching to the 32-bit, 64-bit SysV or MS-ABI helper.
   Also maintains the MPX pointer-bounds bookkeeping (bnd_regno,
   force_bnd_pass, bnds_in_bt) for chkp-instrumented calls.  */
8832 ix86_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
8833 const_tree type, bool named)
8835 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8836 HOST_WIDE_INT bytes, words;
8839 if (mode == BLKmode)
8840 bytes = int_size_in_bytes (type);
8842 bytes = GET_MODE_SIZE (mode);
8843 words = CEIL (bytes, UNITS_PER_WORD);
8846 mode = type_natural_mode (type, NULL, false);
/* Pointer-bounds arguments are tracked separately from ordinary args.  */
8848 if ((type && POINTER_BOUNDS_TYPE_P (type))
8849 || POINTER_BOUNDS_MODE_P (mode))
8851 /* If we pass bounds in BT then just update remained bounds count. */
8852 if (cum->bnds_in_bt)
8858 /* Update remained number of bounds to force. */
8859 if (cum->force_bnd_pass)
8860 cum->force_bnd_pass--;
8867 /* The first arg not going to Bounds Tables resets this counter. */
8868 cum->bnds_in_bt = 0;
8869 /* For unnamed args we always pass bounds to avoid bounds mess when
8870 passed and received types do not match. If bounds do not follow
8871 unnamed arg, still pretend required number of bounds were passed. */
8872 if (cum->force_bnd_pass)
8874 cum->bnd_regno += cum->force_bnd_pass;
8875 cum->force_bnd_pass = 0;
/* Dispatch on target word size and ABI.  */
8880 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
8882 if (call_abi == MS_ABI)
8883 nregs = function_arg_advance_ms_64 (cum, bytes, words);
8885 nregs = function_arg_advance_64 (cum, mode, type, words, named);
8888 nregs = function_arg_advance_32 (cum, mode, type, bytes, words);
8890 /* For stdarg we expect bounds to be passed for each value passed
8893 cum->force_bnd_pass = nregs;
8894 /* For pointers passed in memory we expect bounds passed in Bounds
8897 cum->bnds_in_bt = chkp_type_bounds_count (type);
8900 /* Define where to put the arguments to a function.
8901 Value is zero to push the argument on the stack,
8902 or a hard register in which to store the argument.
8904 MODE is the argument's machine mode.
8905 TYPE is the data type of the argument (as a tree).
8906 This is null for libcalls where that information may
8908 CUM is a variable of type CUMULATIVE_ARGS which gives info about
8909 the preceding args and about the function being called.
8910 NAMED is nonzero if this argument is a named parameter
8911 (otherwise it is an extra parameter matching an ellipsis). */
/* Return the register (or NULL for stack) in which a 32-bit-ABI
   argument of MODE/TYPE is passed, given the cursor CUM.  ORIG_MODE is
   the pre-promotion mode; BYTES/WORDS are the argument's size.  */
8914 function_arg_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
8915 machine_mode orig_mode, const_tree type,
8916 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
8918 bool error_p = false;
8919 /* Avoid the AL settings for the Unix64 ABI. */
8920 if (mode == VOIDmode)
8925 /* Intel MCU psABI passes scalars and aggregates no larger than 8
8926 bytes in registers. */
8927 if (!VECTOR_MODE_P (mode) && bytes <= 8)
8946 if (words <= cum->nregs)
8948 int regno = cum->regno;
8950 /* Fastcall allocates the first two DWORD (SImode) or
8951 smaller arguments to ECX and EDX if it isn't an
8957 || (type && AGGREGATE_TYPE_P (type)))
8960 /* ECX not EAX is the first allocated register. */
8961 if (regno == AX_REG)
8964 return gen_rtx_REG (mode, regno);
8969 if (cum->float_in_sse == -1)
8971 if (cum->float_in_sse < 2)
8974 if (cum->float_in_sse == -1)
8976 if (cum->float_in_sse < 1)
8980 /* In 32bit, we pass TImode in xmm registers. */
8987 if (!type || !AGGREGATE_TYPE_P (type))
8990 return gen_reg_or_parallel (mode, orig_mode,
8991 cum->sse_regno + FIRST_SSE_REG)
8997 /* OImode and XImode shouldn't be used directly. */
/* SSE vector modes: scalars only, in the next SSE register.  */
9012 if (!type || !AGGREGATE_TYPE_P (type))
9015 return gen_reg_or_parallel (mode, orig_mode,
9016 cum->sse_regno + FIRST_SSE_REG);
/* MMX vector modes: scalars only, in the next MMX register.  */
9026 if (!type || !AGGREGATE_TYPE_P (type))
9029 return gen_reg_or_parallel (mode, orig_mode,
9030 cum->mmx_regno + FIRST_MMX_REG);
9036 cum->float_in_sse = 0;
9037 error ("calling %qD with SSE calling convention without "
9038 "SSE/SSE2 enabled", cum->decl);
9039 sorry ("this is a GCC bug that can be worked around by adding "
9040 "attribute used to function called");
/* Return the register/PARALLEL for a 64-bit SysV argument of MODE/TYPE,
   or NULL to pass it on the stack.  A VOIDmode "argument" is the hidden
   AL value holding the number of SSE registers used by varargs.  */
9047 function_arg_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
9048 machine_mode orig_mode, const_tree type, bool named)
9050 /* Handle a hidden AL argument containing number of registers
9051 for varargs x86-64 functions. */
9052 if (mode == VOIDmode)
9053 return GEN_INT (cum->maybe_vaarg
9054 ? (cum->sse_nregs < 0
9055 ? X86_64_SSE_REGPARM_MAX
9076 /* Unnamed 256 and 512bit vector mode parameters are passed on stack. */
9082 return construct_container (mode, orig_mode, type, 0, cum->nregs,
9084 &x86_64_int_parameter_registers [cum->regno],
/* Return the register for an MS-ABI (Win64) argument of MODE, or NULL
   for stack.  Win64 assigns each argument a fixed slot; floats go in
   SSE registers, and unnamed floats are passed in both SSE and integer
   registers (as a PARALLEL) for the sake of varargs callees.  */
9089 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
9090 machine_mode orig_mode, bool named,
9091 HOST_WIDE_INT bytes)
9095 /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call.
9096 We use value of -2 to specify that current function call is MSABI. */
9097 if (mode == VOIDmode)
9098 return GEN_INT (-2);
9100 /* If we've run out of registers, it goes on the stack. */
9101 if (cum->nregs == 0)
9104 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
9106 /* Only floating point modes are passed in anything but integer regs. */
9107 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
9110 regno = cum->regno + FIRST_SSE_REG;
9115 /* Unnamed floating parameters are passed in both the
9116 SSE and integer registers. */
9117 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
9118 t2 = gen_rtx_REG (mode, regno);
9119 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
9120 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
9121 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
9124 /* Handle aggregated types passed in register. */
9125 if (orig_mode == BLKmode)
9127 if (bytes > 0 && bytes <= 8)
9128 mode = (bytes > 4 ? DImode : SImode);
9129 if (mode == BLKmode)
9133 return gen_reg_or_parallel (mode, orig_mode, regno);
9136 /* Return where to put the arguments to a function.
9137 Return zero to push the argument on the stack, or a hard register in which to store the argument.
9139 MODE is the argument's machine mode. TYPE is the data type of the
9140 argument. It is null for libcalls where that information may not be
9141 available. CUM gives information about the preceding args and about
9142 the function being called. NAMED is nonzero if this argument is a
9143 named parameter (otherwise it is an extra parameter matching an
/* TARGET_FUNCTION_ARG hook: return where an argument of OMODE/TYPE is
   passed (a REG/PARALLEL, or NULL for the stack), handling pointer
   bounds first and then dispatching on word size and ABI.  */
9147 ix86_function_arg (cumulative_args_t cum_v, machine_mode omode,
9148 const_tree type, bool named)
9150 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9151 machine_mode mode = omode;
9152 HOST_WIDE_INT bytes, words;
9155 /* All pointer bounds arguments are handled separately here. */
9156 if ((type && POINTER_BOUNDS_TYPE_P (type))
9157 || POINTER_BOUNDS_MODE_P (mode))
9159 /* Return NULL if bounds are forced to go in Bounds Table. */
9160 if (cum->bnds_in_bt)
9162 /* Return the next available bound reg if any. */
9163 else if (cum->bnd_regno <= LAST_BND_REG)
9164 arg = gen_rtx_REG (BNDmode, cum->bnd_regno)
9165 /* Return the next special slot number otherwise. */
9167 arg = GEN_INT (cum->bnd_regno - LAST_BND_REG - 1);
9172 if (mode == BLKmode)
9173 bytes = int_size_in_bytes (type);
9175 bytes = GET_MODE_SIZE (mode);
9176 words = CEIL (bytes, UNITS_PER_WORD);
9178 /* To simplify the code below, represent vector types with a vector mode
9179 even if MMX/SSE are not active. */
9180 if (type && TREE_CODE (type) == VECTOR_TYPE)
9181 mode = type_natural_mode (type, cum, false);
/* Dispatch to the ABI-specific helper.  */
9185 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
9187 if (call_abi == MS_ABI)
9188 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
9190 arg = function_arg_64 (cum, mode, omode, type, named);
9193 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
9198 /* A C expression that indicates when an argument must be passed by
9199 reference. If nonzero for an argument, a copy of that argument is
9200 made in memory and a pointer to the argument is passed instead of
9201 the argument itself. The pointer is passed in whatever way is
9202 appropriate for passing a pointer to that type. */
/* TARGET_PASS_BY_REFERENCE hook: decide whether an argument of
   MODE/TYPE must be passed by hidden reference rather than by value.
   Bounds are never by-reference; Win64 passes anything that is not
   exactly 1, 2, 4 or 8 bytes by reference.  */
9205 ix86_pass_by_reference (cumulative_args_t cum_v, machine_mode mode,
9206 const_tree type, bool)
9208 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9210 /* Bounds are never passed by reference. */
9211 if ((type && POINTER_BOUNDS_TYPE_P (type))
9212 || POINTER_BOUNDS_MODE_P (mode))
9217 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
9219 /* See Windows x64 Software Convention. */
9220 if (call_abi == MS_ABI)
9222 HOST_WIDE_INT msize = GET_MODE_SIZE (mode);
9226 /* Arrays are passed by reference. */
9227 if (TREE_CODE (type) == ARRAY_TYPE)
9230 if (RECORD_OR_UNION_TYPE_P (type))
9232 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
9233 are passed by reference. */
9234 msize = int_size_in_bytes (type);
9238 /* __m128 is passed by reference. */
9239 return msize != 1 && msize != 2 && msize != 4 && msize != 8;
9241 else if (type && int_size_in_bytes (type) == -1)
9248 /* Return true when TYPE should be 128bit aligned for 32bit argument
9249 passing ABI. XXX: This function is obsolete and is only used for
9250 checking psABI compatibility with previous versions of GCC. */
/* Return true when TYPE would have been considered 128-bit aligned for
   32-bit argument passing by older GCCs.  Recurses into aggregates.
   Kept only for -Wpsabi compatibility warnings.  */
9253 ix86_compat_aligned_value_p (const_tree type)
9255 machine_mode mode = TYPE_MODE (type);
9256 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
9260 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
9262 if (TYPE_ALIGN (type) < 128)
9265 if (AGGREGATE_TYPE_P (type))
9267 /* Walk the aggregates recursively. */
9268 switch (TREE_CODE (type))
9272 case QUAL_UNION_TYPE:
9276 /* Walk all the structure fields. */
9277 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
9279 if (TREE_CODE (field) == FIELD_DECL
9280 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
9287 /* Just for use if some languages passes arrays by value. */
9288 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
9299 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
9300 XXX: This function is obsolete and is only used for checking psABI
9301 compatibility with previous versions of GCC. */
/* Return the argument alignment older GCCs would have used for
   MODE/TYPE given computed alignment ALIGN.  Used only to diagnose
   psABI changes (-Wpsabi).  */
9304 ix86_compat_function_arg_boundary (machine_mode mode,
9305 const_tree type, unsigned int align)
9307 /* In 32bit, only _Decimal128 and __float128 are aligned to their
9308 natural boundaries. */
9309 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
9311 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
9312 make an exception for SSE modes since these require 128bit
9315 The handling here differs from field_alignment. ICC aligns MMX
9316 arguments to 4 byte boundaries, while structure fields are aligned
9317 to 8 byte boundaries. */
9320 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
9321 align = PARM_BOUNDARY;
9325 if (!ix86_compat_aligned_value_p (type))
9326 align = PARM_BOUNDARY;
/* Never exceed the platform maximum alignment.  */
9329 if (align > BIGGEST_ALIGNMENT)
9330 align = BIGGEST_ALIGNMENT;
9334 /* Return true when TYPE should be 128bit aligned for 32bit argument
/* Return true when TYPE is, or recursively contains, a value requiring
   128-bit alignment for 32-bit argument passing (XFmode/XCmode are
   explicitly excluded).  */
9338 ix86_contains_aligned_value_p (const_tree type)
9340 machine_mode mode = TYPE_MODE (type);
9342 if (mode == XFmode || mode == XCmode)
9345 if (TYPE_ALIGN (type) < 128)
9348 if (AGGREGATE_TYPE_P (type))
9350 /* Walk the aggregates recursively. */
9351 switch (TREE_CODE (type))
9355 case QUAL_UNION_TYPE:
9359 /* Walk all the structure fields. */
9360 for (field = TYPE_FIELDS (type);
9362 field = DECL_CHAIN (field))
9364 if (TREE_CODE (field) == FIELD_DECL
9365 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
9372 /* Just for use if some languages passes arrays by value. */
9373 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
/* Non-aggregate: decide from the type's own alignment.  */
9382 return TYPE_ALIGN (type) >= 128;
9387 /* Gives the alignment boundary, in bits, of an argument with the
9388 specified mode and type. */
/* TARGET_FUNCTION_ARG_BOUNDARY hook: return the alignment, in bits, of
   an argument with MODE and TYPE.  Warns (-Wpsabi) when the result
   differs from what GCC versions before 4.6 used.  */
9391 ix86_function_arg_boundary (machine_mode mode, const_tree type)
9396 /* Since the main variant type is used for call, we convert it to
9397 the main variant type. */
9398 type = TYPE_MAIN_VARIANT (type);
9399 align = TYPE_ALIGN (type);
9402 align = GET_MODE_ALIGNMENT (mode);
9403 if (align < PARM_BOUNDARY)
9404 align = PARM_BOUNDARY;
9408 unsigned int saved_align = align;
9412 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
9415 if (mode == XFmode || mode == XCmode)
9416 align = PARM_BOUNDARY;
9418 else if (!ix86_contains_aligned_value_p (type))
9419 align = PARM_BOUNDARY;
9422 align = PARM_BOUNDARY;
/* Diagnose a psABI change relative to the pre-4.6 computation.  */
9427 && align != ix86_compat_function_arg_boundary (mode, type,
9431 inform (input_location,
9432 "The ABI for passing parameters with %d-byte"
9433 " alignment has changed in GCC 4.6",
9434 align / BITS_PER_UNIT);
9441 /* Return true if N is a possible register number of function value. */
/* TARGET_FUNCTION_VALUE_REGNO_P hook: return true if REGNO can hold a
   function return value under the current ABI.  */
9444 ix86_function_value_regno_p (const unsigned int regno)
9451 return (!TARGET_64BIT || ix86_cfun_abi () != MS_ABI);
9454 return TARGET_64BIT && ix86_cfun_abi () != MS_ABI;
9458 return chkp_function_instrumented_p (current_function_decl);
9460 /* Complex values are returned in %st(0)/%st(1) pair. */
9463 /* TODO: The function should depend on current function ABI but
9464 builtins.c would need updating then. Therefore we use the
9466 if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
9468 return TARGET_FLOAT_RETURNS_IN_80387;
9470 /* Complex values are returned in %xmm0/%xmm1 pair. */
9476 if (TARGET_MACHO || TARGET_64BIT)
9484 /* Define how to find the value returned by a function.
9485 VALTYPE is the data type of the value (as a tree).
9486 If the precise function being called is known, FUNC is its FUNCTION_DECL;
9487 otherwise, FUNC is 0. */
/* Return the register holding a 32-bit-ABI function return value of
   ORIG_MODE/MODE.  FNTYPE/FN identify the callee when known, so local
   sseregparm overrides can be applied.  */
9490 function_value_32 (machine_mode orig_mode, machine_mode mode,
9491 const_tree fntype, const_tree fn)
9495 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
9496 we normally prevent this case when mmx is not available. However
9497 some ABIs may require the result to be returned like DImode. */
9498 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
9499 regno = FIRST_MMX_REG;
9501 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
9502 we prevent this case when sse is not available. However some ABIs
9503 may require the result to be returned like integer TImode. */
9504 else if (mode == TImode
9505 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
9506 regno = FIRST_SSE_REG;
9508 /* 32-byte vector modes in %ymm0. */
9509 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
9510 regno = FIRST_SSE_REG;
9512 /* 64-byte vector modes in %zmm0. */
9513 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
9514 regno = FIRST_SSE_REG;
9516 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
9517 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
9518 regno = FIRST_FLOAT_REG;
9520 /* Most things go in %eax. */
9523 /* Override FP return register with %xmm0 for local functions when
9524 SSE math is enabled or for functions with sseregparm attribute. */
9525 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
9527 int sse_level = ix86_function_sseregparm (fntype, fn, false);
9528 if (sse_level == -1)
/* NOTE(review): "caling" below is a typo for "calling" in the
   user-visible error message; fixing it changes a runtime string,
   so it is only flagged here.  */
9530 error ("calling %qD with SSE caling convention without "
9531 "SSE/SSE2 enabled", fn);
9532 sorry ("this is a GCC bug that can be worked around by adding "
9533 "attribute used to function called");
9535 else if ((sse_level >= 1 && mode == SFmode)
9536 || (sse_level == 2 && mode == DFmode))
9537 regno = FIRST_SSE_REG;
9540 /* OImode shouldn't be used directly. */
9541 gcc_assert (mode != OImode);
9543 return gen_rtx_REG (orig_mode, regno);
/* Return the RTL describing a 64-bit SysV return value of
   ORIG_MODE/MODE with C type VALTYPE (NULL for libcalls).  Delegates
   aggregate classification to construct_container.  */
9547 function_value_64 (machine_mode orig_mode, machine_mode mode,
9552 /* Handle libcalls, which don't provide a type node. */
9553 if (valtype == NULL)
9567 regno = FIRST_SSE_REG;
9571 regno = FIRST_FLOAT_REG;
9579 return gen_rtx_REG (mode, regno);
9581 else if (POINTER_TYPE_P (valtype))
9583 /* Pointers are always returned in word_mode. */
9587 ret = construct_container (mode, orig_mode, valtype, 1,
9588 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
9589 x86_64_int_return_registers, 0);
9591 /* For zero sized structures, construct_container returns NULL, but we
9592 need to keep rest of compiler happy by returning meaningful value. */
9594 ret = gen_rtx_REG (orig_mode, AX_REG);
/* Return the register for an MS-ABI (Win64) return value of
   ORIG_MODE/MODE with C type VALTYPE: %xmm0 for scalar floats and
   eligible 16-byte values, %rax otherwise.  */
9600 function_value_ms_64 (machine_mode orig_mode, machine_mode mode,
9603 unsigned int regno = AX_REG;
9607 switch (GET_MODE_SIZE (mode))
9610 if (valtype != NULL_TREE
/* NOTE(review): the next two conditions are identical —
   !VECTOR_INTEGER_TYPE_P is tested twice.  One of them is redundant
   and one was likely intended to be a different predicate; verify
   against the Win64 ABI before changing.  */
9611 && !VECTOR_INTEGER_TYPE_P (valtype)
9612 && !VECTOR_INTEGER_TYPE_P (valtype)
9613 && !INTEGRAL_TYPE_P (valtype)
9614 && !VECTOR_FLOAT_TYPE_P (valtype))
9616 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
9617 && !COMPLEX_MODE_P (mode))
9618 regno = FIRST_SSE_REG;
9622 if (mode == SFmode || mode == DFmode)
9623 regno = FIRST_SSE_REG;
9629 return gen_rtx_REG (orig_mode, regno);
/* Common worker for ix86_function_value and ix86_libcall_value:
   dispatch on bounds types, then on ABI (MS vs SysV) and word size.
   FNTYPE_OR_DECL may be a FUNCTION_DECL, a FUNCTION_TYPE, or NULL.  */
9633 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
9634 machine_mode orig_mode, machine_mode mode)
9636 const_tree fn, fntype;
9639 if (fntype_or_decl && DECL_P (fntype_or_decl))
9640 fn = fntype_or_decl;
9641 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
9643 if ((valtype && POINTER_BOUNDS_TYPE_P (valtype))
9644 || POINTER_BOUNDS_MODE_P (mode))
9645 return gen_rtx_REG (BNDmode, FIRST_BND_REG);
9646 else if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
9647 return function_value_ms_64 (orig_mode, mode, valtype);
9648 else if (TARGET_64BIT)
9649 return function_value_64 (orig_mode, mode, valtype);
9651 return function_value_32 (orig_mode, mode, fntype, fn);
/* TARGET_FUNCTION_VALUE hook: compute the natural mode for VALTYPE and
   delegate to ix86_function_value_1.  */
9655 ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool)
9657 machine_mode mode, orig_mode;
9659 orig_mode = TYPE_MODE (valtype);
9660 mode = type_natural_mode (valtype, NULL, true);
9661 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
9664 /* Return an RTX representing a place where a function returns
9665 or receives pointer bounds or NULL if no bounds are returned.
9667 VALTYPE is a data type of a value returned by the function.
9669 FN_DECL_OR_TYPE is a tree node representing FUNCTION_DECL
9670 or FUNCTION_TYPE of the function.
9672 If OUTGOING is false, return a place in which the caller will
9673 see the return value. Otherwise, return a place where a
9674 function returns a value. */
/* Worker for the MPX bounds-return hook: a BOUNDED_TYPE_P value gets
   BND0 directly; an aggregate containing pointers gets a PARALLEL of
   up to two bound registers, one per pointer slot found by
   chkp_find_bound_slots.  (NOTE(review): elided listing — some
   interior lines are missing from this view.)  */
9677 ix86_function_value_bounds (const_tree valtype,
9678 const_tree fntype_or_decl ATTRIBUTE_UNUSED,
9679 bool outgoing ATTRIBUTE_UNUSED)
9683 if (BOUNDED_TYPE_P (valtype))
9684 res = gen_rtx_REG (BNDmode, FIRST_BND_REG);
9685 else if (chkp_type_has_pointer (valtype))
9690 unsigned i, bnd_no = 0;
9692 bitmap_obstack_initialize (NULL);
9693 slots = BITMAP_ALLOC (NULL);
9694 chkp_find_bound_slots (valtype, slots);
/* Each set bit I marks a pointer at byte offset I * POINTER_SIZE.  */
9696 EXECUTE_IF_SET_IN_BITMAP (slots, 0, i, bi)
9698 rtx reg = gen_rtx_REG (BNDmode, FIRST_BND_REG + bnd_no);
9699 rtx offs = GEN_INT (i * POINTER_SIZE / BITS_PER_UNIT);
/* Only BND0/BND1 are available for returns.  */
9700 gcc_assert (bnd_no < 2);
9701 bounds[bnd_no++] = gen_rtx_EXPR_LIST (VOIDmode, reg, offs);
9704 res = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (bnd_no, bounds));
9706 BITMAP_FREE (slots);
9707 bitmap_obstack_release (NULL);
9715 /* Pointer function arguments and return values are promoted to
/* Implement TARGET_PROMOTE_FUNCTION_MODE: pointers are extended
   unsigned (POINTERS_EXTEND_UNSIGNED); everything else uses the
   default promotion rules.  */
9719 ix86_promote_function_mode (const_tree type, machine_mode mode,
9720 int *punsignedp, const_tree fntype,
9723 if (type != NULL_TREE && POINTER_TYPE_P (type))
9725 *punsignedp = POINTERS_EXTEND_UNSIGNED;
9728 return default_promote_function_mode (type, mode, punsignedp, fntype,
9732 /* Return true if a structure, union or array with MODE containing FIELD
9733 should be accessed using BLKmode. */
/* Implement TARGET_MEMBER_TYPE_FORCES_BLK: only XFmode members of
   (qualified) unions force BLKmode.  */
9736 ix86_member_type_forces_blk (const_tree field, machine_mode mode)
9738 /* Union with XFmode must be in BLKmode. */
9739 return (mode == XFmode
9740 && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
9741 || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
/* Implement TARGET_LIBCALL_VALUE: libcalls have no tree type, so the
   mode serves as both natural and original mode.  */
9745 ix86_libcall_value (machine_mode mode)
9747 return ix86_function_value_1 (NULL, NULL, mode, mode);
9750 /* Return true iff type is returned in memory. */
/* Implement TARGET_RETURN_IN_MEMORY.  Branches (some elided in this
   listing) cover: a subtarget override, pointer-bounds types, the
   64-bit MS ABI, the 64-bit SysV ABI (via examine_argument), the
   Intel MCU psABI, and the classic 32-bit rules.  */
9753 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
9755 #ifdef SUBTARGET_RETURN_IN_MEMORY
9756 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
9758 const machine_mode mode = type_natural_mode (type, NULL, true);
9761 if (POINTER_BOUNDS_TYPE_P (type))
/* 64-bit MS ABI: only __m128-like values and sizes 1/2/4/8 stay in
   registers; everything else goes to memory.  */
9766 if (ix86_function_type_abi (fntype) == MS_ABI)
9768 size = int_size_in_bytes (type);
9770 /* __m128 is returned in xmm0. */
9771 if ((!type || VECTOR_INTEGER_TYPE_P (type)
9772 || INTEGRAL_TYPE_P (type)
9773 || VECTOR_FLOAT_TYPE_P (type))
9774 && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
9775 && !COMPLEX_MODE_P (mode)
9776 && (GET_MODE_SIZE (mode) == 16 || size == 16))
9779 /* Otherwise, the size must be exactly in [1248]. */
9780 return size != 1 && size != 2 && size != 4 && size != 8;
/* 64-bit SysV: in memory iff the value needs no registers at all.  */
9784 int needed_intregs, needed_sseregs;
9786 return examine_argument (mode, type, 1,
9787 &needed_intregs, &needed_sseregs);
9792 size = int_size_in_bytes (type);
9794 /* Intel MCU psABI returns scalars and aggregates no larger than 8
9795 bytes in registers. */
9797 return VECTOR_MODE_P (mode) || size < 0 || size > 8;
9799 if (mode == BLKmode)
9802 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
9805 if (VECTOR_MODE_P (mode) || mode == TImode)
9807 /* User-created vectors small enough to fit in EAX. */
9811 /* Unless ABI prescribes otherwise,
9812 MMX/3dNow values are returned in MM0 if available. */
9815 return TARGET_VECT8_RETURNS || !TARGET_MMX;
9817 /* SSE values are returned in XMM0 if available. */
9821 /* AVX values are returned in YMM0 if available. */
9825 /* AVX512F values are returned in ZMM0 if available. */
9827 return !TARGET_AVX512F;
9836 /* OImode shouldn't be used directly. */
9837 gcc_assert (mode != OImode);
9845 /* Create the va_list data type. */
/* Build the 64-bit SysV __va_list_tag record:
     unsigned gp_offset, fp_offset; void *overflow_arg_area,
     *reg_save_area;
   and return an array type of one such element (the psABI va_list).
   (NOTE(review): elided listing — the pointer-typed field type
   arguments for f_ovf/f_sav are on missing lines.)  */
9848 ix86_build_builtin_va_list_64 (void)
9850 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
9852 record = lang_hooks.types.make_type (RECORD_TYPE);
9853 type_decl = build_decl (BUILTINS_LOCATION,
9854 TYPE_DECL, get_identifier ("__va_list_tag"), record);
9856 f_gpr = build_decl (BUILTINS_LOCATION,
9857 FIELD_DECL, get_identifier ("gp_offset"),
9858 unsigned_type_node);
9859 f_fpr = build_decl (BUILTINS_LOCATION,
9860 FIELD_DECL, get_identifier ("fp_offset"),
9861 unsigned_type_node);
9862 f_ovf = build_decl (BUILTINS_LOCATION,
9863 FIELD_DECL, get_identifier ("overflow_arg_area"),
9865 f_sav = build_decl (BUILTINS_LOCATION,
9866 FIELD_DECL, get_identifier ("reg_save_area"),
/* Expose the counter fields so pass_stdarg can track their uses.  */
9869 va_list_gpr_counter_field = f_gpr;
9870 va_list_fpr_counter_field = f_fpr;
9872 DECL_FIELD_CONTEXT (f_gpr) = record;
9873 DECL_FIELD_CONTEXT (f_fpr) = record;
9874 DECL_FIELD_CONTEXT (f_ovf) = record;
9875 DECL_FIELD_CONTEXT (f_sav) = record;
9877 TYPE_STUB_DECL (record) = type_decl;
9878 TYPE_NAME (record) = type_decl;
9879 TYPE_FIELDS (record) = f_gpr;
9880 DECL_CHAIN (f_gpr) = f_fpr;
9881 DECL_CHAIN (f_fpr) = f_ovf;
9882 DECL_CHAIN (f_ovf) = f_sav;
9884 layout_type (record);
9886 /* The correct type is an array type of one element. */
9887 return build_array_type (record, build_index_type (size_zero_node));
9890 /* Setup the builtin va_list data type and for 64-bit the additional
9891 calling convention specific va_list data types. */
/* Implement TARGET_BUILD_BUILTIN_VA_LIST.  On 64-bit targets both
   ABI-specific variants are created (SysV record, MS char pointer)
   and the default-ABI one is returned; 32-bit uses char *.  */
9894 ix86_build_builtin_va_list (void)
9898 /* Initialize ABI specific va_list builtin types. */
9899 tree sysv_va_list, ms_va_list;
9901 sysv_va_list = ix86_build_builtin_va_list_64 ();
9902 sysv_va_list_type_node = build_variant_type_copy (sysv_va_list);
9904 /* For MS_ABI we use plain pointer to argument area. */
9905 ms_va_list = build_pointer_type (char_type_node);
9906 ms_va_list_type_node = build_variant_type_copy (ms_va_list);
9908 return (ix86_abi == MS_ABI) ? ms_va_list : sysv_va_list;
9912 /* For i386 we use plain pointer to argument area. */
9913 return build_pointer_type (char_type_node);
9917 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
/* 64-bit SysV: spill the unused named-parameter registers into the
   register save area below the frame pointer so va_arg can find
   them.  GPRs are saved unconditionally; SSE registers only when AL
   (the SSE-register count passed by the caller) is non-zero.  */
9920 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
9926 /* GPR size of varargs save area. */
9927 if (cfun->va_list_gpr_size)
9928 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
9930 ix86_varargs_gpr_size = 0;
9932 /* FPR size of varargs save area. We don't need it if we don't pass
9933 anything in SSE registers. */
9934 if (TARGET_SSE && cfun->va_list_fpr_size)
9935 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
9937 ix86_varargs_fpr_size = 0;
9939 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
9942 save_area = frame_pointer_rtx;
9943 set = get_varargs_alias_set ();
/* Save the GPRs from the first unused named-arg register up to the
   number the function's va_list actually reads.  */
9945 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
9946 if (max > X86_64_REGPARM_MAX)
9947 max = X86_64_REGPARM_MAX;
9949 for (i = cum->regno; i < max; i++)
9951 mem = gen_rtx_MEM (word_mode,
9952 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
9953 MEM_NOTRAP_P (mem) = 1;
9954 set_mem_alias_set (mem, set);
9955 emit_move_insn (mem,
9956 gen_rtx_REG (word_mode,
9957 x86_64_int_parameter_registers[i]));
9960 if (ix86_varargs_fpr_size)
9963 rtx_code_label *label;
9966 /* Now emit code to save SSE registers. The AX parameter contains number
9967 of SSE parameter registers used to call this function, though all we
9968 actually check here is the zero/non-zero status. */
9970 label = gen_label_rtx ();
9971 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
9972 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
9975 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
9976 we used movdqa (i.e. TImode) instead? Perhaps even better would
9977 be if we could determine the real mode of the data, via a hook
9978 into pass_stdarg. Ignore all that for now. */
/* The save area must be aligned for the SSE store mode.  */
9980 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
9981 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
9983 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
9984 if (max > X86_64_SSE_REGPARM_MAX)
9985 max = X86_64_SSE_REGPARM_MAX;
9987 for (i = cum->sse_regno; i < max; ++i)
9989 mem = plus_constant (Pmode, save_area,
9990 i * 16 + ix86_varargs_gpr_size);
9991 mem = gen_rtx_MEM (smode, mem);
9992 MEM_NOTRAP_P (mem) = 1;
9993 set_mem_alias_set (mem, set);
9994 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
9996 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
/* 64-bit MS ABI varargs setup: spill the remaining integer parameter
   registers into their home slots in the caller-allocated shadow
   space (above the return address), so va_arg can walk them as a
   contiguous stack array.  No separate save area is needed, so the
   SysV save-area sizes are reset to zero.  */
10004 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
10006 alias_set_type set = get_varargs_alias_set ();
10009 /* Reset to zero, as there might be a sysv vaarg used
10011 ix86_varargs_gpr_size = 0;
10012 ix86_varargs_fpr_size = 0;
10014 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
10018 mem = gen_rtx_MEM (Pmode,
10019 plus_constant (Pmode, virtual_incoming_args_rtx,
10020 i * UNITS_PER_WORD));
10021 MEM_NOTRAP_P (mem) = 1;
10022 set_mem_alias_set (mem, set);
10024 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
10025 emit_move_insn (mem, reg);
/* Implement TARGET_SETUP_INCOMING_VARARGS: advance a copy of CUM past
   the last named argument (for stdarg functions), then dispatch to
   the per-ABI worker.  */
10030 ix86_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
10031 tree type, int *, int no_rtl)
10033 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
10034 CUMULATIVE_ARGS next_cum;
10037 /* This argument doesn't appear to be used anymore. Which is good,
10038 because the old code here didn't suppress rtl generation. */
10039 gcc_assert (!no_rtl);
10044 fntype = TREE_TYPE (current_function_decl);
10046 /* For varargs, we do not want to skip the dummy va_dcl argument.
10047 For stdargs, we do want to skip the last named argument. */
10049 if (stdarg_p (fntype))
10050 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
10053 if (cum->call_abi == MS_ABI)
10054 setup_incoming_varargs_ms_64 (&next_cum);
10056 setup_incoming_varargs_64 (&next_cum);
/* MPX counterpart of ix86_setup_incoming_varargs: store the bounds of
   the spilled vararg GPRs alongside the register save area.  Bounds
   beyond the available bound registers are reloaded with BNDLDX from
   the Bounds Table before being stored with BNDSTX.  Only meaningful
   for the 64-bit SysV ABI.  */
10060 ix86_setup_incoming_vararg_bounds (cumulative_args_t cum_v,
10061 enum machine_mode mode,
10063 int *pretend_size ATTRIBUTE_UNUSED,
10066 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
10067 CUMULATIVE_ARGS next_cum;
10070 int bnd_reg, i, max;
10072 gcc_assert (!no_rtl);
10074 /* Do nothing if we use plain pointer to argument area. */
10075 if (!TARGET_64BIT || cum->call_abi == MS_ABI)
10078 fntype = TREE_TYPE (current_function_decl);
10080 /* For varargs, we do not want to skip the dummy va_dcl argument.
10081 For stdargs, we do want to skip the last named argument. */
10083 if (stdarg_p (fntype))
10084 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
10086 save_area = frame_pointer_rtx;
10088 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
10089 if (max > X86_64_REGPARM_MAX)
10090 max = X86_64_REGPARM_MAX;
10092 bnd_reg = cum->bnd_regno + cum->force_bnd_pass;
10093 if (chkp_function_instrumented_p (current_function_decl))
10094 for (i = cum->regno; i < max; i++)
10096 rtx addr = plus_constant (Pmode, save_area, i * UNITS_PER_WORD);
10097 rtx ptr = gen_rtx_REG (Pmode,
10098 x86_64_int_parameter_registers[i]);
/* Bounds still in a register can be stored directly; otherwise load
   them back from the Bounds Table entry for this argument slot.  */
10101 if (bnd_reg <= LAST_BND_REG)
10102 bounds = gen_rtx_REG (BNDmode, bnd_reg);
10106 plus_constant (Pmode, arg_pointer_rtx,
10107 (LAST_BND_REG - bnd_reg) * GET_MODE_SIZE (Pmode));
10108 bounds = gen_reg_rtx (BNDmode);
10109 emit_insn (BNDmode == BND64mode
10110 ? gen_bnd64_ldx (bounds, ldx_addr, ptr)
10111 : gen_bnd32_ldx (bounds, ldx_addr, ptr));
10114 emit_insn (BNDmode == BND64mode
10115 ? gen_bnd64_stx (addr, ptr, bounds)
10116 : gen_bnd32_stx (addr, ptr, bounds));
10123 /* Checks if TYPE is of kind va_list char *. */
/* True when TYPE's canonical va_list is the plain char* form (always
   on 32-bit; on 64-bit, the MS variant or the default under MS
   ABI).  */
10126 is_va_list_char_pointer (tree type)
10130 /* For 32-bit it is always true. */
10133 canonic = ix86_canonical_va_list_type (type);
10134 return (canonic == ms_va_list_type_node
10135 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
10138 /* Implement va_start. */
/* Initialize VALIST's fields from the current function's argument
   state: gp_offset/fp_offset from the number of named register args
   consumed, overflow_arg_area from the incoming-args pointer, and
   reg_save_area from the frame pointer.  Char-pointer va_lists
   (32-bit / MS ABI) take the early simple path.  With -fsplit-stack
   the stack arguments are reached through a pseudo seeded from the
   prologue scratch register instead of internal_arg_pointer.  */
10141 ix86_va_start (tree valist, rtx nextarg)
10143 HOST_WIDE_INT words, n_gpr, n_fpr;
10144 tree f_gpr, f_fpr, f_ovf, f_sav;
10145 tree gpr, fpr, ovf, sav, t;
10149 if (flag_split_stack
10150 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
10152 unsigned int scratch_regno;
10154 /* When we are splitting the stack, we can't refer to the stack
10155 arguments using internal_arg_pointer, because they may be on
10156 the old stack. The split stack prologue will arrange to
10157 leave a pointer to the old stack arguments in a scratch
10158 register, which we here copy to a pseudo-register. The split
10159 stack prologue can't set the pseudo-register directly because
10160 it (the prologue) runs before any registers have been saved. */
10162 scratch_regno = split_stack_prologue_scratch_regno ();
10163 if (scratch_regno != INVALID_REGNUM)
10168 reg = gen_reg_rtx (Pmode);
10169 cfun->machine->split_stack_varargs_pointer = reg;
10172 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
10173 seq = get_insns ();
/* The copy must run at function entry, before any other insn uses
   the scratch register.  */
10176 push_topmost_sequence ();
10177 emit_insn_after (seq, entry_of_function ());
10178 pop_topmost_sequence ();
10182 /* Only 64bit target needs something special. */
10183 if (is_va_list_char_pointer (TREE_TYPE (valist)))
10185 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
10186 std_expand_builtin_va_start (valist, nextarg);
10191 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
10192 next = expand_binop (ptr_mode, add_optab,
10193 cfun->machine->split_stack_varargs_pointer,
10194 crtl->args.arg_offset_rtx,
10195 NULL_RTX, 0, OPTAB_LIB_WIDEN);
10196 convert_move (va_r, next, 0);
10198 /* Store zero bounds for va_list. */
10199 if (chkp_function_instrumented_p (current_function_decl))
10200 chkp_expand_bounds_reset_for_mem (valist,
10201 make_tree (TREE_TYPE (valist),
10208 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
10209 f_fpr = DECL_CHAIN (f_gpr);
10210 f_ovf = DECL_CHAIN (f_fpr);
10211 f_sav = DECL_CHAIN (f_ovf);
10213 valist = build_simple_mem_ref (valist);
10214 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
10215 /* The following should be folded into the MEM_REF offset. */
10216 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
10218 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
10220 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
10222 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
10225 /* Count number of gp and fp argument registers used. */
10226 words = crtl->args.info.words;
10227 n_gpr = crtl->args.info.regno;
10228 n_fpr = crtl->args.info.sse_regno;
10230 if (cfun->va_list_gpr_size)
10232 type = TREE_TYPE (gpr);
10233 t = build2 (MODIFY_EXPR, type,
10234 gpr, build_int_cst (type, n_gpr * 8));
10235 TREE_SIDE_EFFECTS (t) = 1;
10236 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
10239 if (TARGET_SSE && cfun->va_list_fpr_size)
10241 type = TREE_TYPE (fpr);
/* fp_offset starts after the 8-byte GPR slots.  */
10242 t = build2 (MODIFY_EXPR, type, fpr,
10243 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
10244 TREE_SIDE_EFFECTS (t) = 1;
10245 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
10248 /* Find the overflow area. */
10249 type = TREE_TYPE (ovf);
10250 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
10251 ovf_rtx = crtl->args.internal_arg_pointer;
10253 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
10254 t = make_tree (type, ovf_rtx);
10256 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
10258 /* Store zero bounds for overflow area pointer. */
10259 if (chkp_function_instrumented_p (current_function_decl))
10260 chkp_expand_bounds_reset_for_mem (ovf, t);
10262 t = build2 (MODIFY_EXPR, type, ovf, t);
10263 TREE_SIDE_EFFECTS (t) = 1;
10264 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
10266 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
10268 /* Find the register save area.
10269 Prologue of the function save it right above stack frame. */
10270 type = TREE_TYPE (sav);
10271 t = make_tree (type, frame_pointer_rtx);
10272 if (!ix86_varargs_gpr_size)
10273 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
10275 /* Store zero bounds for save area pointer. */
10276 if (chkp_function_instrumented_p (current_function_decl))
10277 chkp_expand_bounds_reset_for_mem (sav, t);
10279 t = build2 (MODIFY_EXPR, type, sav, t);
10280 TREE_SIDE_EFFECTS (t) = 1;
10281 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
10285 /* Implement va_arg. */
/* Gimplify a VA_ARG_EXPR for the 64-bit SysV ABI.  Values whose
   natural mode can live in registers are fetched from the register
   save area (gp_offset/fp_offset checked against the limits first,
   with a branch to the overflow path); others, and the overflow case,
   come from overflow_arg_area.  Multi-register aggregates that are
   not contiguous in the save area are reassembled into a temporary.
   Char-pointer va_lists take the standard expansion.
   (NOTE(review): elided listing — many structural lines are missing
   from this view.)  */
10288 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
10289 gimple_seq *post_p)
10291 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
10292 tree f_gpr, f_fpr, f_ovf, f_sav;
10293 tree gpr, fpr, ovf, sav, t;
10295 tree lab_false, lab_over = NULL_TREE;
10298 int indirect_p = 0;
10300 machine_mode nat_mode;
10301 unsigned int arg_boundary;
10303 /* Only 64bit target needs something special. */
10304 if (is_va_list_char_pointer (TREE_TYPE (valist)))
10305 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
10307 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
10308 f_fpr = DECL_CHAIN (f_gpr);
10309 f_ovf = DECL_CHAIN (f_fpr);
10310 f_sav = DECL_CHAIN (f_ovf);
10312 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
10313 valist, f_gpr, NULL_TREE);
10315 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
10316 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
10317 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
/* Pass-by-reference types are accessed through a pointer.  */
10319 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
10321 type = build_pointer_type (type);
10322 size = int_size_in_bytes (type);
10323 rsize = CEIL (size, UNITS_PER_WORD);
10325 nat_mode = type_natural_mode (type, NULL, false);
10340 /* Unnamed 256 and 512bit vector mode parameters are passed on stack. */
10341 if (!TARGET_64BIT_MS_ABI)
10348 container = construct_container (nat_mode, TYPE_MODE (type),
10349 type, 0, X86_64_REGPARM_MAX,
10350 X86_64_SSE_REGPARM_MAX, intreg,
10355 /* Pull the value out of the saved registers. */
10357 addr = create_tmp_var (ptr_type_node, "addr");
10361 int needed_intregs, needed_sseregs;
10363 tree int_addr, sse_addr;
10365 lab_false = create_artificial_label (UNKNOWN_LOCATION);
10366 lab_over = create_artificial_label (UNKNOWN_LOCATION);
10368 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
/* Over-aligned values cannot be assembled in place; use a temp.  */
10370 need_temp = (!REG_P (container)
10371 && ((needed_intregs && TYPE_ALIGN (type) > 64)
10372 || TYPE_ALIGN (type) > 128));
10374 /* In case we are passing structure, verify that it is consecutive block
10375 on the register save area. If not we need to do moves. */
10376 if (!need_temp && !REG_P (container))
10378 /* Verify that all registers are strictly consecutive */
10379 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
10383 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
10385 rtx slot = XVECEXP (container, 0, i);
10386 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
10387 || INTVAL (XEXP (slot, 1)) != i * 16)
10395 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
10397 rtx slot = XVECEXP (container, 0, i);
10398 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
10399 || INTVAL (XEXP (slot, 1)) != i * 8)
10411 int_addr = create_tmp_var (ptr_type_node, "int_addr");
10412 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
10415 /* First ensure that we fit completely in registers. */
10416 if (needed_intregs)
10418 t = build_int_cst (TREE_TYPE (gpr),
10419 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
10420 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
10421 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
10422 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
10423 gimplify_and_add (t, pre_p);
10425 if (needed_sseregs)
10427 t = build_int_cst (TREE_TYPE (fpr),
10428 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
10429 + X86_64_REGPARM_MAX * 8);
10430 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
10431 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
10432 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
10433 gimplify_and_add (t, pre_p);
10436 /* Compute index to start of area used for integer regs. */
10437 if (needed_intregs)
10439 /* int_addr = gpr + sav; */
10440 t = fold_build_pointer_plus (sav, gpr);
10441 gimplify_assign (int_addr, t, pre_p);
10443 if (needed_sseregs)
10445 /* sse_addr = fpr + sav; */
10446 t = fold_build_pointer_plus (sav, fpr);
10447 gimplify_assign (sse_addr, t, pre_p);
/* Non-contiguous case: copy each register piece into a temporary.  */
10451 int i, prev_size = 0;
10452 tree temp = create_tmp_var (type, "va_arg_tmp");
10454 /* addr = &temp; */
10455 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
10456 gimplify_assign (addr, t, pre_p);
10458 for (i = 0; i < XVECLEN (container, 0); i++)
10460 rtx slot = XVECEXP (container, 0, i);
10461 rtx reg = XEXP (slot, 0);
10462 machine_mode mode = GET_MODE (reg);
10466 tree src_addr, src;
10468 tree dest_addr, dest;
10469 int cur_size = GET_MODE_SIZE (mode);
10471 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
10472 prev_size = INTVAL (XEXP (slot, 1));
10473 if (prev_size + cur_size > size)
10475 cur_size = size - prev_size;
10476 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
10477 if (mode == BLKmode)
10480 piece_type = lang_hooks.types.type_for_mode (mode, 1);
10481 if (mode == GET_MODE (reg))
10482 addr_type = build_pointer_type (piece_type);
10484 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
10486 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
10489 if (SSE_REGNO_P (REGNO (reg)))
10491 src_addr = sse_addr;
10492 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
10496 src_addr = int_addr;
10497 src_offset = REGNO (reg) * 8;
10499 src_addr = fold_convert (addr_type, src_addr);
10500 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
10502 dest_addr = fold_convert (daddr_type, addr);
10503 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
10504 if (cur_size == GET_MODE_SIZE (mode))
10506 src = build_va_arg_indirect_ref (src_addr);
10507 dest = build_va_arg_indirect_ref (dest_addr);
10509 gimplify_assign (dest, src, pre_p);
10514 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
10515 3, dest_addr, src_addr,
10516 size_int (cur_size));
10517 gimplify_and_add (copy, pre_p);
10519 prev_size += cur_size;
/* Finally, bump the gp/fp offsets past the registers consumed.  */
10523 if (needed_intregs)
10525 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
10526 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
10527 gimplify_assign (gpr, t, pre_p);
10530 if (needed_sseregs)
10532 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
10533 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
10534 gimplify_assign (unshare_expr (fpr), t, pre_p);
10537 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
10539 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
10542 /* ... otherwise out of the overflow area. */
10544 /* When we align parameter on stack for caller, if the parameter
10545 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
10546 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. We will match callee
10547 here with caller. */
10548 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
10549 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
10550 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
10552 /* Care for on-stack alignment if needed. */
10553 if (arg_boundary <= 64 || size == 0)
10557 HOST_WIDE_INT align = arg_boundary / 8;
10558 t = fold_build_pointer_plus_hwi (ovf, align - 1);
10559 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
10560 build_int_cst (TREE_TYPE (t), -align));
10563 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
10564 gimplify_assign (addr, t, pre_p);
10566 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
10567 gimplify_assign (unshare_expr (ovf), t, pre_p);
10570 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
10572 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
10573 addr = fold_convert (ptrtype, addr);
10576 addr = build_va_arg_indirect_ref (addr);
10577 return build_va_arg_indirect_ref (addr);
10580 /* Return true if OPNUM's MEM should be matched
10581 in movabs* patterns. */
/* Strips a PARALLEL wrapper and any SUBREGs, then rejects volatile
   MEMs unless volatile_ok.  */
10584 ix86_check_movabs (rtx insn, int opnum)
10588 set = PATTERN (insn);
10589 if (GET_CODE (set) == PARALLEL)
10590 set = XVECEXP (set, 0, 0);
10591 gcc_assert (GET_CODE (set) == SET);
10592 mem = XEXP (set, opnum);
10593 while (SUBREG_P (mem))
10594 mem = SUBREG_REG (mem);
10595 gcc_assert (MEM_P (mem));
10596 return volatile_ok || !MEM_VOLATILE_P (mem);
10599 /* Return false if INSN contains a MEM with a non-default address space. */
10601 ix86_check_no_addr_space (rtx insn)
10603 subrtx_var_iterator::array_type array;
/* Walk every sub-rtx of the pattern looking for a segment-qualified
   (non-generic address space) MEM.  */
10604 FOR_EACH_SUBRTX_VAR (iter, array, PATTERN (insn), ALL)
10607 if (MEM_P (x) && !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (x)))
10613 /* Initialize the table of extra 80387 mathematical constants. */
/* One-time fill of ext_80387_constants_table with the five values the
   x87 can load directly (fldlg2, fldln2, fldl2e, fldl2t, fldpi),
   each rounded to XFmode.  */
10616 init_ext_80387_constants (void)
10618 static const char * cst[5] =
10620 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
10621 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
10622 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
10623 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
10624 "3.1415926535897932385128089594061862044", /* 4: fldpi */
10628 for (i = 0; i < 5; i++)
10630 real_from_string (&ext_80387_constants_table[i], cst[i]);
10631 /* Ensure each constant is rounded to XFmode precision. */
10632 real_convert (&ext_80387_constants_table[i],
10633 XFmode, &ext_80387_constants_table[i]);
10636 ext_80387_constants_init = 1;
10639 /* Return non-zero if the constant is something that
10640 can be loaded with a special instruction. */
/* Classifies X: 0.0 and 1.0 map to fldz/fld1; for XFmode the five
   table constants are matched when size-optimizing or the CPU
   benefits; -0.0 and -1.0 are recognized for fldz/fld1 + fchs
   splits.  (Exact return codes are on elided lines; they feed
   standard_80387_constant_opcode/rtx.)  */
10643 standard_80387_constant_p (rtx x)
10645 machine_mode mode = GET_MODE (x);
10647 const REAL_VALUE_TYPE *r;
10649 if (!(CONST_DOUBLE_P (x) && X87_FLOAT_MODE_P (mode)))
10652 if (x == CONST0_RTX (mode))
10654 if (x == CONST1_RTX (mode))
10657 r = CONST_DOUBLE_REAL_VALUE (x);
10659 /* For XFmode constants, try to find a special 80387 instruction when
10660 optimizing for size or on those CPUs that benefit from them. */
10662 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
10666 if (! ext_80387_constants_init)
10667 init_ext_80387_constants ();
10669 for (i = 0; i < 5; i++)
10670 if (real_identical (r, &ext_80387_constants_table[i]))
10674 /* Load of the constant -0.0 or -1.0 will be split as
10675 fldz;fchs or fld1;fchs sequence. */
10676 if (real_isnegzero (r))
10678 if (real_identical (r, &dconstm1))
10684 /* Return the opcode of the special instruction to be used to load
/* Maps the classification from standard_80387_constant_p to the x87
   load mnemonic (the case arms are on elided lines); any unexpected
   value aborts.  */
10688 standard_80387_constant_opcode (rtx x)
10690 switch (standard_80387_constant_p (x))
10710 gcc_unreachable ();
10714 /* Return the CONST_DOUBLE representing the 80387 constant that is
10715 loaded by the specified special instruction. The argument IDX
10716 matches the return value from standard_80387_constant_p. */
10719 standard_80387_constant_rtx (int idx)
/* Table lookup requires the lazy initializer to have run.  */
10723 if (! ext_80387_constants_init)
10724 init_ext_80387_constants ();
10737 gcc_unreachable ();
10740 return const_double_from_real_value (ext_80387_constants_table[i],
10744 /* Return 1 if X is all 0s and 2 if x is all 1s
10745 in supported SSE/AVX vector mode. */
10748 standard_sse_constant_p (rtx x)
10755 mode = GET_MODE (x);
/* All-zeros: loadable with [v]pxor / xorps.  */
10757 if (x == const0_rtx || x == CONST0_RTX (mode))
/* All-ones: loadable with pcmpeqd (mode constraints on elided
   lines).  */
10759 if (vector_all_ones_operand (x, mode))
10778 if (TARGET_AVX512F)
10787 /* Return the opcode of the special instruction to be used to load
/* Picks the zero-idiom (xor variants, by insn attr mode and available
   AVX512DQ/VL features) or the all-ones idiom (vpternlogd for 512-bit,
   pcmpeqd otherwise) for the constant classified by
   standard_sse_constant_p.  */
10791 standard_sse_constant_opcode (rtx_insn *insn, rtx x)
10793 switch (standard_sse_constant_p (x))
10796 switch (get_attr_mode (insn))
10799 return "vpxord\t%g0, %g0, %g0";
10801 return TARGET_AVX512DQ ? "vxorps\t%g0, %g0, %g0"
10802 : "vpxord\t%g0, %g0, %g0";
10804 return TARGET_AVX512DQ ? "vxorpd\t%g0, %g0, %g0"
10805 : "vpxorq\t%g0, %g0, %g0";
10807 return TARGET_AVX512VL ? "vpxord\t%t0, %t0, %t0"
10808 : "%vpxor\t%0, %d0";
10810 return "%vxorpd\t%0, %d0";
10812 return "%vxorps\t%0, %d0";
10815 return TARGET_AVX512VL ? "vpxord\t%x0, %x0, %x0"
10816 : "vpxor\t%x0, %x0, %x0";
10818 return "vxorpd\t%x0, %x0, %x0";
10820 return "vxorps\t%x0, %x0, %x0";
/* All-ones constants.  */
10827 if (TARGET_AVX512VL
10828 || get_attr_mode (insn) == MODE_XI
10829 || get_attr_mode (insn) == MODE_V8DF
10830 || get_attr_mode (insn) == MODE_V16SF
10831 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
10833 return "vpcmpeqd\t%0, %0, %0";
10835 return "pcmpeqd\t%0, %0";
10840 gcc_unreachable ();
10843 /* Returns true if OP contains a symbol reference */
/* Recursive walk over OP's rtx format string: SYMBOL_REF/LABEL_REF
   hit directly, 'E' vectors and 'e' sub-expressions recursed into.  */
10846 symbolic_reference_mentioned_p (rtx op)
10851 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
10854 fmt = GET_RTX_FORMAT (GET_CODE (op));
10855 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
10861 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
10862 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
10866 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
10873 /* Return true if it is appropriate to emit `ret' instructions in the
10874 body of a function. Do this only if the epilogue is simple, needing a
10875 couple of insns. Prior to reloading, we can't tell how many registers
10876 must be saved, so return false then. Return false if there is no frame
10877 marker to de-allocate. */
10880 ix86_can_use_return_insn_p (void)
10882 struct ix86_frame frame;
10884 if (! reload_completed || frame_pointer_needed)
10887 /* Don't allow more than 32k pop, since that's all we can do
10888 with one instruction. */
10889 if (crtl->args.pops_args && crtl->args.size >= 32768)
10892 ix86_compute_frame_layout (&frame);
/* A bare `ret' works only when nothing but the return address is on
   the stack and no registers need restoring.  */
10893 return (frame.stack_pointer_offset == UNITS_PER_WORD
10894 && (frame.nregs + frame.nsseregs) == 0);
10897 /* Value should be nonzero if functions must have frame pointers.
10898 Zero means the frame pointer need not be set up (and parms may
10899 be accessed via the stack pointer) in functions that seem suitable. */
10902 ix86_frame_pointer_required (void)
10904 /* If we accessed previous frames, then the generated code expects
10905 to be able to access the saved ebp value in our frame. */
10906 if (cfun->machine->accesses_prev_frame)
10909 /* Several x86 os'es need a frame pointer for other reasons,
10910 usually pertaining to setjmp. */
10911 if (SUBTARGET_FRAME_POINTER_REQUIRED)
10914 /* For older 32-bit runtimes setjmp requires valid frame-pointer. */
10915 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
10918 /* Win64 SEH, very large frames need a frame-pointer as maximum stack
10919 allocation is 4GB. */
10920 if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
10923 /* SSE saves require frame-pointer when stack is misaligned. */
10924 if (TARGET_64BIT_MS_ABI && ix86_incoming_stack_boundary < 128)
10927 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
10928 turns off the frame pointer by default. Turn it back on now if
10929 we've not got a leaf function. */
10930 if (TARGET_OMIT_LEAF_FRAME_POINTER
10932 || ix86_current_function_calls_tls_descriptor))
/* Profiling without -mfentry also needs the frame pointer.  */
10935 if (crtl->profile && !flag_fentry)
10941 /* Record that the current function accesses previous call frames. */
/* Sets the flag checked by ix86_frame_pointer_required.  */
10944 ix86_setup_frame_addresses (void)
10946 cfun->machine->accesses_prev_frame = 1;
10949 #ifndef USE_HIDDEN_LINKONCE
10950 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
10951 # define USE_HIDDEN_LINKONCE 1
10953 # define USE_HIDDEN_LINKONCE 0
10957 static int pic_labels_used;
10959 /* Fills in the label name that should be used for a pc thunk for
10960 the given register. */
/* 32-bit only: "__x86.get_pc_thunk.<reg>" when hidden-linkonce
   sections are usable, otherwise an internal "LPR<regno>" label.
   NAME must have room for 32 bytes.  */
10963 get_pc_thunk_name (char name[32], unsigned int regno)
10965 gcc_assert (!TARGET_64BIT);
10967 if (USE_HIDDEN_LINKONCE)
10968 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
10970 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
10974 /* This function generates code for -fpic that loads %ebx with
10975 the return address of the caller and then returns. */
/* Implement TARGET_ASM_CODE_END: emit one get-pc thunk per GP
   register recorded in pic_labels_used.  Each thunk is a tiny
   function (mov (%esp),%reg; ret) placed in a comdat/linkonce or
   regular text section depending on the target's capabilities.
   (NOTE(review): elided listing — several structural lines are
   missing from this view.)  */
10978 ix86_code_end (void)
10983 for (regno = AX_REG; regno <= SP_REG; regno++)
10988 if (!(pic_labels_used & (1 << regno)))
10991 get_pc_thunk_name (name, regno);
/* Build a minimal FUNCTION_DECL so debug/unwind info is coherent.  */
10993 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
10994 get_identifier (name),
10995 build_function_type_list (void_type_node, NULL_TREE));
10996 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
10997 NULL_TREE, void_type_node);
10998 TREE_PUBLIC (decl) = 1;
10999 TREE_STATIC (decl) = 1;
11000 DECL_IGNORED_P (decl) = 1;
/* Darwin path: weak definition in the coalesced text section.  */
11005 switch_to_section (darwin_sections[text_coal_section]);
11006 fputs ("\t.weak_definition\t", asm_out_file);
11007 assemble_name (asm_out_file, name);
11008 fputs ("\n\t.private_extern\t", asm_out_file);
11009 assemble_name (asm_out_file, name);
11010 putc ('\n', asm_out_file);
11011 ASM_OUTPUT_LABEL (asm_out_file, name);
11012 DECL_WEAK (decl) = 1;
11016 if (USE_HIDDEN_LINKONCE)
11018 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
11020 targetm.asm_out.unique_section (decl, 0);
11021 switch_to_section (get_named_section (decl, NULL, 0));
11023 targetm.asm_out.globalize_label (asm_out_file, name);
11024 fputs ("\t.hidden\t", asm_out_file);
11025 assemble_name (asm_out_file, name);
11026 putc ('\n', asm_out_file);
11027 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
11031 switch_to_section (text_section);
11032 ASM_OUTPUT_LABEL (asm_out_file, name);
11035 DECL_INITIAL (decl) = make_node (BLOCK);
11036 current_function_decl = decl;
11037 allocate_struct_function (decl, false);
11038 init_function_start (decl);
11039 first_function_block_is_cold = false;
11040 /* Make sure unwind info is emitted for the thunk if needed. */
11041 final_start_function (emit_barrier (), asm_out_file, 1);
11043 /* Pad stack IP move with 4 instructions (two NOPs count
11044 as one instruction). */
11045 if (TARGET_PAD_SHORT_FUNCTION)
11050 fputs ("\tnop\n", asm_out_file);
/* The thunk body: load the return address into the register.  */
11053 xops[0] = gen_rtx_REG (Pmode, regno);
11054 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
11055 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
11056 output_asm_insn ("%!ret", NULL);
11057 final_end_function ();
11058 init_insn_lengths ();
11059 free_after_compilation (cfun);
11061 current_function_decl = NULL;
11064 if (flag_split_stack)
11065 file_end_indicate_split_stack ();
11068 /* Emit code for the SET_GOT patterns.  */
/* Emits the assembly that loads the GOT base address into DEST,
   using either the VxWorks RTP scheme, a mov from an internal label
   (non-PIC), or a call to the per-register pc-thunk followed by an
   add of _GLOBAL_OFFSET_TABLE_.  LABEL, when non-NULL, names the
   re-use point (e.g. a nonlocal-goto receiver).  */
11071 output_set_got (rtx dest, rtx label)
11077   if (TARGET_VXWORKS_RTP && flag_pic)
11079       /* Load (*VXWORKS_GOTT_BASE) into the PIC register.  */
11080       xops[2] = gen_rtx_MEM (Pmode,
11081 			     gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
11082       output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
11084       /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
11085 	 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
11086 	 an unadorned address.  */
11087       xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
11088       SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
11089       output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
11093   xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
11098       /* We don't need a pic base, we're not producing pic.  */
11099       gcc_unreachable ();
/* Non-thunk path: mov the address of a local label into DEST and
   emit that label immediately after.  */
11101       xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
11102       output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
11103       targetm.asm_out.internal_label (asm_out_file, "L",
11104 				      CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
/* Thunk path: record the thunk as used (ix86_code_end will emit it)
   and call it; it leaves the pc in DEST.  */
11109       get_pc_thunk_name (name, REGNO (dest));
11110       pic_labels_used |= 1 << REGNO (dest);
11112       xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
11113       xops[2] = gen_rtx_MEM (QImode, xops[2]);
11114       output_asm_insn ("%!call\t%X2", xops);
11117       /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
11118 	 This is what will be referenced by the Mach-O PIC subsystem.  */
11119       if (machopic_should_output_picbase_label () || !label)
11120 	ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
11122       /* When we are restoring the pic base at the site of a nonlocal label,
11123 	 and we decided to emit the pic base above, we will still output a
11124 	 local label used for calculating the correction offset (even though
11125 	 the offset will be 0 in that case).  */
11127 	targetm.asm_out.internal_label (asm_out_file, "L",
11128 					CODE_LABEL_NUMBER (label));
11133   output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
11138 /* Generate a "push" pattern for input ARG.  */
/* Side effect: updates the frame-state model (cfa_offset when the CFA
   is still the stack pointer, and sp_offset always) by one word.
   Returns an unemitted SET of a PRE_DEC stack mem from ARG; registers
   narrower than word_mode are widened to word_mode first.  */
11143   struct machine_function *m = cfun->machine;
11145   if (m->fs.cfa_reg == stack_pointer_rtx)
11146     m->fs.cfa_offset += UNITS_PER_WORD;
11147   m->fs.sp_offset += UNITS_PER_WORD;
11149   if (REG_P (arg) && GET_MODE (arg) != word_mode)
11150     arg = gen_rtx_REG (word_mode, REGNO (arg));
11152   return gen_rtx_SET (gen_rtx_MEM (word_mode,
11153 				   gen_rtx_PRE_DEC (Pmode,
11154 						    stack_pointer_rtx)),
11158 /* Generate a "pop" pattern for input ARG.  */
/* Pure constructor (unlike gen_push it does not touch the frame-state
   model): returns a SET of ARG from a POST_INC stack mem, widening a
   narrow register destination to word_mode.  */
11163   if (REG_P (arg) && GET_MODE (arg) != word_mode)
11164     arg = gen_rtx_REG (word_mode, REGNO (arg));
11166   return gen_rtx_SET (arg,
11167 		      gen_rtx_MEM (word_mode,
11168 				   gen_rtx_POST_INC (Pmode,
11169 						     stack_pointer_rtx)));
11172 /* Return >= 0 if there is an unused call-clobbered register available
11173    for the entire function.  */
11175 static unsigned int
11176 ix86_select_alt_pic_regnum (void)
11178   if (ix86_use_pseudo_pic_reg ())
11179     return INVALID_REGNUM;
11183       && !ix86_current_function_calls_tls_descriptor)
11186       /* Can't use the same register for both PIC and DRAP.  */
11187       if (crtl->drap_reg)
11188 	drap = REGNO (crtl->drap_reg);
/* Scan regnos 2..0 (presumably DX, CX, AX in that order — confirm
   against the register ordering in i386.h) for one that is never
   live and not claimed by DRAP.  */
11191       for (i = 2; i >= 0; --i)
11192         if (i != drap && !df_regs_ever_live_p (i))
11196   return INVALID_REGNUM;
11199 /* Return TRUE if we need to save REGNO.  */
/* MAYBE_EH_RETURN additionally counts the EH return data registers
   as needing a save when the function calls __builtin_eh_return.  */
11202 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
11204   if (regno == REAL_PIC_OFFSET_TABLE_REGNUM
11205       && pic_offset_table_rtx)
11207       if (ix86_use_pseudo_pic_reg ())
11209 	  /* REAL_PIC_OFFSET_TABLE_REGNUM used by call to
11210 	  _mcount in prologue.  */
11211 	  if (!TARGET_64BIT && flag_pic && crtl->profile)
/* The hard PIC register must be preserved unless an alternate free
   call-clobbered register can hold the GOT pointer instead.  */
11214       else if (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
11216 		   || crtl->calls_eh_return
11217 		   || crtl->uses_const_pool
11218 		   || cfun->has_nonlocal_label)
11219 	return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
11222   if (crtl->calls_eh_return && maybe_eh_return)
11227 	  unsigned test = EH_RETURN_DATA_REGNO (i);
11228 	  if (test == INVALID_REGNUM)
/* DRAP must survive into the epilogue unless its save/restore was
   explicitly suppressed.  */
11236       && regno == REGNO (crtl->drap_reg)
11237       && !cfun->machine->no_drap_save_restore)
/* Default rule: save call-saved, non-fixed registers that are ever
   live; the hard frame pointer is handled by the prologue itself
   when a frame pointer is needed.  */
11240   return (df_regs_ever_live_p (regno)
11241 	  && !call_used_regs[regno]
11242 	  && !fixed_regs[regno]
11243 	  && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
11246 /* Return number of saved general purpose registers.  */
11249 ix86_nsaved_regs (void)
11254   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11255     if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true))
11260 /* Return number of saved SSE registers.  */
/* Only the 64-bit MS ABI has call-saved SSE registers; everywhere
   else the answer is zero.  */
11263 ix86_nsaved_sseregs (void)
11268   if (!TARGET_64BIT_MS_ABI)
11270   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11271     if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
11276 /* Given FROM and TO register numbers, say whether this elimination is
11277    allowed.  If stack alignment is needed, we can only replace argument
11278    pointer with hard frame pointer, or replace frame pointer with stack
11279    pointer.  Otherwise, frame pointer elimination is automatically
11280    handled and all other eliminations are valid.  */
11283 ix86_can_eliminate (const int from, const int to)
11285   if (stack_realign_fp)
11286     return ((from == ARG_POINTER_REGNUM
11287 	     && to == HARD_FRAME_POINTER_REGNUM)
11288 	    || (from == FRAME_POINTER_REGNUM
11289 		&& to == STACK_POINTER_REGNUM));
11291     return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
11294 /* Return the offset between two registers, one to be eliminated, and the other
11295    its replacement, at the start of a routine.  */
/* Recomputes the full frame layout on every call and reads the
   relevant offsets out of it.  */
11298 ix86_initial_elimination_offset (int from, int to)
11300   struct ix86_frame frame;
11301   ix86_compute_frame_layout (&frame);
11303   if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
11304     return frame.hard_frame_pointer_offset;
11305   else if (from == FRAME_POINTER_REGNUM
11306 	   && to == HARD_FRAME_POINTER_REGNUM)
11307     return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
11310       gcc_assert (to == STACK_POINTER_REGNUM);
11312       if (from == ARG_POINTER_REGNUM)
11313 	return frame.stack_pointer_offset;
11315       gcc_assert (from == FRAME_POINTER_REGNUM);
11316       return frame.stack_pointer_offset - frame.frame_pointer_offset;
11320 /* In a dynamically-aligned function, we can't know the offset from
11321    stack pointer to frame pointer, so we must ensure that setjmp
11322    eliminates fp against the hard fp (%ebp) rather than trying to
11323    index from %esp up to the top of the frame across a gap that is
11324    of unknown (at compile-time) size.  */
11326 ix86_builtin_setjmp_frame_value (void)
11328   return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
11331 /* When using -fsplit-stack, the allocation routines set a field in
11332    the TCB to the bottom of the stack plus this much space, measured
/* (Value is in bytes.)  */
11335 #define SPLIT_STACK_AVAILABLE 256
11337 /* Fill structure ix86_frame about frame of currently computed function.  */
/* Computes, top (incoming return address) to bottom, all the offsets that
   the prologue/epilogue expanders and elimination-offset queries rely on:
   GPR save area, SSE save area, local frame, va_arg save area, outgoing
   args, and the red-zone adjustment.  All offsets are relative to the CFA
   side (return address at offset 0 .. UNITS_PER_WORD).  */
11340 ix86_compute_frame_layout (struct ix86_frame *frame)
11342   unsigned HOST_WIDE_INT stack_alignment_needed;
11343   HOST_WIDE_INT offset;
11344   unsigned HOST_WIDE_INT preferred_alignment;
11345   HOST_WIDE_INT size = get_frame_size ();
11346   HOST_WIDE_INT to_allocate;
11348   frame->nregs = ix86_nsaved_regs ();
11349   frame->nsseregs = ix86_nsaved_sseregs ();
11351   /* 64-bit MS ABI seems to require stack alignment to be always 16,
11352      except for function prologues, leaf functions and when the default
11353      incoming stack boundary is overridden at command line or via
11354      force_align_arg_pointer attribute.  */
11355   if ((TARGET_64BIT_MS_ABI && crtl->preferred_stack_boundary < 128)
11356       && (!crtl->is_leaf || cfun->calls_alloca != 0
11357           || ix86_current_function_calls_tls_descriptor
11358 	  || ix86_incoming_stack_boundary < 128))
11360       crtl->preferred_stack_boundary = 128;
11361       crtl->stack_alignment_needed = 128;
11364   stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
11365   preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
11367   gcc_assert (!size || stack_alignment_needed);
11368   gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
11369   gcc_assert (preferred_alignment <= stack_alignment_needed);
11371   /* For SEH we have to limit the amount of code movement into the prologue.
11372      At present we do this via a BLOCKAGE, at which point there's very little
11373      scheduling that can be done, which means that there's very little point
11374      in doing anything except PUSHs.  */
11376     cfun->machine->use_fast_prologue_epilogue = false;
11378   /* During reload iteration the amount of registers saved can change.
11379      Recompute the value as needed.  Do not recompute when amount of registers
11380      didn't change as reload does multiple calls to the function and does not
11381      expect the decision to change within single iteration.  */
11382   else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun))
11383 	   && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
11385       int count = frame->nregs;
11386       struct cgraph_node *node = cgraph_node::get (current_function_decl);
11388       cfun->machine->use_fast_prologue_epilogue_nregs = count;
11390       /* The fast prologue uses move instead of push to save registers.  This
11391          is significantly longer, but also executes faster as modern hardware
11392          can execute the moves in parallel, but can't do that for push/pop.
11394 	 Be careful about choosing what prologue to emit:  When function takes
11395 	 many instructions to execute we may use slow version as well as in
11396 	 case function is known to be outside hot spot (this is known with
11397 	 feedback only).  Weight the size of function by number of registers
11398 	 to save as it is cheap to use one or two push instructions but very
11399 	 slow to use many of them.  */
11401 	count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
11402       if (node->frequency < NODE_FREQUENCY_NORMAL
11403 	  || (flag_branch_probabilities
11404 	      && node->frequency < NODE_FREQUENCY_HOT))
11405 	cfun->machine->use_fast_prologue_epilogue = false;
11407 	cfun->machine->use_fast_prologue_epilogue
11408 	   = !expensive_function_p (count);
11411   frame->save_regs_using_mov
11412     = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
11413        /* If static stack checking is enabled and done with probes,
11414 	  the registers need to be saved before allocating the frame.  */
11415        && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
11417   /* Skip return address.  */
11418   offset = UNITS_PER_WORD;
11420   /* Skip pushed static chain.  */
11421   if (ix86_static_chain_on_stack)
11422     offset += UNITS_PER_WORD;
11424   /* Skip saved base pointer.  */
11425   if (frame_pointer_needed)
11426     offset += UNITS_PER_WORD;
11427   frame->hfp_save_offset = offset;
11429   /* The traditional frame pointer location is at the top of the frame.  */
11430   frame->hard_frame_pointer_offset = offset;
11432   /* Register save area */
11433   offset += frame->nregs * UNITS_PER_WORD;
11434   frame->reg_save_offset = offset;
11436   /* On SEH target, registers are pushed just before the frame pointer
11439     frame->hard_frame_pointer_offset = offset;
11441   /* Align and set SSE register save area.  */
11442   if (frame->nsseregs)
11444       /* The only ABI that has saved SSE registers (Win64) also has a
11445 	 16-byte aligned default stack, and thus we don't need to be
11446 	 within the re-aligned local stack frame to save them.  In case
11447 	 incoming stack boundary is aligned to less than 16 bytes,
11448 	 unaligned move of SSE register will be emitted, so there is
11449 	 no point to round up the SSE register save area outside the
11450 	 re-aligned local stack frame to 16 bytes.  */
11451       if (ix86_incoming_stack_boundary >= 128)
11452 	offset = ROUND_UP (offset, 16);
11453       offset += frame->nsseregs * 16;
11455   frame->sse_reg_save_offset = offset;
11457   /* The re-aligned stack starts here.  Values before this point are not
11458      directly comparable with values below this point.  In order to make
11459      sure that no value happens to be the same before and after, force
11460      the alignment computation below to add a non-zero value.  */
11461   if (stack_realign_fp)
11462     offset = ROUND_UP (offset, stack_alignment_needed);
11465   frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
11466   offset += frame->va_arg_size;
11468   /* Align start of frame for local function.  */
11469   if (stack_realign_fp
11470       || offset != frame->sse_reg_save_offset
11473       || cfun->calls_alloca
11474       || ix86_current_function_calls_tls_descriptor)
11475     offset = ROUND_UP (offset, stack_alignment_needed);
11477   /* Frame pointer points here.  */
11478   frame->frame_pointer_offset = offset;
11482   /* Add outgoing arguments area.  Can be skipped if we eliminated
11483      all the function calls as dead code.
11484      Skipping is however impossible when function calls alloca.  Alloca
11485      expander assumes that last crtl->outgoing_args_size
11486      of stack frame are unused.  */
11487   if (ACCUMULATE_OUTGOING_ARGS
11488       && (!crtl->is_leaf || cfun->calls_alloca
11489 	  || ix86_current_function_calls_tls_descriptor))
11491       offset += crtl->outgoing_args_size;
11492       frame->outgoing_arguments_size = crtl->outgoing_args_size;
11495     frame->outgoing_arguments_size = 0;
11497   /* Align stack boundary.  Only needed if we're calling another function
11498      or using alloca.  */
11499   if (!crtl->is_leaf || cfun->calls_alloca
11500       || ix86_current_function_calls_tls_descriptor)
11501     offset = ROUND_UP (offset, preferred_alignment);
11503   /* We've reached end of stack frame.  */
11504   frame->stack_pointer_offset = offset;
11506   /* Size prologue needs to allocate.  */
11507   to_allocate = offset - frame->sse_reg_save_offset;
/* A single register save plus no frame allocation is cheaper as a push;
   and a 64-bit frame larger than 2GB cannot use mov with a 32-bit
   displacement.  */
11509   if ((!to_allocate && frame->nregs <= 1)
11510       || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
11511     frame->save_regs_using_mov = false;
11513   if (ix86_using_red_zone ()
11514       && crtl->sp_is_unchanging
11516       && !ix86_current_function_calls_tls_descriptor)
11518       frame->red_zone_size = to_allocate;
11519       if (frame->save_regs_using_mov)
11520 	frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
11521       if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
11522 	frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
11525     frame->red_zone_size = 0;
11526   frame->stack_pointer_offset -= frame->red_zone_size;
11528   /* The SEH frame pointer location is near the bottom of the frame.
11529      This is enforced by the fact that the difference between the
11530      stack pointer and the frame pointer is limited to 240 bytes in
11531      the unwind data structure.  */
11534       HOST_WIDE_INT diff;
11536       /* If we can leave the frame pointer where it is, do so.  Also, returns
11537 	 the establisher frame for __builtin_frame_address (0).  */
11538       diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
11539       if (diff <= SEH_MAX_FRAME_SIZE
11540 	  && (diff > 240 || (diff & 15) != 0)
11541 	  && !crtl->accesses_prior_frames)
11543 	  /* Ideally we'd determine what portion of the local stack frame
11544 	     (within the constraint of the lowest 240) is most heavily used.
11545 	     But without that complication, simply bias the frame pointer
11546 	     by 128 bytes so as to maximize the amount of the local stack
11547 	     frame that is addressable with 8-bit offsets.  */
11548 	  frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
11553 /* This is semi-inlined memory_address_length, but simplified
11554    since we know that we're always dealing with reg+offset, and
11555    to avoid having to create and discard all that rtl.  */
/* Returns the extra encoding length (displacement/SIB bytes) for a
   reg+offset address using base REGNO with displacement OFFSET.  */
11558 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
11564       /* EBP and R13 cannot be encoded without an offset.  */
11565       len = (regno == BP_REG || regno == R13_REG);
11567   else if (IN_RANGE (offset, -128, 127))
/* Offsets outside [-128,127] need a 4-byte displacement (handled in
   the elided branch above this excerpt).  */
11570   /* ESP and R12 must be encoded with a SIB byte.  */
11571   if (regno == SP_REG || regno == R12_REG)
11577 /* Return an RTX that points to CFA_OFFSET within the stack frame.
11578    The valid base registers are taken from CFUN->MACHINE->FS.  */
/* Two strategies: with the fast prologue/epilogue, prefer whichever
   register maximizes scheduling freedom (FP > DRAP > SP); otherwise,
   pick the base giving the shortest address encoding, breaking ties
   in the order SP < DRAP < FP (later candidates win on "<=").  */
11581 choose_baseaddr (HOST_WIDE_INT cfa_offset)
11583   const struct machine_function *m = cfun->machine;
11584   rtx base_reg = NULL;
11585   HOST_WIDE_INT base_offset = 0;
11587   if (m->use_fast_prologue_epilogue)
11589       /* Choose the base register most likely to allow the most scheduling
11590 	   opportunities.  Generally FP is valid throughout the function,
11591 	   while DRAP must be reloaded within the epilogue.  But choose either
11592 	   over the SP due to increased encoding size.  */
11594       if (m->fs.fp_valid)
11596 	  base_reg = hard_frame_pointer_rtx;
11597 	  base_offset = m->fs.fp_offset - cfa_offset;
11599       else if (m->fs.drap_valid)
11601 	  base_reg = crtl->drap_reg;
11602 	  base_offset = 0 - cfa_offset;
11604       else if (m->fs.sp_valid)
11606 	  base_reg = stack_pointer_rtx;
11607 	  base_offset = m->fs.sp_offset - cfa_offset;
11612       HOST_WIDE_INT toffset;
11613       int len = 16, tlen;
11615       /* Choose the base register with the smallest address encoding.
11616          With a tie, choose FP > DRAP > SP.  */
11617       if (m->fs.sp_valid)
11619 	  base_reg = stack_pointer_rtx;
11620 	  base_offset = m->fs.sp_offset - cfa_offset;
11621           len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
11623       if (m->fs.drap_valid)
11625 	  toffset = 0 - cfa_offset;
11626 	  tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
11629 	      base_reg = crtl->drap_reg;
11630 	      base_offset = toffset;
11634       if (m->fs.fp_valid)
11636 	  toffset = m->fs.fp_offset - cfa_offset;
11637 	  tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
11640 	      base_reg = hard_frame_pointer_rtx;
11641 	      base_offset = toffset;
11646   gcc_assert (base_reg != NULL);
11648   return plus_constant (Pmode, base_reg, base_offset);
11651 /* Emit code to save registers in the prologue.  */
/* Pushes every general-purpose register that ix86_save_reg selects,
   iterating from high regno to low, and marks each push frame-related
   for DWARF CFI.  */
11654 ix86_emit_save_regs (void)
11656   unsigned int regno;
11659   for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
11660     if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true))
11662 	insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
11663 	RTX_FRAME_RELATED_P (insn) = 1;
11667 /* Emit a single register save at CFA - CFA_OFFSET.  */
/* Stores REG (of MODE) into the frame with a mov, choosing the best
   base register via choose_baseaddr, then attaches whichever REG_CFA_*
   note lets dwarf2out describe the save correctly — this matters when
   the store address is not expressed relative to the current CFA
   register (realigned frames, DRAP, mismatched base).  */
11670 ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno,
11671 			      HOST_WIDE_INT cfa_offset)
11673   struct machine_function *m = cfun->machine;
11674   rtx reg = gen_rtx_REG (mode, regno);
11675   rtx unspec = NULL_RTX;
11676   rtx mem, addr, base, insn;
11677   unsigned int align;
11679   addr = choose_baseaddr (cfa_offset);
11680   mem = gen_frame_mem (mode, addr);
11682   /* The location is aligned up to INCOMING_STACK_BOUNDARY.  */
11683   align = MIN (GET_MODE_ALIGNMENT (mode), INCOMING_STACK_BOUNDARY);
11684   set_mem_align (mem, align);
11686   /* SSE saves are not within re-aligned local stack frame.
11687      In case INCOMING_STACK_BOUNDARY is misaligned, we have
11688      to emit unaligned store.  */
11689   if (mode == V4SFmode && align < 128)
11690     unspec = gen_rtx_UNSPEC (mode, gen_rtvec (1, reg), UNSPEC_STOREU);
11692   insn = emit_insn (gen_rtx_SET (mem, unspec ? unspec : reg));
11693   RTX_FRAME_RELATED_P (insn) = 1;
/* Strip the constant part of the address to find the base register.  */
11696   if (GET_CODE (base) == PLUS)
11697     base = XEXP (base, 0);
11698   gcc_checking_assert (REG_P (base));
11700   /* When saving registers into a re-aligned local stack frame, avoid
11701      any tricky guessing by dwarf2out.  */
11702   if (m->fs.realigned)
11704       gcc_checking_assert (stack_realign_drap);
11706       if (regno == REGNO (crtl->drap_reg))
11708 	  /* A bit of a hack.  We force the DRAP register to be saved in
11709 	     the re-aligned stack frame, which provides us with a copy
11710 	     of the CFA that will last past the prologue.  Install it.  */
11711 	  gcc_checking_assert (cfun->machine->fs.fp_valid);
11712 	  addr = plus_constant (Pmode, hard_frame_pointer_rtx,
11713 				cfun->machine->fs.fp_offset - cfa_offset);
11714 	  mem = gen_rtx_MEM (mode, addr);
11715 	  add_reg_note (insn, REG_CFA_DEF_CFA, mem);
11719 	  /* The frame pointer is a stable reference within the
11720 	     aligned frame.  Use it.  */
11721 	  gcc_checking_assert (cfun->machine->fs.fp_valid);
11722 	  addr = plus_constant (Pmode, hard_frame_pointer_rtx,
11723 				cfun->machine->fs.fp_offset - cfa_offset);
11724 	  mem = gen_rtx_MEM (mode, addr);
11725 	  add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
11729   /* The memory may not be relative to the current CFA register,
11730      which means that we may need to generate a new pattern for
11731      use by the unwind info.  */
11732   else if (base != m->fs.cfa_reg)
11734       addr = plus_constant (Pmode, m->fs.cfa_reg,
11735 			    m->fs.cfa_offset - cfa_offset);
11736       mem = gen_rtx_MEM (mode, addr);
11737       add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, reg));
11740     add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
11743 /* Emit code to save registers using MOV insns.
11744    First register is stored at CFA - CFA_OFFSET.  */
/* Saves in ascending regno order; CFA_OFFSET decreases by one word
   per register so successive saves fill lower stack addresses.  */
11746 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
11748   unsigned int regno;
11750   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11751     if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true))
11753         ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
11754 	cfa_offset -= UNITS_PER_WORD;
11758 /* Emit code to save SSE registers using MOV insns.
11759    First register is stored at CFA - CFA_OFFSET.  */
/* Same pattern as the GPR variant, but 16-byte V4SF slots.  */
11761 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
11763   unsigned int regno;
11765   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11766     if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
11768 	ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
11769 	cfa_offset -= GET_MODE_SIZE (V4SFmode);
/* Pending REG_CFA_RESTORE notes waiting for the next stack-manipulation
   insn (GC-rooted because reg notes are GC-allocated).  */
11773 static GTY(()) rtx queued_cfa_restores;
11775 /* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
11776    manipulation insn.  The value is on the stack at CFA - CFA_OFFSET.
11777    Don't add the note if the previously saved value will be left untouched
11778    within stack red-zone till return, as unwinders can find the same value
11779    in the register and on the stack.  */
11782 ix86_add_cfa_restore_note (rtx_insn *insn, rtx reg, HOST_WIDE_INT cfa_offset)
/* Red-zone case: the slot stays valid until return, so no note needed
   (unless shrink-wrapping invalidates that reasoning).  */
11784   if (!crtl->shrink_wrapped
11785       && cfa_offset <= cfun->machine->fs.red_zone_offset)
/* With a concrete INSN, attach the note immediately ...  */
11790       add_reg_note (insn, REG_CFA_RESTORE, reg);
11791       RTX_FRAME_RELATED_P (insn) = 1;
/* ... otherwise queue it for ix86_add_queued_cfa_restore_notes.  */
11794     queued_cfa_restores
11795       = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
11798 /* Add queued REG_CFA_RESTORE notes if any to INSN.  */
/* Splices the whole queued note chain onto INSN's REG_NOTES list in one
   step and clears the queue.  */
11801 ix86_add_queued_cfa_restore_notes (rtx insn)
11804   if (!queued_cfa_restores)
/* Walk to the tail of the queued chain so it can be linked in front of
   INSN's existing notes.  */
11806   for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
11808   XEXP (last, 1) = REG_NOTES (insn);
11809   REG_NOTES (insn) = queued_cfa_restores;
11810   queued_cfa_restores = NULL_RTX;
11811   RTX_FRAME_RELATED_P (insn) = 1;
11814 /* Expand prologue or epilogue stack adjustment.
11815    The pattern exist to put a dependency on all ebp-based memory accesses.
11816    STYLE should be negative if instructions should be marked as frame related,
11817    zero if %r11 register is live and cannot be freely used and positive
/* (STYLE > 0: %r11 is available as a scratch — continuation of the
   comment falls outside this excerpt.)  SET_CFA requests a
   REG_CFA_ADJUST_CFA note and updates the tracked CFA; the function
   also keeps m->fs.sp_offset/sp_valid in sync when DEST is the stack
   pointer.  */
11821 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
11822 			   int style, bool set_cfa)
11824   struct machine_function *m = cfun->machine;
11826   bool add_frame_related_expr = false;
11828   if (Pmode == SImode)
11829     insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
11830   else if (x86_64_immediate_operand (offset, DImode))
11831     insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
/* Offset does not fit in a signed 32-bit immediate: materialize it in a
   scratch register first.  */
11835       /* r11 is used by indirect sibcall return as well, set before the
11836 	 epilogue and used after the epilogue.  */
11838 	tmp = gen_rtx_REG (DImode, R11_REG);
11841 	  gcc_assert (src != hard_frame_pointer_rtx
11842 		      && dest != hard_frame_pointer_rtx);
11843 	  tmp = hard_frame_pointer_rtx;
11845 	  insn = emit_insn (gen_rtx_SET (tmp, offset));
11847 	    add_frame_related_expr = true;
11849       insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
11852   insn = emit_insn (insn);
11854     ix86_add_queued_cfa_restore_notes (insn);
/* set_cfa path: record the new CFA register/offset and emit the note
   describing DEST = SRC + OFFSET for the unwinder.  */
11860       gcc_assert (m->fs.cfa_reg == src);
11861       m->fs.cfa_offset += INTVAL (offset);
11862       m->fs.cfa_reg = dest;
11864       r = gen_rtx_PLUS (Pmode, src, offset);
11865       r = gen_rtx_SET (dest, r);
11866       add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
11867       RTX_FRAME_RELATED_P (insn) = 1;
11869   else if (style < 0)
11871       RTX_FRAME_RELATED_P (insn) = 1;
11872       if (add_frame_related_expr)
11874 	  rtx r = gen_rtx_PLUS (Pmode, src, offset);
11875 	  r = gen_rtx_SET (dest, r);
11876 	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
11880   if (dest == stack_pointer_rtx)
11882       HOST_WIDE_INT ooffset = m->fs.sp_offset;
11883       bool valid = m->fs.sp_valid;
11885       if (src == hard_frame_pointer_rtx)
11887 	  valid = m->fs.fp_valid;
11888 	  ooffset = m->fs.fp_offset;
11890       else if (src == crtl->drap_reg)
11892 	  valid = m->fs.drap_valid;
11897 	  /* Else there are two possibilities: SP itself, which we set
11898 	     up as the default above.  Or EH_RETURN_STACKADJ_RTX, which is
11899 	     taken care of this by hand along the eh_return path.  */
11900 	  gcc_checking_assert (src == stack_pointer_rtx
11901 			       || offset == const0_rtx);
11904       m->fs.sp_offset = ooffset - INTVAL (offset);
11905       m->fs.sp_valid = valid;
11909 /* Find an available register to be used as dynamic realign argument
11910    pointer register.  Such a register will be written in prologue and
11911    used in begin of body, so it must not be
11912    1. parameter passing register.
11914    We reuse static-chain register if it is available.  Otherwise, we
11915    use DI for i386 and R13 for x86-64.  We chose R13 since it has
11918    Return: the regno of chosen register.  */
11920 static unsigned int
11921 find_drap_reg (void)
11923   tree decl = cfun->decl;
/* 64-bit branch (enclosing conditional not visible in this excerpt).  */
11927       /* Use R13 for nested function or function need static chain.
11928 	 Since function with tail call may use any caller-saved
11929 	 registers in epilogue, DRAP must not use caller-saved
11930 	 register in such case.  */
11931       if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
/* 32-bit branch.  */
11938       /* Use DI for nested function or function need static chain.
11939 	 Since function with tail call may use any caller-saved
11940 	 registers in epilogue, DRAP must not use caller-saved
11941 	 register in such case.  */
11942       if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
11945       /* Reuse static chain register if it isn't used for parameter
11947       if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
11949 	  unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
11950 	  if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
11957 /* Handle a "force_align_arg_pointer" attribute.  */
/* Attribute-handler hook: warns and drops the attribute when applied
   to anything other than a function/method type or a FIELD_DECL /
   TYPE_DECL.  */
11960 ix86_handle_force_align_arg_pointer_attribute (tree *node, tree name,
11961 					       tree, int, bool *no_add_attrs)
11963   if (TREE_CODE (*node) != FUNCTION_TYPE
11964       && TREE_CODE (*node) != METHOD_TYPE
11965       && TREE_CODE (*node) != FIELD_DECL
11966       && TREE_CODE (*node) != TYPE_DECL)
11968       warning (OPT_Wattributes, "%qE attribute only applies to functions",
11970       *no_add_attrs = true;
11976 /* Return minimum incoming stack alignment.  */
/* Combines, in priority order: the user-specified boundary, the
   -mstackrealign special case, the per-function attribute, the
   parameter stack boundary floor, and the main() entry relaxation.
   SIBCALL affects the -mstackrealign case (per the comment below).  */
11978 static unsigned int
11979 ix86_minimum_incoming_stack_boundary (bool sibcall)
11981   unsigned int incoming_stack_boundary;
11983   /* Prefer the one specified at command line.  */
11984   if (ix86_user_incoming_stack_boundary)
11985     incoming_stack_boundary = ix86_user_incoming_stack_boundary;
11986   /* In 32bit, use MIN_STACK_BOUNDARY for incoming stack boundary
11987      if -mstackrealign is used, it isn't used for sibcall check and
11988      estimated stack alignment is 128bit.  */
11990 	   && ix86_force_align_arg_pointer
11991 	   && crtl->stack_alignment_estimated == 128)
11992     incoming_stack_boundary = MIN_STACK_BOUNDARY;
11994     incoming_stack_boundary = ix86_default_incoming_stack_boundary;
11996   /* Incoming stack alignment can be changed on individual functions
11997      via force_align_arg_pointer attribute.  We use the smallest
11998      incoming stack boundary.  */
11999   if (incoming_stack_boundary > MIN_STACK_BOUNDARY
12000       && lookup_attribute (ix86_force_align_arg_pointer_string,
12001 			   TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
12002     incoming_stack_boundary = MIN_STACK_BOUNDARY;
12004   /* The incoming stack frame has to be aligned at least at
12005      parm_stack_boundary.  */
12006   if (incoming_stack_boundary < crtl->parm_stack_boundary)
12007     incoming_stack_boundary = crtl->parm_stack_boundary;
12009   /* Stack at entrance of main is aligned by runtime.  We use the
12010      smallest incoming stack boundary.  */
12011   if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
12012       && DECL_NAME (current_function_decl)
12013       && MAIN_NAME_P (DECL_NAME (current_function_decl))
12014       && DECL_FILE_SCOPE_P (current_function_decl))
12015     incoming_stack_boundary = MAIN_STACK_BOUNDARY;
12017   return incoming_stack_boundary;
12020 /* Update incoming stack boundary and estimated stack alignment.  */
/* Target hook: refreshes ix86_incoming_stack_boundary and raises the
   estimated/preferred boundaries to 128 bits for the x86-64 varargs
   register save area and for TLS descriptor calls.  */
12023 ix86_update_stack_boundary (void)
12025   ix86_incoming_stack_boundary
12026     = ix86_minimum_incoming_stack_boundary (false);
12028   /* x86_64 vararg needs 16byte stack alignment for register save
12032       && crtl->stack_alignment_estimated < 128)
12033     crtl->stack_alignment_estimated = 128;
12035   /* __tls_get_addr needs to be called with 16-byte aligned stack.  */
12036   if (ix86_tls_descriptor_calls_expanded_in_cfun
12037       && crtl->preferred_stack_boundary < 128)
12038     crtl->preferred_stack_boundary = 128;
12041 /* Handle the TARGET_GET_DRAP_RTX hook.  Return NULL if no DRAP is
12042    needed or an rtx for DRAP otherwise.  */
12045 ix86_get_drap_rtx (void)
/* Force DRAP when requested or when outgoing args are pushed, so that
   stack realignment can be tested/handled uniformly.  */
12047   if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
12048     crtl->need_drap = true;
12050   if (stack_realign_drap)
12052       /* Assign DRAP to vDRAP and returns vDRAP */
12053       unsigned int regno = find_drap_reg ();
12056       rtx_insn *seq, *insn;
12058       arg_ptr = gen_rtx_REG (Pmode, regno);
12059       crtl->drap_reg = arg_ptr;
/* Copy the hard DRAP into a pseudo right after function entry, and
   mark that copy with REG_CFA_SET_VDRAP for the unwinder.  */
12062       drap_vreg = copy_to_reg (arg_ptr);
12063       seq = get_insns ();
12066       insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
12069 	  add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
12070 	  RTX_FRAME_RELATED_P (insn) = 1;
12078 /* Handle the TARGET_INTERNAL_ARG_POINTER hook.  */
/* x86 just uses the default virtual incoming-args pointer.  */
12081 ix86_internal_arg_pointer (void)
12083   return virtual_incoming_args_rtx;
/* Descriptor for a temporary register picked at function entry
   (fields beyond the opening brace are outside this excerpt).  */
12086 struct scratch_reg {
12091 /* Return a short-lived scratch register for use on function entry.
12092    In 32-bit mode, it is valid only after the registers are saved
12093    in the prologue.  This register must be released by means of
12094    release_scratch_register_on_entry once it is dead.  */
12097 get_scratch_register_on_entry (struct scratch_reg *sr)
12105   /* We always use R11 in 64-bit mode.  */
/* 32-bit: pick a register not used for argument passing, not the
   static chain, and not the DRAP register; prefer one that is being
   saved anyway (bx/si/di) so a push/pop spill is avoided.  */
12110       tree decl = current_function_decl, fntype = TREE_TYPE (decl);
12112 	= lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
12114 	= lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
12115       bool static_chain_p = DECL_STATIC_CHAIN (decl);
12116       int regparm = ix86_function_regparm (fntype, decl);
12118 	= crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
12120       /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
12121 	  for the static chain register.  */
12122       if ((regparm < 1 || (fastcall_p && !static_chain_p))
12123 	  && drap_regno != AX_REG)
12125       /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
12126 	 for the static chain register.  */
12127       else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
12129       else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
12131       /* ecx is the static chain register.  */
12132       else if (regparm < 3 && !fastcall_p && !thiscall_p
12134 	       && drap_regno != CX_REG)
12136       else if (ix86_save_reg (BX_REG, true))
12138       /* esi is the static chain register.  */
12139       else if (!(regparm == 3 && static_chain_p)
12140 	       && ix86_save_reg (SI_REG, true))
12142       else if (ix86_save_reg (DI_REG, true))
/* Last resort: a register that must be spilled around the use.  */
12146 	  regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
12151   sr->reg = gen_rtx_REG (Pmode, regno);
/* Spill case: push the chosen register so it can be restored by
   release_scratch_register_on_entry.  */
12154       rtx_insn *insn = emit_insn (gen_push (sr->reg));
12155       RTX_FRAME_RELATED_P (insn) = 1;
12159 /* Release a scratch register obtained from the preceding function. */
12162 release_scratch_register_on_entry (struct scratch_reg *sr)
12166 struct machine_function *m = cfun->machine;
12167 rtx x, insn = emit_insn (gen_pop (sr->reg));
12169 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
12170 RTX_FRAME_RELATED_P (insn) = 1;
/* Attach an explicit SP += UNITS_PER_WORD expression so the unwinder
   sees the stack adjustment performed by the pop.  */
12171 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
12172 x = gen_rtx_SET (stack_pointer_rtx, x);
12173 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
/* Keep the frame-state bookkeeping in sync with the emitted pop.  */
12174 m->fs.sp_offset -= UNITS_PER_WORD;
12178 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
12180 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
12183 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
12185 /* We skip the probe for the first interval + a small dope of 4 words and
12186 probe that many bytes past the specified size to maintain a protection
12187 area at the bottom of the stack. */
12188 const int dope = 4 * UNITS_PER_WORD;
12189 rtx size_rtx = GEN_INT (size), last;
12191 /* See if we have a constant small number of probes to generate. If so,
12192 that's the easy case. The run-time loop is made up of 9 insns in the
12193 generic case while the compile-time loop is made up of 3+2*(n-1) insns
12194 for n # of intervals. */
12195 if (size <= 4 * PROBE_INTERVAL)
12197 HOST_WIDE_INT i, adjust;
12198 bool first_probe = true;
12200 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
12201 values of N from 1 until it exceeds SIZE. If only one probe is
12202 needed, this will not generate any code. Then adjust and probe
12203 to PROBE_INTERVAL + SIZE. */
12204 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
/* The very first adjustment also covers the skipped interval + dope.  */
12208 adjust = 2 * PROBE_INTERVAL + dope;
12209 first_probe = false;
12212 adjust = PROBE_INTERVAL;
12214 emit_insn (gen_rtx_SET (stack_pointer_rtx,
12215 plus_constant (Pmode, stack_pointer_rtx,
12217 emit_stack_probe (stack_pointer_rtx);
12221 adjust = size + PROBE_INTERVAL + dope;
12223 adjust = size + PROBE_INTERVAL - i;
12225 emit_insn (gen_rtx_SET (stack_pointer_rtx,
12226 plus_constant (Pmode, stack_pointer_rtx,
12228 emit_stack_probe (stack_pointer_rtx);
12230 /* Adjust back to account for the additional first interval. */
12231 last = emit_insn (gen_rtx_SET (stack_pointer_rtx,
12232 plus_constant (Pmode, stack_pointer_rtx,
12233 PROBE_INTERVAL + dope)));
12236 /* Otherwise, do the same as above, but in a loop. Note that we must be
12237 extra careful with variables wrapping around because we might be at
12238 the very top (or the very bottom) of the address space and we have
12239 to be able to handle this case properly; in particular, we use an
12240 equality test for the loop condition. */
12243 HOST_WIDE_INT rounded_size;
12244 struct scratch_reg sr;
12246 get_scratch_register_on_entry (&sr);
12249 /* Step 1: round SIZE to the previous multiple of the interval. */
12251 rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
12254 /* Step 2: compute initial and final value of the loop counter. */
12256 /* SP = SP_0 + PROBE_INTERVAL. */
12257 emit_insn (gen_rtx_SET (stack_pointer_rtx,
12258 plus_constant (Pmode, stack_pointer_rtx,
12259 - (PROBE_INTERVAL + dope))));
12261 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
/* Sizes <= 2^31 fit in the displacement of a single LEA/ADD;
   larger ones need the constant materialized first.  */
12262 if (rounded_size <= (HOST_WIDE_INT_1 << 31))
12263 emit_insn (gen_rtx_SET (sr.reg,
12264 plus_constant (Pmode, stack_pointer_rtx,
12268 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
12269 emit_insn (gen_rtx_SET (sr.reg,
12270 gen_rtx_PLUS (Pmode, sr.reg,
12271 stack_pointer_rtx)));
12275 /* Step 3: the loop
12279 SP = SP + PROBE_INTERVAL
12282 while (SP != LAST_ADDR)
12284 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
12285 values of N from 1 until it is equal to ROUNDED_SIZE. */
/* The loop body itself is a single machine pattern; its assembly is
   produced by output_adjust_stack_and_probe below.  */
12287 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
12290 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
12291 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
12293 if (size != rounded_size)
12295 emit_insn (gen_rtx_SET (stack_pointer_rtx,
12296 plus_constant (Pmode, stack_pointer_rtx,
12297 rounded_size - size)));
12298 emit_stack_probe (stack_pointer_rtx);
12301 /* Adjust back to account for the additional first interval. */
12302 last = emit_insn (gen_rtx_SET (stack_pointer_rtx,
12303 plus_constant (Pmode, stack_pointer_rtx,
12304 PROBE_INTERVAL + dope)));
12306 release_scratch_register_on_entry (&sr);
12309 /* Even if the stack pointer isn't the CFA register, we need to correctly
12310 describe the adjustments made to it, in particular differentiate the
12311 frame-related ones from the frame-unrelated ones. */
/* Describe the net effect to the unwinder as two SETs: the real
   allocation of SIZE bytes, then the undo of the probing overshoot.  */
12314 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
12315 XVECEXP (expr, 0, 0)
12316 = gen_rtx_SET (stack_pointer_rtx,
12317 plus_constant (Pmode, stack_pointer_rtx, -size));
12318 XVECEXP (expr, 0, 1)
12319 = gen_rtx_SET (stack_pointer_rtx,
12320 plus_constant (Pmode, stack_pointer_rtx,
12321 PROBE_INTERVAL + dope + size));
12322 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
12323 RTX_FRAME_RELATED_P (last) = 1;
12325 cfun->machine->fs.sp_offset += size;
12328 /* Make sure nothing is scheduled before we are done. */
12329 emit_insn (gen_blockage ());
12332 /* Adjust the stack pointer up to REG while probing it. */
/* Emits the textual assembly for the probing loop created by
   ix86_adjust_stack_and_probe's Step 3 pattern.  */
12335 output_adjust_stack_and_probe (rtx reg)
12337 static int labelno = 0;
12341 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
12344 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
12346 /* SP = SP + PROBE_INTERVAL. */
/* "sub" because the stack grows downward.  */
12347 xops[0] = stack_pointer_rtx;
12348 xops[1] = GEN_INT (PROBE_INTERVAL);
12349 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
/* Probe by OR-ing zero into the word at the new SP: touches the page
   without changing memory.  */
12352 xops[1] = const0_rtx;
12353 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
12355 /* Test if SP == LAST_ADDR. */
12356 xops[0] = stack_pointer_rtx;
12358 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
/* Loop back until SP reaches REG (the precomputed last address).  */
12361 fputs ("\tjne\t", asm_out_file);
12362 assemble_name_raw (asm_out_file, loop_lab);
12363 fputc ('\n', asm_out_file);
12368 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
12369 inclusive. These are offsets from the current stack pointer. */
/* Unlike ix86_adjust_stack_and_probe, this probes without moving SP.  */
12372 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
12374 /* See if we have a constant small number of probes to generate. If so,
12375 that's the easy case. The run-time loop is made up of 6 insns in the
12376 generic case while the compile-time loop is made up of n insns for n #
12378 if (size <= 6 * PROBE_INTERVAL)
12382 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
12383 it exceeds SIZE. If only one probe is needed, this will not
12384 generate any code. Then probe at FIRST + SIZE. */
12385 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
12386 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
12389 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
12393 /* Otherwise, do the same as above, but in a loop. Note that we must be
12394 extra careful with variables wrapping around because we might be at
12395 the very top (or the very bottom) of the address space and we have
12396 to be able to handle this case properly; in particular, we use an
12397 equality test for the loop condition. */
12400 HOST_WIDE_INT rounded_size, last;
12401 struct scratch_reg sr;
12403 get_scratch_register_on_entry (&sr);
12406 /* Step 1: round SIZE to the previous multiple of the interval. */
12408 rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
12411 /* Step 2: compute initial and final value of the loop counter. */
12413 /* TEST_OFFSET = FIRST. */
/* Offsets are negated because the stack grows downward.  */
12414 emit_move_insn (sr.reg, GEN_INT (-first));
12416 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
12417 last = first + rounded_size;
12420 /* Step 3: the loop
12424 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
12427 while (TEST_ADDR != LAST_ADDR)
12429 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
12430 until it is equal to ROUNDED_SIZE. */
/* Assembly for this pattern is produced by output_probe_stack_range.  */
12432 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
12435 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
12436 that SIZE is equal to ROUNDED_SIZE. */
12438 if (size != rounded_size)
12439 emit_stack_probe (plus_constant (Pmode,
12440 gen_rtx_PLUS (Pmode,
12443 rounded_size - size));
12445 release_scratch_register_on_entry (&sr);
12448 /* Make sure nothing is scheduled before we are done. */
12449 emit_insn (gen_blockage ());
12452 /* Probe a range of stack addresses from REG to END, inclusive. These are
12453 offsets from the current stack pointer. */
/* Emits the textual assembly for the probing loop created by
   ix86_emit_probe_stack_range's Step 3 pattern; SP is not modified.  */
12456 output_probe_stack_range (rtx reg, rtx end)
12458 static int labelno = 0;
12462 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
12465 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
12467 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
/* The offset register counts downward ("sub"), matching the negated
   offsets loaded by the caller.  */
12469 xops[1] = GEN_INT (PROBE_INTERVAL);
12470 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
12472 /* Probe at TEST_ADDR. */
/* OR zero into SP + offset: touches the page without changing data.  */
12473 xops[0] = stack_pointer_rtx;
12475 xops[2] = const0_rtx;
12476 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
12478 /* Test if TEST_ADDR == LAST_ADDR. */
12481 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
12484 fputs ("\tjne\t", asm_out_file);
12485 assemble_name_raw (asm_out_file, loop_lab);
12486 fputc ('\n', asm_out_file);
12491 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
12492 to be generated in correct form. */
12494 ix86_finalize_stack_realign_flags (void)
12496 /* Check if stack realign is really needed after reload, and
12497 stores result in cfun */
/* The effective incoming boundary is the stricter of the parameter
   area's boundary and the target's incoming stack boundary.  */
12498 unsigned int incoming_stack_boundary
12499 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
12500 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
12501 unsigned int stack_realign
12502 = (incoming_stack_boundary
12503 < (crtl->is_leaf && !ix86_current_function_calls_tls_descriptor
12504 ? crtl->max_used_stack_slot_alignment
12505 : crtl->stack_alignment_needed));
/* Once finalized, the decision must not change — only re-verify it.  */
12507 if (crtl->stack_realign_finalized)
12509 /* After stack_realign_needed is finalized, we can no longer
12511 gcc_assert (crtl->stack_realign_needed == stack_realign);
12515 /* If the only reason for frame_pointer_needed is that we conservatively
12516 assumed stack realignment might be needed, but in the end nothing that
12517 needed the stack alignment had been spilled, clear frame_pointer_needed
12518 and say we don't need stack realignment. */
12520 && frame_pointer_needed
12522 && flag_omit_frame_pointer
12523 && crtl->sp_is_unchanging
12524 && !ix86_current_function_calls_tls_descriptor
12525 && !crtl->accesses_prior_frames
12526 && !cfun->calls_alloca
12527 && !crtl->calls_eh_return
12528 /* See ira_setup_eliminable_regset for the rationale. */
12529 && !(STACK_CHECK_MOVING_SP
12530 && flag_stack_check
12532 && cfun->can_throw_non_call_exceptions)
12533 && !ix86_frame_pointer_required ()
12534 && get_frame_size () == 0
12535 && ix86_nsaved_sseregs () == 0
12536 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
/* Scan every insn: if any still requires a stack frame beyond what
   the prologue registers provide, keep the conservative decision.  */
12538 HARD_REG_SET set_up_by_prologue, prologue_used;
12541 CLEAR_HARD_REG_SET (prologue_used);
12542 CLEAR_HARD_REG_SET (set_up_by_prologue);
12543 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
12544 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
12545 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
12546 HARD_FRAME_POINTER_REGNUM);
12547 FOR_EACH_BB_FN (bb, cfun)
12550 FOR_BB_INSNS (bb, insn)
12551 if (NONDEBUG_INSN_P (insn)
12552 && requires_stack_frame_p (insn, prologue_used,
12553 set_up_by_prologue))
12555 crtl->stack_realign_needed = stack_realign;
12556 crtl->stack_realign_finalized = true;
12561 /* If drap has been set, but it actually isn't live at the start
12562 of the function, there is no reason to set it up. */
12563 if (crtl->drap_reg)
12565 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
12566 if (! REGNO_REG_SET_P (DF_LR_IN (bb), REGNO (crtl->drap_reg)))
12568 crtl->drap_reg = NULL_RTX;
12569 crtl->need_drap = false;
12573 cfun->machine->no_drap_save_restore = true;
/* No realignment needed after all: downgrade every alignment field to
   the incoming boundary and refresh the dataflow info.  */
12575 frame_pointer_needed = false;
12576 stack_realign = false;
12577 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
12578 crtl->stack_alignment_needed = incoming_stack_boundary;
12579 crtl->stack_alignment_estimated = incoming_stack_boundary;
12580 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
12581 crtl->preferred_stack_boundary = incoming_stack_boundary;
12582 df_finish_pass (true);
12583 df_scan_alloc (NULL);
12585 df_compute_regs_ever_live (true);
12589 crtl->stack_realign_needed = stack_realign;
12590 crtl->stack_realign_finalized = true;
12593 /* Delete SET_GOT right after entry block if it is allocated to reg. */
12596 ix86_elim_entry_set_got (rtx reg)
/* Look at the first real insn of the first block after entry.  */
12598 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
12599 rtx_insn *c_insn = BB_HEAD (bb);
12600 if (!NONDEBUG_INSN_P (c_insn))
12601 c_insn = next_nonnote_nondebug_insn (c_insn);
12602 if (c_insn && NONJUMP_INSN_P (c_insn))
12604 rtx pat = PATTERN (c_insn);
12605 if (GET_CODE (pat) == PARALLEL)
12607 rtx vec = XVECEXP (pat, 0, 0);
/* NOTE(review): XINT on the SET source assumes it is an UNSPEC —
   presumably guaranteed by the set_got pattern shape; verify.  */
12608 if (GET_CODE (vec) == SET
12609 && XINT (XEXP (vec, 1), 1) == UNSPEC_SET_GOT
12610 && REGNO (XEXP (vec, 0)) == REGNO (reg))
12611 delete_insn (c_insn);
12616 /* Expand the prologue into a bunch of separate insns. */
12619 ix86_expand_prologue (void)
12621 struct machine_function *m = cfun->machine;
12623 struct ix86_frame frame;
12624 HOST_WIDE_INT allocate;
12625 bool int_registers_saved;
12626 bool sse_registers_saved;
12627 rtx static_chain = NULL_RTX;
/* Make the realignment decision final before laying out the frame.  */
12629 ix86_finalize_stack_realign_flags ();
12631 /* DRAP should not coexist with stack_realign_fp */
12632 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
12634 memset (&m->fs, 0, sizeof (m->fs));
12636 /* Initialize CFA state for before the prologue. */
12637 m->fs.cfa_reg = stack_pointer_rtx;
12638 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
12640 /* Track SP offset to the CFA. We continue tracking this after we've
12641 swapped the CFA register away from SP. In the case of re-alignment
12642 this is fudged; we're interested to offsets within the local frame. */
12643 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
12644 m->fs.sp_valid = true;
12646 ix86_compute_frame_layout (&frame);
12648 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
12650 /* We should have already generated an error for any use of
12651 ms_hook on a nested function. */
12652 gcc_checking_assert (!ix86_static_chain_on_stack)
12654 /* Check if profiling is active and we shall use profiling before
12655 prologue variant. If so sorry. */
12656 if (crtl->profile && flag_fentry != 0)
12657 sorry ("ms_hook_prologue attribute isn%'t compatible "
12658 "with -mfentry for 32-bit");
12660 /* In ix86_asm_output_function_label we emitted:
12661 8b ff movl.s %edi,%edi
12663 8b ec movl.s %esp,%ebp
12665 This matches the hookable function prologue in Win32 API
12666 functions in Microsoft Windows XP Service Pack 2 and newer.
12667 Wine uses this to enable Windows apps to hook the Win32 API
12668 functions provided by Wine.
12670 What that means is that we've already set up the frame pointer. */
12672 if (frame_pointer_needed
12673 && !(crtl->drap_reg && crtl->stack_realign_needed))
12677 /* We've decided to use the frame pointer already set up.
12678 Describe this to the unwinder by pretending that both
12679 push and mov insns happen right here.
12681 Putting the unwind info here at the end of the ms_hook
12682 is done so that we can make absolutely certain we get
12683 the required byte sequence at the start of the function,
12684 rather than relying on an assembler that can produce
12685 the exact encoding required.
12687 However it does mean (in the unpatched case) that we have
12688 a 1 insn window where the asynchronous unwind info is
12689 incorrect. However, if we placed the unwind info at
12690 its correct location we would have incorrect unwind info
12691 in the patched case. Which is probably all moot since
12692 I don't expect Wine generates dwarf2 unwind info for the
12693 system libraries that use this feature. */
12695 insn = emit_insn (gen_blockage ());
12697 push = gen_push (hard_frame_pointer_rtx);
12698 mov = gen_rtx_SET (hard_frame_pointer_rtx,
12699 stack_pointer_rtx);
12700 RTX_FRAME_RELATED_P (push) = 1;
12701 RTX_FRAME_RELATED_P (mov) = 1;
12703 RTX_FRAME_RELATED_P (insn) = 1;
12704 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
12705 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
12707 /* Note that gen_push incremented m->fs.cfa_offset, even
12708 though we didn't emit the push insn here. */
12709 m->fs.cfa_reg = hard_frame_pointer_rtx;
12710 m->fs.fp_offset = m->fs.cfa_offset;
12711 m->fs.fp_valid = true;
12715 /* The frame pointer is not needed so pop %ebp again.
12716 This leaves us with a pristine state. */
12717 emit_insn (gen_pop (hard_frame_pointer_rtx));
12721 /* The first insn of a function that accepts its static chain on the
12722 stack is to push the register that would be filled in by a direct
12723 call. This insn will be skipped by the trampoline. */
12724 else if (ix86_static_chain_on_stack)
12726 static_chain = ix86_static_chain (cfun->decl, false);
12727 insn = emit_insn (gen_push (static_chain));
12728 emit_insn (gen_blockage ());
12730 /* We don't want to interpret this push insn as a register save,
12731 only as a stack adjustment. The real copy of the register as
12732 a save will be done later, if needed. */
12733 t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
12734 t = gen_rtx_SET (stack_pointer_rtx, t);
12735 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
12736 RTX_FRAME_RELATED_P (insn) = 1;
12739 /* Emit prologue code to adjust stack alignment and setup DRAP, in case
12740 of DRAP is needed and stack realignment is really needed after reload */
12741 if (stack_realign_drap)
12743 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
12745 /* Only need to push parameter pointer reg if it is caller saved. */
12746 if (!call_used_regs[REGNO (crtl->drap_reg)])
12748 /* Push arg pointer reg */
12749 insn = emit_insn (gen_push (crtl->drap_reg));
12750 RTX_FRAME_RELATED_P (insn) = 1;
12753 /* Grab the argument pointer. */
12754 t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
12755 insn = emit_insn (gen_rtx_SET (crtl->drap_reg, t));
12756 RTX_FRAME_RELATED_P (insn) = 1;
/* From here on, the CFA is expressed in terms of DRAP.  */
12757 m->fs.cfa_reg = crtl->drap_reg;
12758 m->fs.cfa_offset = 0;
12760 /* Align the stack. */
12761 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
12763 GEN_INT (-align_bytes)));
12764 RTX_FRAME_RELATED_P (insn) = 1;
12766 /* Replicate the return address on the stack so that return
12767 address can be reached via (argp - 1) slot. This is needed
12768 to implement macro RETURN_ADDR_RTX and intrinsic function
12769 expand_builtin_return_addr etc. */
12770 t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
12771 t = gen_frame_mem (word_mode, t);
12772 insn = emit_insn (gen_push (t));
12773 RTX_FRAME_RELATED_P (insn) = 1;
12775 /* For the purposes of frame and register save area addressing,
12776 we've started over with a new frame. */
12777 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
12778 m->fs.realigned = true;
12782 /* Replicate static chain on the stack so that static chain
12783 can be reached via (argp - 2) slot. This is needed for
12784 nested function with stack realignment. */
12785 insn = emit_insn (gen_push (static_chain));
12786 RTX_FRAME_RELATED_P (insn) = 1;
12790 int_registers_saved = (frame.nregs == 0);
12791 sse_registers_saved = (frame.nsseregs == 0);
/* Set up the hard frame pointer if needed and not already valid.  */
12793 if (frame_pointer_needed && !m->fs.fp_valid)
12795 /* Note: AT&T enter does NOT have reversed args. Enter is probably
12796 slower on all targets. Also sdb doesn't like it. */
12797 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
12798 RTX_FRAME_RELATED_P (insn) = 1;
12800 /* Push registers now, before setting the frame pointer
12802 if (!int_registers_saved
12804 && !frame.save_regs_using_mov)
12806 ix86_emit_save_regs ();
12807 int_registers_saved = true;
12808 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
12811 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
12813 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
12814 RTX_FRAME_RELATED_P (insn) = 1;
12816 if (m->fs.cfa_reg == stack_pointer_rtx)
12817 m->fs.cfa_reg = hard_frame_pointer_rtx;
12818 m->fs.fp_offset = m->fs.sp_offset;
12819 m->fs.fp_valid = true;
12823 if (!int_registers_saved)
12825 /* If saving registers via PUSH, do so now. */
12826 if (!frame.save_regs_using_mov)
12828 ix86_emit_save_regs ();
12829 int_registers_saved = true;
12830 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
12833 /* When using red zone we may start register saving before allocating
12834 the stack frame saving one cycle of the prologue. However, avoid
12835 doing this if we have to probe the stack; at least on x86_64 the
12836 stack probe can turn into a call that clobbers a red zone location. */
12837 else if (ix86_using_red_zone ()
12838 && (! TARGET_STACK_PROBE
12839 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
12841 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
12842 int_registers_saved = true;
/* Frame-pointer-based realignment (no DRAP).  */
12846 if (stack_realign_fp)
12848 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
12849 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
12851 /* The computation of the size of the re-aligned stack frame means
12852 that we must allocate the size of the register save area before
12853 performing the actual alignment. Otherwise we cannot guarantee
12854 that there's enough storage above the realignment point. */
12855 if (m->fs.sp_offset != frame.sse_reg_save_offset)
12856 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12857 GEN_INT (m->fs.sp_offset
12858 - frame.sse_reg_save_offset),
12861 /* Align the stack. */
12862 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
12864 GEN_INT (-align_bytes)));
12866 /* For the purposes of register save area addressing, the stack
12867 pointer is no longer valid. As for the value of sp_offset,
12868 see ix86_compute_frame_layout, which we need to match in order
12869 to pass verification of stack_pointer_offset at the end. */
12870 m->fs.sp_offset = ROUND_UP (m->fs.sp_offset, align_bytes);
12871 m->fs.sp_valid = false;
/* Remaining stack bytes still to be allocated below this point.  */
12874 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
12876 if (flag_stack_usage_info)
12878 /* We start to count from ARG_POINTER. */
12879 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
12881 /* If it was realigned, take into account the fake frame. */
12882 if (stack_realign_drap)
12884 if (ix86_static_chain_on_stack)
12885 stack_size += UNITS_PER_WORD;
12887 if (!call_used_regs[REGNO (crtl->drap_reg)])
12888 stack_size += UNITS_PER_WORD;
12890 /* This over-estimates by 1 minimal-stack-alignment-unit but
12891 mitigates that by counting in the new return address slot. */
12892 current_function_dynamic_stack_size
12893 += crtl->stack_alignment_needed / BITS_PER_UNIT;
12896 current_function_static_stack_size = stack_size;
12899 /* On SEH target with very large frame size, allocate an area to save
12900 SSE registers (as the very large allocation won't be described). */
12902 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
12903 && !sse_registers_saved)
12905 HOST_WIDE_INT sse_size =
12906 frame.sse_reg_save_offset - frame.reg_save_offset;
12908 gcc_assert (int_registers_saved);
12910 /* No need to do stack checking as the area will be immediately
12912 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12913 GEN_INT (-sse_size), -1,
12914 m->fs.cfa_reg == stack_pointer_rtx);
12915 allocate -= sse_size;
12916 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
12917 sse_registers_saved = true;
12920 /* The stack has already been decremented by the instruction calling us
12921 so probe if the size is non-negative to preserve the protection area. */
12922 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
12924 /* We expect the registers to be saved when probes are used. */
12925 gcc_assert (int_registers_saved);
12927 if (STACK_CHECK_MOVING_SP)
12929 if (!(crtl->is_leaf && !cfun->calls_alloca
12930 && allocate <= PROBE_INTERVAL))
12932 ix86_adjust_stack_and_probe (allocate);
12938 HOST_WIDE_INT size = allocate;
/* Clamp the probed size on 64-bit so the offsets stay in range of
   32-bit displacements.  */
12940 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
12941 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
12943 if (TARGET_STACK_PROBE)
12945 if (crtl->is_leaf && !cfun->calls_alloca)
12947 if (size > PROBE_INTERVAL)
12948 ix86_emit_probe_stack_range (0, size);
12951 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
12955 if (crtl->is_leaf && !cfun->calls_alloca)
12957 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
12958 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT,
12959 size - STACK_CHECK_PROTECT);
12962 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
/* Small allocations: a simple SP adjustment suffices.  */
12969 else if (!ix86_target_stack_probe ()
12970 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
12972 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12973 GEN_INT (-allocate), -1,
12974 m->fs.cfa_reg == stack_pointer_rtx);
/* Large allocation: call the stack-allocation worker (e.g. ___chkstk)
   with the size in EAX, preserving EAX/R10 if they are live.  */
12978 rtx eax = gen_rtx_REG (Pmode, AX_REG);
12980 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
12981 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
12982 bool eax_live = ix86_eax_live_at_start_p ();
12983 bool r10_live = false;
12986 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
12990 insn = emit_insn (gen_push (eax));
12991 allocate -= UNITS_PER_WORD;
12992 /* Note that SEH directives need to continue tracking the stack
12993 pointer even after the frame pointer has been set up. */
12994 if (sp_is_cfa_reg || TARGET_SEH)
12997 m->fs.cfa_offset += UNITS_PER_WORD;
12998 RTX_FRAME_RELATED_P (insn) = 1;
12999 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
13000 gen_rtx_SET (stack_pointer_rtx,
13001 plus_constant (Pmode, stack_pointer_rtx,
13002 -UNITS_PER_WORD)));
13008 r10 = gen_rtx_REG (Pmode, R10_REG);
13009 insn = emit_insn (gen_push (r10));
13010 allocate -= UNITS_PER_WORD;
13011 if (sp_is_cfa_reg || TARGET_SEH)
13014 m->fs.cfa_offset += UNITS_PER_WORD;
13015 RTX_FRAME_RELATED_P (insn) = 1;
13016 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
13017 gen_rtx_SET (stack_pointer_rtx,
13018 plus_constant (Pmode, stack_pointer_rtx,
13019 -UNITS_PER_WORD)));
13023 emit_move_insn (eax, GEN_INT (allocate));
13024 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
13026 /* Use the fact that AX still contains ALLOCATE. */
13027 adjust_stack_insn = (Pmode == DImode
13028 ? gen_pro_epilogue_adjust_stack_di_sub
13029 : gen_pro_epilogue_adjust_stack_si_sub);
13031 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
13032 stack_pointer_rtx, eax));
13034 if (sp_is_cfa_reg || TARGET_SEH)
13037 m->fs.cfa_offset += allocate;
13038 RTX_FRAME_RELATED_P (insn) = 1;
13039 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
13040 gen_rtx_SET (stack_pointer_rtx,
13041 plus_constant (Pmode, stack_pointer_rtx,
13044 m->fs.sp_offset += allocate;
13046 /* Use stack_pointer_rtx for relative addressing so that code
13047 works for realigned stack, too. */
/* Restore any live EAX/R10 from their saved slots; AX still holds
   the allocation size, so SP + AX addresses those slots.  */
13048 if (r10_live && eax_live)
13050 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
13051 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
13052 gen_frame_mem (word_mode, t));
13053 t = plus_constant (Pmode, t, UNITS_PER_WORD);
13054 emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
13055 gen_frame_mem (word_mode, t));
13057 else if (eax_live || r10_live)
13059 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
13060 emit_move_insn (gen_rtx_REG (word_mode,
13061 (eax_live ? AX_REG : R10_REG)),
13062 gen_frame_mem (word_mode, t));
13065 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
13067 /* If we haven't already set up the frame pointer, do so now. */
13068 if (frame_pointer_needed && !m->fs.fp_valid)
13070 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
13071 GEN_INT (frame.stack_pointer_offset
13072 - frame.hard_frame_pointer_offset));
13073 insn = emit_insn (insn);
13074 RTX_FRAME_RELATED_P (insn) = 1;
13075 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
13077 if (m->fs.cfa_reg == stack_pointer_rtx)
13078 m->fs.cfa_reg = hard_frame_pointer_rtx;
13079 m->fs.fp_offset = frame.hard_frame_pointer_offset;
13080 m->fs.fp_valid = true;
/* Any register saves deferred to MOV form happen here.  */
13083 if (!int_registers_saved)
13084 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
13085 if (!sse_registers_saved)
13086 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
13088 /* For the mcount profiling on 32 bit PIC mode we need to emit SET_GOT
13090 if (!TARGET_64BIT && pic_offset_table_rtx && crtl->profile && !flag_fentry)
13092 rtx pic = gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM);
13093 insn = emit_insn (gen_set_got (pic));
13094 RTX_FRAME_RELATED_P (insn) = 1;
13095 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
13096 emit_insn (gen_prologue_use (pic));
13097 /* Deleting already emitted SET_GOT if exist and allocated to
13098 REAL_PIC_OFFSET_TABLE_REGNUM. */
13099 ix86_elim_entry_set_got (pic);
13102 if (crtl->drap_reg && !crtl->stack_realign_needed)
13104 /* vDRAP is setup but after reload it turns out stack realign
13105 isn't necessary, here we will emit prologue to setup DRAP
13106 without stack realign adjustment */
13107 t = choose_baseaddr (0);
13108 emit_insn (gen_rtx_SET (crtl->drap_reg, t));
13111 /* Prevent instructions from being scheduled into register save push
13112 sequence when access to the redzone area is done through frame pointer.
13113 The offset between the frame pointer and the stack pointer is calculated
13114 relative to the value of the stack pointer at the end of the function
13115 prologue, and moving instructions that access redzone area via frame
13116 pointer inside push sequence violates this assumption. */
13117 if (frame_pointer_needed && frame.red_zone_size)
13118 emit_insn (gen_memory_blockage ());
13120 /* Emit cld instruction if stringops are used in the function. */
13121 if (TARGET_CLD && ix86_current_function_needs_cld)
13122 emit_insn (gen_cld ());
13124 /* SEH requires that the prologue end within 256 bytes of the start of
13125 the function. Prevent instruction schedules that would extend that.
13126 Further, prevent alloca modifications to the stack pointer from being
13127 combined with prologue modifications. */
13129 emit_insn (gen_prologue_use (stack_pointer_rtx));
13132 /* Emit code to restore REG using a POP insn. */
13135 ix86_emit_restore_reg_using_pop (rtx reg)
13137 struct machine_function *m = cfun->machine;
13138 rtx_insn *insn = emit_insn (gen_pop (reg));
/* Record the restore for the unwinder and update our SP tracking.  */
13140 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
13141 m->fs.sp_offset -= UNITS_PER_WORD;
13143 if (m->fs.cfa_reg == crtl->drap_reg
13144 && REGNO (reg) == REGNO (crtl->drap_reg))
13146 /* Previously we'd represented the CFA as an expression
13147 like *(%ebp - 8). We've just popped that value from
13148 the stack, which means we need to reset the CFA to
13149 the drap register. This will remain until we restore
13150 the stack pointer. */
13151 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
13152 RTX_FRAME_RELATED_P (insn) = 1;
13154 /* This means that the DRAP register is valid for addressing too. */
13155 m->fs.drap_valid = true;
/* While SP is the CFA register, describe the pop's SP adjustment.  */
13159 if (m->fs.cfa_reg == stack_pointer_rtx)
13161 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
13162 x = gen_rtx_SET (stack_pointer_rtx, x);
13163 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
13164 RTX_FRAME_RELATED_P (insn) = 1;
13166 m->fs.cfa_offset -= UNITS_PER_WORD;
13169 /* When the frame pointer is the CFA, and we pop it, we are
13170 swapping back to the stack pointer as the CFA. This happens
13171 for stack frames that don't allocate other data, so we assume
13172 the stack pointer is now pointing at the return address, i.e.
13173 the function entry state, which makes the offset be 1 word. */
13174 if (reg == hard_frame_pointer_rtx)
13176 m->fs.fp_valid = false;
13177 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
13179 m->fs.cfa_reg = stack_pointer_rtx;
13180 m->fs.cfa_offset -= UNITS_PER_WORD;
13182 add_reg_note (insn, REG_CFA_DEF_CFA,
13183 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
13184 GEN_INT (m->fs.cfa_offset)));
13185 RTX_FRAME_RELATED_P (insn) = 1;
13190 /* Emit code to restore saved registers using POP insns.  */
13193 ix86_emit_restore_regs_using_pop (void)
13195 unsigned int regno;
/* Pop, in ascending regno order, every general register that
   ix86_save_reg reports as saved (maybe_eh_return == false).  */
13197 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
13198 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, false))
13199 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
13202 /* Emit code and notes for the LEAVE instruction.  */
/* NOTE(review): gapped extraction -- some original lines are missing.  */
13205 ix86_emit_leave (void)
13207 struct machine_function *m = cfun->machine;
13208 rtx_insn *insn = emit_insn (ix86_gen_leave ());
/* Attach any queued CFA-restore notes to the LEAVE insn.  */
13210 ix86_add_queued_cfa_restore_notes (insn);
/* LEAVE requires a valid frame pointer; afterwards the stack pointer
   is valid (one word below the old frame pointer location) and the
   frame pointer no longer is.  */
13212 gcc_assert (m->fs.fp_valid);
13213 m->fs.sp_valid = true;
13214 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
13215 m->fs.fp_valid = false;
13217 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
13219 m->fs.cfa_reg = stack_pointer_rtx;
13220 m->fs.cfa_offset = m->fs.sp_offset;
/* Re-express the CFA in terms of the stack pointer now that the
   frame pointer has been restored.  */
13222 add_reg_note (insn, REG_CFA_DEF_CFA,
13223 plus_constant (Pmode, stack_pointer_rtx,
13225 RTX_FRAME_RELATED_P (insn) = 1;
13227 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
13231 /* Emit code to restore saved registers using MOV insns.
13232 First register is restored from CFA - CFA_OFFSET.  */
/* NOTE(review): gapped extraction -- some original lines are missing.  */
13234 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
13235 bool maybe_eh_return)
13237 struct machine_function *m = cfun->machine;
13238 unsigned int regno;
13240 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
13241 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
13243 rtx reg = gen_rtx_REG (word_mode, regno);
/* choose_baseaddr picks the best available base (sp/fp/drap) for
   addressing the save slot at CFA_OFFSET.  */
13247 mem = choose_baseaddr (cfa_offset);
13248 mem = gen_frame_mem (word_mode, mem);
13249 insn = emit_move_insn (reg, mem);
13251 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
13253 /* Previously we'd represented the CFA as an expression
13254 like *(%ebp - 8). We've just popped that value from
13255 the stack, which means we need to reset the CFA to
13256 the drap register. This will remain until we restore
13257 the stack pointer. */
13258 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
13259 RTX_FRAME_RELATED_P (insn) = 1;
13261 /* This means that the DRAP register is valid for addressing. */
13262 m->fs.drap_valid = true;
/* Queue the restore note (emitted later) rather than attaching it to
   this insn; the stack pointer has not been finalized yet.  */
13265 ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
/* Successive registers are one word further down.  */
13267 cfa_offset -= UNITS_PER_WORD;
13271 /* Emit code to restore saved registers using MOV insns.
13272 First register is restored from CFA - CFA_OFFSET.  */
/* NOTE(review): gapped extraction -- some original lines (including the
   condition selecting the unaligned-load path) are missing.  */
13274 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
13275 bool maybe_eh_return)
13277 unsigned int regno;
13279 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
13280 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
13282 rtx reg = gen_rtx_REG (V4SFmode, regno);
13284 unsigned int align;
13286 mem = choose_baseaddr (cfa_offset);
13287 mem = gen_rtx_MEM (V4SFmode, mem);
13289 /* The location is aligned up to INCOMING_STACK_BOUNDARY. */
13290 align = MIN (GET_MODE_ALIGNMENT (V4SFmode), INCOMING_STACK_BOUNDARY);
13291 set_mem_align (mem, align);
13293 /* SSE saves are not within re-aligned local stack frame.
13294 In case INCOMING_STACK_BOUNDARY is misaligned, we have
13295 to emit unaligned load. */
/* The UNSPEC wraps the memory operand so the move is emitted as an
   unaligned vector load.  */
13298 rtx unspec = gen_rtx_UNSPEC (V4SFmode, gen_rtvec (1, mem),
13300 emit_insn (gen_rtx_SET (reg, unspec));
13303 emit_insn (gen_rtx_SET (reg, mem));
13305 ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
/* SSE slots are a full vector (16 bytes) apart.  */
13307 cfa_offset -= GET_MODE_SIZE (V4SFmode);
13311 /* Restore function stack, frame, and registers.  */
/* NOTE(review): gapped extraction -- braces, blank lines and several
   conditions between the numbered lines are missing; the code below is
   kept byte-identical and only comments are added.

   STYLE is a code describing the epilogue flavor; style == 2 is used
   for the eh_return path and style == 0 for sibcall epilogues (the
   frame state is restored and no return insn is emitted in that case,
   per the "Sibcall epilogues" comment below).  */
13314 ix86_expand_epilogue (int style)
13316 struct machine_function *m = cfun->machine;
/* Saved so the frame state can be restored for a subsequent epilogue
   expansion (see the tail of this function).  */
13317 struct machine_frame_state frame_state_save = m->fs;
13318 struct ix86_frame frame;
13319 bool restore_regs_via_mov;
13322 ix86_finalize_stack_realign_flags ();
13323 ix86_compute_frame_layout (&frame);
/* Establish and sanity-check which of sp/fp are usable as base
   pointers at the start of the epilogue.  */
13325 m->fs.sp_valid = (!frame_pointer_needed
13326 || (crtl->sp_is_unchanging
13327 && !stack_realign_fp));
13328 gcc_assert (!m->fs.sp_valid
13329 || m->fs.sp_offset == frame.stack_pointer_offset);
13331 /* The FP must be valid if the frame pointer is present. */
13332 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
13333 gcc_assert (!m->fs.fp_valid
13334 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
13336 /* We must have *some* valid pointer to the stack frame. */
13337 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
13339 /* The DRAP is never valid at this point. */
13340 gcc_assert (!m->fs.drap_valid);
13342 /* See the comment about red zone and frame
13343 pointer usage in ix86_expand_prologue. */
13344 if (frame_pointer_needed && frame.red_zone_size)
13345 emit_insn (gen_memory_blockage ());
13347 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
13348 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
13350 /* Determine the CFA offset of the end of the red-zone. */
13351 m->fs.red_zone_offset = 0;
13352 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
13354 /* The red-zone begins below the return address. */
13355 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
13357 /* When the register save area is in the aligned portion of
13358 the stack, determine the maximum runtime displacement that
13359 matches up with the aligned frame. */
13360 if (stack_realign_drap)
13361 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
13365 /* Special care must be taken for the normal return case of a function
13366 using eh_return: the eax and edx registers are marked as saved, but
13367 not restored along this path. Adjust the save location to match. */
13368 if (crtl->calls_eh_return && style != 2)
13369 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
/* Decide between MOV-based and POP-based register restores.  The
   cascade below encodes target/ABI constraints in priority order.  */
13371 /* EH_RETURN requires the use of moves to function properly. */
13372 if (crtl->calls_eh_return)
13373 restore_regs_via_mov = true;
13374 /* SEH requires the use of pops to identify the epilogue. */
13375 else if (TARGET_SEH)
13376 restore_regs_via_mov = false;
13377 /* If we're only restoring one register and sp is not valid then
13378 using a move instruction to restore the register since it's
13379 less work than reloading sp and popping the register. */
13380 else if (!m->fs.sp_valid && frame.nregs <= 1)
13381 restore_regs_via_mov = true;
13382 else if (TARGET_EPILOGUE_USING_MOVE
13383 && cfun->machine->use_fast_prologue_epilogue
13384 && (frame.nregs > 1
13385 || m->fs.sp_offset != frame.reg_save_offset))
13386 restore_regs_via_mov = true;
13387 else if (frame_pointer_needed
13389 && m->fs.sp_offset != frame.reg_save_offset)
13390 restore_regs_via_mov = true;
13391 else if (frame_pointer_needed
13392 && TARGET_USE_LEAVE
13393 && cfun->machine->use_fast_prologue_epilogue
13394 && frame.nregs == 1)
13395 restore_regs_via_mov = true;
13397 restore_regs_via_mov = false;
13399 if (restore_regs_via_mov || frame.nsseregs)
13401 /* Ensure that the entire register save area is addressable via
13402 the stack pointer, if we will restore via sp. */
/* 0x7fffffff: largest 32-bit signed displacement encodable in an
   addressing mode; beyond that the save area must be brought closer.  */
13404 && m->fs.sp_offset > 0x7fffffff
13405 && !(m->fs.fp_valid || m->fs.drap_valid)
13406 && (frame.nsseregs + frame.nregs) != 0)
13408 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
13409 GEN_INT (m->fs.sp_offset
13410 - frame.sse_reg_save_offset),
13412 m->fs.cfa_reg == stack_pointer_rtx);
13416 /* If there are any SSE registers to restore, then we have to do it
13417 via moves, since there's obviously no pop for SSE regs. */
13418 if (frame.nsseregs)
13419 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
13422 if (restore_regs_via_mov)
13427 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
13429 /* eh_return epilogues need %ecx added to the stack pointer. */
13432 rtx sa = EH_RETURN_STACKADJ_RTX;
13435 /* Stack align doesn't work with eh_return. */
13436 gcc_assert (!stack_realign_drap);
13437 /* Neither does regparm nested functions. */
13438 gcc_assert (!ix86_static_chain_on_stack);
13440 if (frame_pointer_needed)
13442 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
13443 t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
13444 emit_insn (gen_rtx_SET (sa, t));
13446 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
13447 insn = emit_move_insn (hard_frame_pointer_rtx, t);
13449 /* Note that we use SA as a temporary CFA, as the return
13450 address is at the proper place relative to it. We
13451 pretend this happens at the FP restore insn because
13452 prior to this insn the FP would be stored at the wrong
13453 offset relative to SA, and after this insn we have no
13454 other reasonable register to use for the CFA. We don't
13455 bother resetting the CFA to the SP for the duration of
13456 the return insn. */
13457 add_reg_note (insn, REG_CFA_DEF_CFA,
13458 plus_constant (Pmode, sa, UNITS_PER_WORD));
13459 ix86_add_queued_cfa_restore_notes (insn);
13460 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
13461 RTX_FRAME_RELATED_P (insn) = 1;
13463 m->fs.cfa_reg = sa;
13464 m->fs.cfa_offset = UNITS_PER_WORD;
13465 m->fs.fp_valid = false;
13467 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
13468 const0_rtx, style, false);
/* No frame pointer: add SA directly into the stack pointer.  */
13472 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
13473 t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
13474 insn = emit_insn (gen_rtx_SET (stack_pointer_rtx, t));
13475 ix86_add_queued_cfa_restore_notes (insn);
13477 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
13478 if (m->fs.cfa_offset != UNITS_PER_WORD)
13480 m->fs.cfa_offset = UNITS_PER_WORD;
13481 add_reg_note (insn, REG_CFA_DEF_CFA,
13482 plus_constant (Pmode, stack_pointer_rtx,
13484 RTX_FRAME_RELATED_P (insn) = 1;
13487 m->fs.sp_offset = UNITS_PER_WORD;
13488 m->fs.sp_valid = true;
13493 /* SEH requires that the function end with (1) a stack adjustment
13494 if necessary, (2) a sequence of pops, and (3) a return or
13495 jump instruction. Prevent insns from the function body from
13496 being scheduled into this sequence. */
13499 /* Prevent a catch region from being adjacent to the standard
13500 epilogue sequence. Unfortunately crtl->uses_eh_lsda nor
13501 several other flags that would be interesting to test are
13503 if (flag_non_call_exceptions)
13504 emit_insn (gen_nops (const1_rtx));
13506 emit_insn (gen_blockage ());
13509 /* First step is to deallocate the stack frame so that we can
13510 pop the registers. Also do it on SEH target for very large
13511 frame as the emitted instructions aren't allowed by the ABI in
13513 if (!m->fs.sp_valid
13515 && (m->fs.sp_offset - frame.reg_save_offset
13516 >= SEH_MAX_FRAME_SIZE)))
13518 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
13519 GEN_INT (m->fs.fp_offset
13520 - frame.reg_save_offset),
13523 else if (m->fs.sp_offset != frame.reg_save_offset)
13525 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
13526 GEN_INT (m->fs.sp_offset
13527 - frame.reg_save_offset),
13529 m->fs.cfa_reg == stack_pointer_rtx);
13532 ix86_emit_restore_regs_using_pop ();
13535 /* If we used a stack pointer and haven't already got rid of it,
13537 if (m->fs.fp_valid)
13539 /* If the stack pointer is valid and pointing at the frame
13540 pointer store address, then we only need a pop. */
13541 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
13542 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
13543 /* Leave results in shorter dependency chains on CPUs that are
13544 able to grok it fast. */
13545 else if (TARGET_USE_LEAVE
13546 || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
13547 || !cfun->machine->use_fast_prologue_epilogue)
13548 ix86_emit_leave ();
13551 pro_epilogue_adjust_stack (stack_pointer_rtx,
13552 hard_frame_pointer_rtx,
13553 const0_rtx, style, !using_drap);
13554 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
/* DRAP teardown: recover the entry stack pointer from the DRAP
   register.  The offset accounts for the return address plus any
   words the prologue pushed (static chain, saved DRAP).  */
13560 int param_ptr_offset = UNITS_PER_WORD;
13563 gcc_assert (stack_realign_drap);
13565 if (ix86_static_chain_on_stack)
13566 param_ptr_offset += UNITS_PER_WORD;
13567 if (!call_used_regs[REGNO (crtl->drap_reg)])
13568 param_ptr_offset += UNITS_PER_WORD;
13570 insn = emit_insn (gen_rtx_SET
13571 (stack_pointer_rtx,
13572 gen_rtx_PLUS (Pmode,
13574 GEN_INT (-param_ptr_offset))));
13575 m->fs.cfa_reg = stack_pointer_rtx;
13576 m->fs.cfa_offset = param_ptr_offset;
13577 m->fs.sp_offset = param_ptr_offset;
13578 m->fs.realigned = false;
13580 add_reg_note (insn, REG_CFA_DEF_CFA,
13581 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
13582 GEN_INT (param_ptr_offset)));
13583 RTX_FRAME_RELATED_P (insn) = 1;
13585 if (!call_used_regs[REGNO (crtl->drap_reg)])
13586 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
13589 /* At this point the stack pointer must be valid, and we must have
13590 restored all of the registers. We may not have deallocated the
13591 entire stack frame. We've delayed this until now because it may
13592 be possible to merge the local stack deallocation with the
13593 deallocation forced by ix86_static_chain_on_stack. */
13594 gcc_assert (m->fs.sp_valid);
13595 gcc_assert (!m->fs.fp_valid);
13596 gcc_assert (!m->fs.realigned);
13597 if (m->fs.sp_offset != UNITS_PER_WORD)
13599 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
13600 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
13604 ix86_add_queued_cfa_restore_notes (get_last_insn ());
13606 /* Sibcall epilogues don't want a return instruction. */
/* Restore the saved frame state and return early for sibcalls.  */
13609 m->fs = frame_state_save;
13613 if (crtl->args.pops_args && crtl->args.size)
13615 rtx popc = GEN_INT (crtl->args.pops_args);
13617 /* i386 can only pop 64K bytes. If asked to pop more, pop return
13618 address, do explicit add, and jump indirectly to the caller. */
13620 if (crtl->args.pops_args >= 65536)
13622 rtx ecx = gen_rtx_REG (SImode, CX_REG);
13625 /* There is no "pascal" calling convention in any 64bit ABI. */
13626 gcc_assert (!TARGET_64BIT);
/* Pop the return address into %ecx, adjust the stack explicitly,
   then jump through %ecx.  The notes record that %ecx now holds
   the return address (REG_CFA_REGISTER with pc_rtx).  */
13628 insn = emit_insn (gen_pop (ecx));
13629 m->fs.cfa_offset -= UNITS_PER_WORD;
13630 m->fs.sp_offset -= UNITS_PER_WORD;
13632 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
13633 x = gen_rtx_SET (stack_pointer_rtx, x);
13634 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
13635 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx));
13636 RTX_FRAME_RELATED_P (insn) = 1;
13638 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
13640 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
13643 emit_jump_insn (gen_simple_return_pop_internal (popc));
13646 emit_jump_insn (gen_simple_return_internal ());
13648 /* Restore the state back to the state from the prologue,
13649 so that it's correct for the next epilogue. */
13650 m->fs = frame_state_save;
13653 /* Reset from the function's potential modifications.  */
/* NOTE(review): gapped extraction -- some original lines (loop/condition
   fragments) are missing between the numbered lines below.  */
13656 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED, HOST_WIDE_INT)
/* Undo any regno change applied to the hard PIC register during
   compilation of this function.  */
13658 if (pic_offset_table_rtx
13659 && !ix86_use_pseudo_pic_reg ())
13660 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
13662 /* Mach-O doesn't support labels at the end of objects, so if
13663 it looks like we might want one, insert a NOP. */
13665 rtx_insn *insn = get_last_insn ();
13666 rtx_insn *deleted_debug_label = NULL;
/* Walk backwards over trailing notes, remembering any deleted
   debug label encountered on the way.  */
13669 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
13671 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
13672 notes only, instead set their CODE_LABEL_NUMBER to -1,
13673 otherwise there would be code generation differences
13674 in between -g and -g0. */
13675 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
13676 deleted_debug_label = insn;
13677 insn = PREV_INSN (insn);
13682 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
13683 fputs ("\tnop\n", file);
13684 else if (deleted_debug_label)
13685 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
13686 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
13687 CODE_LABEL_NUMBER (insn) = -1;
13693 /* Return a scratch register to use in the split stack prologue. The
13694 split stack prologue is used for -fsplit-stack. It is the first
13695 instructions in the function, even before the regular prologue.
13696 The scratch register can be any caller-saved register which is not
13697 used for parameters or for the static chain. */
/* NOTE(review): gapped extraction -- the 64-bit early-return path and the
   register choices in several branches are on missing lines.  Returns
   INVALID_REGNUM (with a sorry diagnostic) when no register is free.  */
13699 static unsigned int
13700 split_stack_prologue_scratch_regno (void)
13706 bool is_fastcall, is_thiscall;
/* Calling-convention attributes constrain which caller-saved
   registers are free on 32-bit targets.  */
13709 is_fastcall = (lookup_attribute ("fastcall",
13710 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
13712 is_thiscall = (lookup_attribute ("thiscall",
13713 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
13715 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
/* fastcall + static chain leaves no usable register at all.  */
13719 if (DECL_STATIC_CHAIN (cfun->decl))
13721 sorry ("-fsplit-stack does not support fastcall with "
13722 "nested function");
13723 return INVALID_REGNUM;
13727 else if (is_thiscall)
13729 if (!DECL_STATIC_CHAIN (cfun->decl))
13733 else if (regparm < 3)
13735 if (!DECL_STATIC_CHAIN (cfun->decl))
13741 sorry ("-fsplit-stack does not support 2 register "
13742 "parameters for a nested function");
13743 return INVALID_REGNUM;
13750 /* FIXME: We could make this work by pushing a register
13751 around the addition and comparison. */
13752 sorry ("-fsplit-stack does not support 3 register parameters");
13753 return INVALID_REGNUM;
13758 /* A SYMBOL_REF for the function which allocates new stackspace for
/* Lazily initialized in ix86_expand_split_stack_prologue; GTY(())
   registers these rtx roots with GCC's garbage collector.  */
13761 static GTY(()) rtx split_stack_fn;
13763 /* A SYMBOL_REF for the more stack function when using the large
13766 static GTY(()) rtx split_stack_fn_large;
13768 /* Handle -fsplit-stack. These are the first instructions in the
13769 function, even before the regular prologue. */
/* NOTE(review): gapped extraction -- braces, blank lines and some
   declarations/conditions between the numbered lines are missing.

   Overall flow: compare the (possibly offset) stack pointer against the
   TCB stack limit; if there is enough space, branch over the call to
   __morestack (or __morestack_large_model for the large code model),
   which allocates a new stack segment.  */
13772 ix86_expand_split_stack_prologue (void)
13774 struct ix86_frame frame;
13775 HOST_WIDE_INT allocate;
13776 unsigned HOST_WIDE_INT args_size;
13777 rtx_code_label *label;
13778 rtx limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
13779 rtx scratch_reg = NULL_RTX;
13780 rtx_code_label *varargs_label = NULL;
13783 gcc_assert (flag_split_stack && reload_completed);
13785 ix86_finalize_stack_realign_flags ();
13786 ix86_compute_frame_layout (&frame);
13787 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
13789 /* This is the label we will branch to if we have enough stack
13790 space. We expect the basic block reordering pass to reverse this
13791 branch if optimizing, so that we branch in the unlikely case. */
13792 label = gen_label_rtx ();
13794 /* We need to compare the stack pointer minus the frame size with
13795 the stack boundary in the TCB. The stack boundary always gives
13796 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
13797 can compare directly. Otherwise we need to do an addition. */
/* The UNSPEC_STACK_CHECK memory reference reads the stack limit out
   of the thread control block.  */
13799 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
13800 UNSPEC_STACK_CHECK);
13801 limit = gen_rtx_CONST (Pmode, limit);
13802 limit = gen_rtx_MEM (Pmode, limit);
13803 if (allocate < SPLIT_STACK_AVAILABLE)
13804 current = stack_pointer_rtx;
13807 unsigned int scratch_regno;
13810 /* We need a scratch register to hold the stack pointer minus
13811 the required frame size. Since this is the very start of the
13812 function, the scratch register can be any caller-saved
13813 register which is not used for parameters. */
13814 offset = GEN_INT (- allocate);
13815 scratch_regno = split_stack_prologue_scratch_regno ();
13816 if (scratch_regno == INVALID_REGNUM)
13818 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
13819 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
13821 /* We don't use ix86_gen_add3 in this case because it will
13822 want to split to lea, but when not optimizing the insn
13823 will not be split after this point. */
13824 emit_insn (gen_rtx_SET (scratch_reg,
13825 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
/* Offset too large for an immediate: materialize it first.  */
13830 emit_move_insn (scratch_reg, offset);
13831 emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg,
13832 stack_pointer_rtx));
13834 current = scratch_reg;
13837 ix86_expand_branch (GEU, current, limit, label);
13838 jump_insn = get_last_insn ();
13839 JUMP_LABEL (jump_insn) = label;
13841 /* Mark the jump as very likely to be taken. */
13842 add_int_reg_note (jump_insn, REG_BR_PROB,
13843 REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100);
13845 if (split_stack_fn == NULL_RTX)
13847 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
13848 SYMBOL_REF_FLAGS (split_stack_fn) |= SYMBOL_FLAG_LOCAL;
13850 fn = split_stack_fn;
13852 /* Get more stack space. We pass in the desired stack space and the
13853 size of the arguments to copy to the new stack. In 32-bit mode
13854 we push the parameters; __morestack will return on a new stack
13855 anyhow. In 64-bit mode we pass the parameters in r10 and
13857 allocate_rtx = GEN_INT (allocate);
13858 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
13859 call_fusage = NULL_RTX;
13864 reg10 = gen_rtx_REG (Pmode, R10_REG);
13865 reg11 = gen_rtx_REG (Pmode, R11_REG);
13867 /* If this function uses a static chain, it will be in %r10.
13868 Preserve it across the call to __morestack. */
13869 if (DECL_STATIC_CHAIN (cfun->decl))
13873 rax = gen_rtx_REG (word_mode, AX_REG);
13874 emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
13875 use_reg (&call_fusage, rax);
13878 if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
13881 HOST_WIDE_INT argval;
13883 gcc_assert (Pmode == DImode);
13884 /* When using the large model we need to load the address
13885 into a register, and we've run out of registers. So we
13886 switch to a different calling convention, and we call a
13887 different function: __morestack_large. We pass the
13888 argument size in the upper 32 bits of r10 and pass the
13889 frame size in the lower 32 bits. */
13890 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
13891 gcc_assert ((args_size & 0xffffffff) == args_size);
13893 if (split_stack_fn_large == NULL_RTX)
13895 split_stack_fn_large =
13896 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
13897 SYMBOL_REF_FLAGS (split_stack_fn_large) |= SYMBOL_FLAG_LOCAL;
13899 if (ix86_cmodel == CM_LARGE_PIC)
13901 rtx_code_label *label;
/* Large PIC model: compute the GOT base into r10 and load the
   callee address for __morestack_large_model via the GOT.  */
13904 label = gen_label_rtx ();
13905 emit_label (label);
13906 LABEL_PRESERVE_P (label) = 1;
13907 emit_insn (gen_set_rip_rex64 (reg10, label));
13908 emit_insn (gen_set_got_offset_rex64 (reg11, label));
13909 emit_insn (ix86_gen_add3 (reg10, reg10, reg11));
13910 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
13912 x = gen_rtx_CONST (Pmode, x);
13913 emit_move_insn (reg11, x);
13914 x = gen_rtx_PLUS (Pmode, reg10, reg11);
13915 x = gen_const_mem (Pmode, x);
13916 emit_move_insn (reg11, x);
13919 emit_move_insn (reg11, split_stack_fn_large);
/* Pack args_size (high 32 bits) and allocate (low 32 bits) into
   r10; the double shift avoids shifting by 32 in one step.  */
13923 argval = ((args_size << 16) << 16) + allocate;
13924 emit_move_insn (reg10, GEN_INT (argval));
13928 emit_move_insn (reg10, allocate_rtx);
13929 emit_move_insn (reg11, GEN_INT (args_size));
13930 use_reg (&call_fusage, reg11);
13933 use_reg (&call_fusage, reg10);
/* 32-bit path: pass the two arguments on the stack.  */
13937 emit_insn (gen_push (GEN_INT (args_size)));
13938 emit_insn (gen_push (allocate_rtx));
13940 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
13941 GEN_INT (UNITS_PER_WORD), constm1_rtx,
13943 add_function_usage_to (call_insn, call_fusage);
13945 /* In order to make call/return prediction work right, we now need
13946 to execute a return instruction. See
13947 libgcc/config/i386/morestack.S for the details on how this works.
13949 For flow purposes gcc must not see this as a return
13950 instruction--we need control flow to continue at the subsequent
13951 label. Therefore, we use an unspec. */
13952 gcc_assert (crtl->args.pops_args < 65536);
13953 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
13955 /* If we are in 64-bit mode and this function uses a static chain,
13956 we saved %r10 in %rax before calling _morestack. */
13957 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
13958 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
13959 gen_rtx_REG (word_mode, AX_REG));
13961 /* If this function calls va_start, we need to store a pointer to
13962 the arguments on the old stack, because they may not have been
13963 all copied to the new stack. At this point the old stack can be
13964 found at the frame pointer value used by __morestack, because
13965 __morestack has set that up before calling back to us. Here we
13966 store that pointer in a scratch register, and in
13967 ix86_expand_prologue we store the scratch register in a stack
13969 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
13971 unsigned int scratch_regno;
13975 scratch_regno = split_stack_prologue_scratch_regno ();
13976 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
13977 frame_reg = gen_rtx_REG (Pmode, BP_REG);
/* Stack layout at this point (see the words computation below):
   64-bit has 3 words above the frame pointer before the stack
   arguments, 32-bit has 5 (the extra two are the pushed
   __morestack arguments).  */
13981 return address within this function
13982 return address of caller of this function
13984 So we add three words to get to the stack arguments.
13988 return address within this function
13989 first argument to __morestack
13990 second argument to __morestack
13991 return address of caller of this function
13993 So we add five words to get to the stack arguments.
13995 words = TARGET_64BIT ? 3 : 5;
13996 emit_insn (gen_rtx_SET (scratch_reg,
13997 gen_rtx_PLUS (Pmode, frame_reg,
13998 GEN_INT (words * UNITS_PER_WORD))));
14000 varargs_label = gen_label_rtx ();
14001 emit_jump_insn (gen_jump (varargs_label));
14002 JUMP_LABEL (get_last_insn ()) = varargs_label;
/* Fall-through target: enough stack was available.  */
14007 emit_label (label);
14008 LABEL_NUSES (label) = 1;
14010 /* If this function calls va_start, we now have to set the scratch
14011 register for the case where we do not call __morestack. In this
14012 case we need to set it based on the stack pointer. */
14013 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
14015 emit_insn (gen_rtx_SET (scratch_reg,
14016 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14017 GEN_INT (UNITS_PER_WORD))));
14019 emit_label (varargs_label);
14020 LABEL_NUSES (varargs_label) = 1;
14024 /* We may have to tell the dataflow pass that the split stack prologue
14025 is initializing a scratch register. */
/* Mark the split-stack scratch register as live on entry so dataflow
   does not treat its prologue initialization as dead.  */
14028 ix86_live_on_entry (bitmap regs)
14030 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
14032 gcc_assert (flag_split_stack);
14033 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
14037 /* Extract the parts of an RTL expression that is a valid memory address
14038 for an instruction. Return 0 if the structure of the address is
14039 grossly off. Return -1 if the address contains ASHIFT, so it is not
14040 strictly valid, but still used for computing length of lea instruction. */
/* NOTE(review): gapped extraction -- braces, several switch cases, early
   returns and the function tail (out->seg / return) are on missing lines.
   The visible logic splits ADDR into base + index*scale + disp (+ seg)
   and then applies x86 encoding fix-ups.  */
14043 ix86_decompose_address (rtx addr, struct ix86_address *out)
14045 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
14046 rtx base_reg, index_reg;
14047 HOST_WIDE_INT scale = 1;
14048 rtx scale_rtx = NULL_RTX;
14051 addr_space_t seg = ADDR_SPACE_GENERIC;
14053 /* Allow zero-extended SImode addresses,
14054 they will be emitted with addr32 prefix. */
14055 if (TARGET_64BIT && GET_MODE (addr) == DImode)
14057 if (GET_CODE (addr) == ZERO_EXTEND
14058 && GET_MODE (XEXP (addr, 0)) == SImode)
14060 addr = XEXP (addr, 0);
14061 if (CONST_INT_P (addr))
14064 else if (GET_CODE (addr) == AND
14065 && const_32bit_mask (XEXP (addr, 1), DImode))
/* (and X 0xffffffff) is another form of 32-bit truncation.  */
14067 addr = simplify_gen_subreg (SImode, XEXP (addr, 0), DImode, 0);
14068 if (addr == NULL_RTX)
14071 if (CONST_INT_P (addr))
14076 /* Allow SImode subregs of DImode addresses,
14077 they will be emitted with addr32 prefix. */
14078 if (TARGET_64BIT && GET_MODE (addr) == SImode)
14080 if (SUBREG_P (addr)
14081 && GET_MODE (SUBREG_REG (addr)) == DImode)
14083 addr = SUBREG_REG (addr);
14084 if (CONST_INT_P (addr))
14091 else if (SUBREG_P (addr))
14093 if (REG_P (SUBREG_REG (addr)))
/* PLUS: flatten up to 4 addends and classify each one.  */
14098 else if (GET_CODE (addr) == PLUS)
14100 rtx addends[4], op;
14108 addends[n++] = XEXP (op, 1);
14111 while (GET_CODE (op) == PLUS);
14116 for (i = n; i >= 0; --i)
14119 switch (GET_CODE (op))
14124 index = XEXP (op, 0);
14125 scale_rtx = XEXP (op, 1);
14131 index = XEXP (op, 0);
14132 tmp = XEXP (op, 1);
14133 if (!CONST_INT_P (tmp))
14135 scale = INTVAL (tmp);
/* Only shifts of 0..3 map onto the 1/2/4/8 scale encodings.  */
14136 if ((unsigned HOST_WIDE_INT) scale > 3)
14138 scale = 1 << scale;
14143 if (GET_CODE (op) != UNSPEC)
14148 if (XINT (op, 1) == UNSPEC_TP
14149 && TARGET_TLS_DIRECT_SEG_REFS
14150 && seg == ADDR_SPACE_GENERIC)
14151 seg = DEFAULT_TLS_SEG_REG;
14157 if (!REG_P (SUBREG_REG (op)))
14184 else if (GET_CODE (addr) == MULT)
14186 index = XEXP (addr, 0); /* index*scale */
14187 scale_rtx = XEXP (addr, 1);
14189 else if (GET_CODE (addr) == ASHIFT)
14191 /* We're called for lea too, which implements ashift on occasion. */
14192 index = XEXP (addr, 0);
14193 tmp = XEXP (addr, 1);
14194 if (!CONST_INT_P (tmp))
14196 scale = INTVAL (tmp);
14197 if ((unsigned HOST_WIDE_INT) scale > 3)
14199 scale = 1 << scale;
14203 disp = addr; /* displacement */
14209 else if (SUBREG_P (index)
14210 && REG_P (SUBREG_REG (index)))
14216 /* Extract the integral value of scale. */
14219 if (!CONST_INT_P (scale_rtx))
14221 scale = INTVAL (scale_rtx);
/* Look through SUBREGs so the special-regno checks below see the
   underlying hard registers.  */
14224 base_reg = base && SUBREG_P (base) ? SUBREG_REG (base) : base;
14225 index_reg = index && SUBREG_P (index) ? SUBREG_REG (index) : index;
14227 /* Avoid useless 0 displacement. */
14228 if (disp == const0_rtx && (base || index))
14231 /* Allow arg pointer and stack pointer as index if there is not scaling. */
/* %esp cannot be an index; with scale 1 we can swap base and index
   so the stack/arg/frame pointer lands in the base slot.  */
14232 if (base_reg && index_reg && scale == 1
14233 && (index_reg == arg_pointer_rtx
14234 || index_reg == frame_pointer_rtx
14235 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
14237 std::swap (base, index);
14238 std::swap (base_reg, index_reg);
14241 /* Special case: %ebp cannot be encoded as a base without a displacement.
14245 && (base_reg == hard_frame_pointer_rtx
14246 || base_reg == frame_pointer_rtx
14247 || base_reg == arg_pointer_rtx
14248 || (REG_P (base_reg)
14249 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
14250 || REGNO (base_reg) == R13_REG))))
14253 /* Special case: on K6, [%esi] makes the instruction vector decoded.
14254 Avoid this by transforming to [%esi+0].
14255 Reload calls address legitimization without cfun defined, so we need
14256 to test cfun for being non-NULL. */
14257 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
14258 && base_reg && !index_reg && !disp
14259 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
14262 /* Special case: encode reg+reg instead of reg*2. */
14263 if (!base && index && scale == 2)
14264 base = index, base_reg = index_reg, scale = 1;
14266 /* Special case: scaling cannot be encoded without base or displacement. */
14267 if (!base && !disp && index && scale != 1)
14271 out->index = index;
14273 out->scale = scale;
14279 /* Return cost of the memory address x.
14280 For i386, it is better to use a complex address than let gcc copy
14281 the address into a reg and make a new pseudo. But not if the address
14282 requires to two regs - that would mean more pseudos with longer
/* Implements the address-cost target hook; only X is used, the mode,
   address space and speed arguments are ignored.  NOTE(review): interior
   lines of this function are elided in this dump.  */
14285 ix86_address_cost (rtx x, machine_mode, addr_space_t, bool)
14287 struct ix86_address parts;
14289 int ok = ix86_decompose_address (x, &parts);
/* Look through SUBREGs so the register tests below see the real reg.  */
14293 if (parts.base && SUBREG_P (parts.base))
14294 parts.base = SUBREG_REG (parts.base);
14295 if (parts.index && SUBREG_P (parts.index))
14296 parts.index = SUBREG_REG (parts.index);
14298 /* Attempt to minimize number of registers in the address by increasing
14299 address cost for each used register. We don't increase address cost
14300 for "pic_offset_table_rtx". When a memopt with "pic_offset_table_rtx"
14301 is not invariant itself it most likely means that base or index is not
14302 invariant. Therefore only "pic_offset_table_rtx" could be hoisted out,
14303 which is not profitable for x86. */
/* Charge for a base register unless it is the PIC register (after GIMPLE
   passes the PIC reg check is only meaningful when RTL exists).  */
14305 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
14306 && (current_pass->type == GIMPLE_PASS
14307 || !pic_offset_table_rtx
14308 || !REG_P (parts.base)
14309 || REGNO (pic_offset_table_rtx) != REGNO (parts.base)))
/* Same charge for the index register.  */
14313 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
14314 && (current_pass->type == GIMPLE_PASS
14315 || !pic_offset_table_rtx
14316 || !REG_P (parts.index)
14317 || REGNO (pic_offset_table_rtx) != REGNO (parts.index)))
14320 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
14321 since it's predecode logic can't detect the length of instructions
14322 and it degenerates to vector decoded. Increase cost of such
14323 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
14324 to split such addresses or even refuse such addresses at all.
14326 Following addressing modes are affected:
14331 The first and last case may be avoidable by explicitly coding the zero in
14332 memory address, but I don't have AMD-K6 machine handy to check this
/* The three disp/base/index/scale combinations penalized on K6.  */
14336 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
14337 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
14338 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
14344 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
14345 this is used for to form addresses to local data when -fPIC is in
/* True iff DISP is the Mach-O pic-base offset unspec
   (UNSPEC_MACHOPIC_OFFSET).  */
14349 darwin_local_data_pic (rtx disp)
14351 return (GET_CODE (disp) == UNSPEC
14352 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
14355 /* Determine if a given RTX is a valid constant. We already know this
14356 satisfies CONSTANT_P. */
/* Implements TARGET_LEGITIMATE_CONSTANT_P.  NOTE(review): interior lines
   are elided in this dump; comments below describe only visible code.  */
14359 ix86_legitimate_constant_p (machine_mode, rtx x)
14361 /* Pointer bounds constants are not valid. */
14362 if (POINTER_BOUNDS_MODE_P (GET_MODE (x)))
14365 switch (GET_CODE (x))
/* Peel a CONST (PLUS sym const_int) wrapper before classifying.  */
14370 if (GET_CODE (x) == PLUS)
14372 if (!CONST_INT_P (XEXP (x, 1)))
14377 if (TARGET_MACHO && darwin_local_data_pic (x))
14380 /* Only some unspecs are valid as "constants". */
14381 if (GET_CODE (x) == UNSPEC)
14382 switch (XINT (x, 1))
14385 case UNSPEC_GOTOFF:
14386 case UNSPEC_PLTOFF:
14387 return TARGET_64BIT;
/* NTPOFF/DTPOFF wrap a SYMBOL_REF; valid only for the matching TLS model.  */
14389 case UNSPEC_NTPOFF:
14390 x = XVECEXP (x, 0, 0);
14391 return (GET_CODE (x) == SYMBOL_REF
14392 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
14393 case UNSPEC_DTPOFF:
14394 x = XVECEXP (x, 0, 0);
14395 return (GET_CODE (x) == SYMBOL_REF
14396 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
14401 /* We must have drilled down to a symbol. */
14402 if (GET_CODE (x) == LABEL_REF)
14404 if (GET_CODE (x) != SYMBOL_REF)
14409 /* TLS symbols are never valid. */
14410 if (SYMBOL_REF_TLS_MODEL (x))
14413 /* DLLIMPORT symbols are never valid. */
14414 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
14415 && SYMBOL_REF_DLLIMPORT_P (x))
14419 /* mdynamic-no-pic */
14420 if (MACHO_DYNAMIC_NO_PIC_P)
14421 return machopic_symbol_defined_p (x);
/* Wide integers/vectors must be encodable as standard SSE constants
   when not natively supported.  */
14425 case CONST_WIDE_INT:
14426 if (!TARGET_64BIT && !standard_sse_constant_p (x))
14431 if (!standard_sse_constant_p (x))
14438 /* Otherwise we handle everything else in the move patterns. */
14442 /* Determine if it's legal to put X into the constant pool. This
14443 is not possible for the address of thread-local symbols, which
14444 is checked above. */
/* Implements TARGET_CANNOT_FORCE_CONST_MEM: integral constants and
   vectors are always poolable; everything else defers to
   ix86_legitimate_constant_p.  */
14447 ix86_cannot_force_const_mem (machine_mode mode, rtx x)
14449 /* We can always put integral constants and vectors in memory. */
14450 switch (GET_CODE (x))
14453 case CONST_WIDE_INT:
14461 return !ix86_legitimate_constant_p (mode, x);
14464 /* Nonzero if the symbol is marked as dllimport, or as stub-variable,
/* True iff X is a SYMBOL_REF carrying the dllimport or stub-variable
   flag; only meaningful when dllimport decl attributes are enabled.  */
14468 is_imported_p (rtx x)
14470 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
14471 || GET_CODE (x) != SYMBOL_REF)
14474 return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
14478 /* Nonzero if the constant value X is a legitimate general operand
14479 when generating PIC code. It is given that flag_pic is on and
14480 that X satisfies CONSTANT_P. */
/* NOTE(review): interior lines elided in this dump.  */
14483 legitimate_pic_operand_p (rtx x)
14487 switch (GET_CODE (x))
/* Strip an optional CONST (PLUS inner const_int) wrapper.  */
14490 inner = XEXP (x, 0);
14491 if (GET_CODE (inner) == PLUS
14492 && CONST_INT_P (XEXP (inner, 1)))
14493 inner = XEXP (inner, 0);
14495 /* Only some unspecs are valid as "constants". */
14496 if (GET_CODE (inner) == UNSPEC)
14497 switch (XINT (inner, 1))
14500 case UNSPEC_GOTOFF:
14501 case UNSPEC_PLTOFF:
14502 return TARGET_64BIT;
/* (presumably the UNSPEC_TPOFF/NTPOFF case — the case label is elided)  */
14504 x = XVECEXP (inner, 0, 0);
14505 return (GET_CODE (x) == SYMBOL_REF
14506 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
14507 case UNSPEC_MACHOPIC_OFFSET:
14508 return legitimate_pic_address_disp_p (x);
14516 return legitimate_pic_address_disp_p (x);
14523 /* Determine if a given CONST RTX is a valid memory displacement
/* Validates DISP as a PIC memory displacement.  NOTE(review): many
   interior lines (returns, braces) are elided in this dump; comments
   describe visible code only.  */
14527 legitimate_pic_address_disp_p (rtx disp)
14531 /* In 64bit mode we can allow direct addresses of symbols and labels
14532 when they are not dynamic symbols. */
14535 rtx op0 = disp, op1;
14537 switch (GET_CODE (disp))
/* CONST case: must wrap PLUS (sym, small-const) with offset in
   [-16M, 16M) so %rip-relative addressing stays in range.  */
14543 if (GET_CODE (XEXP (disp, 0)) != PLUS)
14545 op0 = XEXP (XEXP (disp, 0), 0);
14546 op1 = XEXP (XEXP (disp, 0), 1);
14547 if (!CONST_INT_P (op1)
14548 || INTVAL (op1) >= 16*1024*1024
14549 || INTVAL (op1) < -16*1024*1024)
14551 if (GET_CODE (op0) == LABEL_REF)
14553 if (GET_CODE (op0) == CONST
14554 && GET_CODE (XEXP (op0, 0)) == UNSPEC
14555 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
14557 if (GET_CODE (op0) == UNSPEC
14558 && XINT (op0, 1) == UNSPEC_PCREL)
14560 if (GET_CODE (op0) != SYMBOL_REF)
14565 /* TLS references should always be enclosed in UNSPEC.
14566 The dllimported symbol needs always to be resolved. */
14567 if (SYMBOL_REF_TLS_MODEL (op0)
14568 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
14573 if (is_imported_p (op0))
14576 if (SYMBOL_REF_FAR_ADDR_P (op0)
14577 || !SYMBOL_REF_LOCAL_P (op0))
14580 /* Function-symbols need to be resolved only for
14582 For the small-model we don't need to resolve anything
14584 if ((ix86_cmodel != CM_LARGE_PIC
14585 && SYMBOL_REF_FUNCTION_P (op0))
14586 || ix86_cmodel == CM_SMALL_PIC)
14588 /* Non-external symbols don't need to be resolved for
14589 large, and medium-model. */
14590 if ((ix86_cmodel == CM_LARGE_PIC
14591 || ix86_cmodel == CM_MEDIUM_PIC)
14592 && !SYMBOL_REF_EXTERNAL_P (op0))
/* Non-far local symbols (or PIE copy-reloc resolvable data) are OK
   outside the large PIC model.  */
14595 else if (!SYMBOL_REF_FAR_ADDR_P (op0)
14596 && (SYMBOL_REF_LOCAL_P (op0)
14597 || (HAVE_LD_PIE_COPYRELOC
14599 && !SYMBOL_REF_WEAK (op0)
14600 && !SYMBOL_REF_FUNCTION_P (op0)))
14601 && ix86_cmodel != CM_LARGE_PIC)
/* 64-bit tail: only specific GOT/PLT-related unspecs are accepted.  */
14609 if (GET_CODE (disp) != CONST)
14611 disp = XEXP (disp, 0);
14615 /* We are unsafe to allow PLUS expressions. This limit allowed distance
14616 of GOT tables. We should not need these anyway. */
14617 if (GET_CODE (disp) != UNSPEC
14618 || (XINT (disp, 1) != UNSPEC_GOTPCREL
14619 && XINT (disp, 1) != UNSPEC_GOTOFF
14620 && XINT (disp, 1) != UNSPEC_PCREL
14621 && XINT (disp, 1) != UNSPEC_PLTOFF))
14624 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
14625 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
/* 32-bit path: allow sym+const_int, then classify the unspec kind.  */
14631 if (GET_CODE (disp) == PLUS)
14633 if (!CONST_INT_P (XEXP (disp, 1)))
14635 disp = XEXP (disp, 0);
14639 if (TARGET_MACHO && darwin_local_data_pic (disp))
14642 if (GET_CODE (disp) != UNSPEC)
14645 switch (XINT (disp, 1))
14650 /* We need to check for both symbols and labels because VxWorks loads
14651 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
14653 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
14654 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
14655 case UNSPEC_GOTOFF:
14656 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
14657 While ABI specify also 32bit relocation but we don't produce it in
14658 small PIC model at all. */
14659 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
14660 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
14662 return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
/* Each TLS unspec must wrap a SYMBOL_REF with the matching TLS model.  */
14664 case UNSPEC_GOTTPOFF:
14665 case UNSPEC_GOTNTPOFF:
14666 case UNSPEC_INDNTPOFF:
14669 disp = XVECEXP (disp, 0, 0);
14670 return (GET_CODE (disp) == SYMBOL_REF
14671 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
14672 case UNSPEC_NTPOFF:
14673 disp = XVECEXP (disp, 0, 0);
14674 return (GET_CODE (disp) == SYMBOL_REF
14675 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
14676 case UNSPEC_DTPOFF:
14677 disp = XVECEXP (disp, 0, 0);
14678 return (GET_CODE (disp) == SYMBOL_REF
14679 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
14685 /* Determine if op is suitable RTX for an address register.
14686 Return naked register if a register or a register subreg is
14687 found, otherwise return NULL_RTX. */
14690 ix86_validate_address_register (rtx op)
14692 machine_mode mode = GET_MODE (op);
14694 /* Only SImode or DImode registers can form the address. */
14695 if (mode != SImode && mode != DImode)
/* SUBREG: validate the inner register instead.  */
14700 else if (SUBREG_P (op))
14702 rtx reg = SUBREG_REG (op);
14707 mode = GET_MODE (reg);
14709 /* Don't allow SUBREGs that span more than a word. It can
14710 lead to spill failures when the register is one word out
14711 of a two word structure. */
14712 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
14715 /* Allow only SUBREGs of non-eliminable hard registers. */
14716 if (register_no_elim_operand (reg, mode))
14720 /* Op is not a register. */
14724 /* Recognizes RTL expressions that are valid memory addresses for an
14725 instruction. The MODE argument is the machine mode for the MEM
14726 expression that wants to use this address.
14728 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
14729 convert common non-canonical forms to canonical form so that they will
/* Implements TARGET_LEGITIMATE_ADDRESS_P.  NOTE(review): interior lines
   (returns, seg assignment, braces) are elided in this dump.  */
14733 ix86_legitimate_address_p (machine_mode, rtx addr, bool strict)
14735 struct ix86_address parts;
14736 rtx base, index, disp;
14737 HOST_WIDE_INT scale;
14740 if (ix86_decompose_address (addr, &parts) <= 0)
14741 /* Decomposition failed. */
14745 index = parts.index;
14747 scale = parts.scale;
14750 /* Validate base register. */
14753 rtx reg = ix86_validate_address_register (base);
14755 if (reg == NULL_RTX)
/* STRICT mode requires hard-register validity; non-strict also
   accepts pseudos.  */
14758 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
14759 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
14760 /* Base is not valid. */
14764 /* Validate index register. */
14767 rtx reg = ix86_validate_address_register (index);
14769 if (reg == NULL_RTX)
14772 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
14773 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
14774 /* Index is not valid. */
14778 /* Index and base should have the same mode. */
14780 && GET_MODE (base) != GET_MODE (index))
14783 /* Address override works only on the (%reg) part of %fs:(%reg). */
14784 if (seg != ADDR_SPACE_GENERIC
14785 && ((base && GET_MODE (base) != word_mode)
14786 || (index && GET_MODE (index) != word_mode)))
14789 /* Validate scale factor. */
14793 /* Scale without index. */
14796 if (scale != 2 && scale != 4 && scale != 8)
14797 /* Scale is not a valid multiplier. */
14801 /* Validate displacement. */
/* Classify an UNSPEC displacement; PIC-related unspecs fall through to
   the PIC validation below via is_legitimate_pic.  */
14804 if (GET_CODE (disp) == CONST
14805 && GET_CODE (XEXP (disp, 0)) == UNSPEC
14806 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
14807 switch (XINT (XEXP (disp, 0), 1))
14809 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
14810 used. While ABI specify also 32bit relocations, we don't produce
14811 them at all and use IP relative instead. */
14813 case UNSPEC_GOTOFF:
14814 gcc_assert (flag_pic);
14816 goto is_legitimate_pic;
14818 /* 64bit address unspec. */
14821 case UNSPEC_GOTPCREL:
14823 gcc_assert (flag_pic);
14824 goto is_legitimate_pic;
14826 case UNSPEC_GOTTPOFF:
14827 case UNSPEC_GOTNTPOFF:
14828 case UNSPEC_INDNTPOFF:
14829 case UNSPEC_NTPOFF:
14830 case UNSPEC_DTPOFF:
14833 case UNSPEC_STACK_CHECK:
14834 gcc_assert (flag_split_stack);
14838 /* Invalid address unspec. */
14842 else if (SYMBOLIC_CONST (disp)
14846 && MACHOPIC_INDIRECT
14847 && !machopic_operand_p (disp)
/* PIC displacement validation (target of is_legitimate_pic above).  */
14853 if (TARGET_64BIT && (index || base))
14855 /* foo@dtpoff(%rX) is ok. */
14856 if (GET_CODE (disp) != CONST
14857 || GET_CODE (XEXP (disp, 0)) != PLUS
14858 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
14859 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
14860 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
14861 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
14862 /* Non-constant pic memory reference. */
14865 else if ((!TARGET_MACHO || flag_pic)
14866 && ! legitimate_pic_address_disp_p (disp))
14867 /* Displacement is an invalid pic construct. */
14870 else if (MACHO_DYNAMIC_NO_PIC_P
14871 && !ix86_legitimate_constant_p (Pmode, disp))
14872 /* displacment must be referenced via non_lazy_pointer */
14876 /* This code used to verify that a symbolic pic displacement
14877 includes the pic_offset_table_rtx register.
14879 While this is good idea, unfortunately these constructs may
14880 be created by "adds using lea" optimization for incorrect
14889 This code is nonsensical, but results in addressing
14890 GOT table with pic_offset_table_rtx base. We can't
14891 just refuse it easily, since it gets matched by
14892 "addsi3" pattern, that later gets split to lea in the
14893 case output register differs from input. While this
14894 can be handled by separate addsi pattern for this case
14895 that never results in lea, this seems to be easier and
14896 correct fix for crash to disable this test. */
/* Non-PIC: displacement must be a legitimate constant.  */
14898 else if (GET_CODE (disp) != LABEL_REF
14899 && !CONST_INT_P (disp)
14900 && (GET_CODE (disp) != CONST
14901 || !ix86_legitimate_constant_p (Pmode, disp))
14902 && (GET_CODE (disp) != SYMBOL_REF
14903 || !ix86_legitimate_constant_p (Pmode, disp)))
14904 /* Displacement is not constant. */
14906 else if (TARGET_64BIT
14907 && !x86_64_immediate_operand (disp, VOIDmode))
14908 /* Displacement is out of range. */
14910 /* In x32 mode, constant addresses are sign extended to 64bit, so
14911 we have to prevent addresses from 0x80000000 to 0xffffffff. */
14912 else if (TARGET_X32 && !(index || base)
14913 && CONST_INT_P (disp)
14914 && val_signbit_known_set_p (SImode, INTVAL (disp)))
14918 /* Everything looks valid. */
14922 /* Determine if a given RTX is a valid constant address. */
/* A constant address is a CONSTANT_P rtx that also passes the strict
   legitimate-address check.  */
14925 constant_address_p (rtx x)
14927 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
14930 /* Return a unique alias set for the GOT. */
14932 static alias_set_type
14933 ix86_GOT_alias_set (void)
/* Lazily-created singleton; -1 marks "not yet allocated".  */
14935 static alias_set_type set = -1;
14937 set = new_alias_set ();
14941 /* Return a legitimate reference for ORIG (an address) using the
14942 register REG. If REG is 0, a new pseudo is generated.
14944 There are two types of references that must be handled:
14946 1. Global data references must load the address from the GOT, via
14947 the PIC reg. An insn is emitted to do this load, and the reg is
14950 2. Static data references, constant pool addresses, and code labels
14951 compute the address as an offset from the GOT, whose base is in
14952 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
14953 differentiate them from global data objects. The returned
14954 address is the PIC reg + an unspec constant.
14956 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
14957 reg also appears in the address. */
/* NOTE(review): interior lines of this function are elided in this dump;
   comments below annotate only the visible code.  */
14960 legitimize_pic_address (rtx orig, rtx reg)
14963 rtx new_rtx = orig;
/* 32-bit Mach-O: delegate entirely to the Darwin PIC machinery.  */
14966 if (TARGET_MACHO && !TARGET_64BIT)
14969 reg = gen_reg_rtx (Pmode);
14970 /* Use the generic Mach-O PIC machinery. */
14971 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
14975 if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
14977 rtx tmp = legitimize_pe_coff_symbol (addr, true);
14982 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
/* 64-bit non-small-model @GOTOFF handling.  */
14984 else if (TARGET_64BIT && !TARGET_PECOFF
14985 && ix86_cmodel != CM_SMALL_PIC && gotoff_operand (addr, Pmode))
14988 /* This symbol may be referenced via a displacement from the PIC
14989 base address (@GOTOFF). */
14991 if (GET_CODE (addr) == CONST)
14992 addr = XEXP (addr, 0);
14993 if (GET_CODE (addr) == PLUS)
14995 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
14997 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
15000 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
15001 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
15003 tmpreg = gen_reg_rtx (Pmode);
15006 emit_move_insn (tmpreg, new_rtx);
15010 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
15011 tmpreg, 1, OPTAB_DIRECT);
15015 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
/* 32-bit @GOTOFF handling: PIC reg + GOTOFF unspec.  */
15017 else if (!TARGET_64BIT && !TARGET_PECOFF && gotoff_operand (addr, Pmode))
15019 /* This symbol may be referenced via a displacement from the PIC
15020 base address (@GOTOFF). */
15022 if (GET_CODE (addr) == CONST)
15023 addr = XEXP (addr, 0);
15024 if (GET_CODE (addr) == PLUS)
15026 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
15028 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
15031 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
15032 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
15033 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
15037 emit_move_insn (reg, new_rtx);
/* Non-TLS symbols (and VxWorks text labels) go through the GOT.  */
15041 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
15042 /* We can't use @GOTOFF for text labels on VxWorks;
15043 see gotoff_operand. */
15044 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
15046 rtx tmp = legitimize_pe_coff_symbol (addr, true);
15050 /* For x64 PE-COFF there is no GOT table. So we use address
15052 if (TARGET_64BIT && TARGET_PECOFF)
15054 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
15055 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
15058 reg = gen_reg_rtx (Pmode);
15059 emit_move_insn (reg, new_rtx);
/* 64-bit small/medium models: %rip-relative GOT load.  */
15062 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
15064 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
15065 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
15066 new_rtx = gen_const_mem (Pmode, new_rtx);
15067 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
15070 reg = gen_reg_rtx (Pmode);
15071 /* Use directly gen_movsi, otherwise the address is loaded
15072 into register for CSE. We don't want to CSE this addresses,
15073 instead we CSE addresses from the GOT table, so skip this. */
15074 emit_insn (gen_movsi (reg, new_rtx));
15079 /* This symbol must be referenced via a load from the
15080 Global Offset Table (@GOT). */
15082 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
15083 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
15085 new_rtx = force_reg (Pmode, new_rtx);
15086 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
15087 new_rtx = gen_const_mem (Pmode, new_rtx);
15088 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
15091 reg = gen_reg_rtx (Pmode);
15092 emit_move_insn (reg, new_rtx);
/* Fallback: plain constants and CONST/PLUS combinations.  */
15098 if (CONST_INT_P (addr)
15099 && !x86_64_immediate_operand (addr, VOIDmode))
15103 emit_move_insn (reg, addr);
15107 new_rtx = force_reg (Pmode, addr);
15109 else if (GET_CODE (addr) == CONST)
15111 addr = XEXP (addr, 0);
15113 /* We must match stuff we generate before. Assume the only
15114 unspecs that can get here are ours. Not that we could do
15115 anything with them anyway.... */
15116 if (GET_CODE (addr) == UNSPEC
15117 || (GET_CODE (addr) == PLUS
15118 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
15120 gcc_assert (GET_CODE (addr) == PLUS);
15122 if (GET_CODE (addr) == PLUS)
15124 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
15126 /* Check first to see if this is a constant offset from a @GOTOFF
15127 symbol reference. */
15128 if (!TARGET_PECOFF && gotoff_operand (op0, Pmode)
15129 && CONST_INT_P (op1))
15133 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
15135 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
15136 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
15137 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
15141 emit_move_insn (reg, new_rtx);
/* Large offsets that don't fit a 32-bit immediate go via a register.  */
15147 if (INTVAL (op1) < -16*1024*1024
15148 || INTVAL (op1) >= 16*1024*1024)
15150 if (!x86_64_immediate_operand (op1, Pmode))
15151 op1 = force_reg (Pmode, op1);
15152 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
/* Otherwise legitimize both halves recursively and recombine.  */
15158 rtx base = legitimize_pic_address (op0, reg);
15159 machine_mode mode = GET_MODE (base);
15161 = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
15163 if (CONST_INT_P (new_rtx))
15165 if (INTVAL (new_rtx) < -16*1024*1024
15166 || INTVAL (new_rtx) >= 16*1024*1024)
15168 if (!x86_64_immediate_operand (new_rtx, mode))
15169 new_rtx = force_reg (mode, new_rtx);
15171 = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
15174 new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
15178 /* For %rip addressing, we have to use just disp32, not
15181 && (GET_CODE (base) == SYMBOL_REF
15182 || GET_CODE (base) == LABEL_REF))
15183 base = force_reg (mode, base);
15184 if (GET_CODE (new_rtx) == PLUS
15185 && CONSTANT_P (XEXP (new_rtx, 1)))
15187 base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
15188 new_rtx = XEXP (new_rtx, 1);
15190 new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
15198 /* Load the thread pointer. If TO_REG is true, force it into a register. */
15201 get_thread_pointer (machine_mode tp_mode, bool to_reg)
15203 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
/* Widen a SImode thread pointer to DImode when the caller asks for it
   (only SImode -> DImode is supported, per the asserts).  */
15205 if (GET_MODE (tp) != tp_mode)
15207 gcc_assert (GET_MODE (tp) == SImode);
15208 gcc_assert (tp_mode == DImode);
15210 tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
15214 tp = copy_to_mode_reg (tp_mode, tp);
15219 /* Construct the SYMBOL_REF for the tls_get_addr function. */
15221 static GTY(()) rtx ix86_tls_symbol;
/* Lazily creates and caches the ___tls_get_addr/__tls_get_addr symbol;
   for large-model PIC the result is PIC reg + PLTOFF unspec.  */
15224 ix86_tls_get_addr (void)
15226 if (!ix86_tls_symbol)
/* GNU TLS on 32-bit uses the triple-underscore entry point.  */
15229 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
15230 ? "___tls_get_addr" : "__tls_get_addr");
15232 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
15235 if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
15237 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
15239 return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
15240 gen_rtx_CONST (Pmode, unspec));
15243 return ix86_tls_symbol;
15246 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
15248 static GTY(()) rtx ix86_tls_module_base_symbol;
/* Lazily creates and caches the symbol, tagging it with the
   global-dynamic TLS model so later checks treat it as TLS.  */
15251 ix86_tls_module_base (void)
15253 if (!ix86_tls_module_base_symbol)
15255 ix86_tls_module_base_symbol
15256 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
15258 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
15259 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
15262 return ix86_tls_module_base_symbol;
15265 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
15266 false if we expect this to be used for a memory address and true if
15267 we expect to load the address into a register. */
/* NOTE(review): interior lines of this function are elided in this dump;
   comments below annotate only the visible code.  */
15270 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
15272 rtx dest, base, off;
15273 rtx pic = NULL_RTX, tp = NULL_RTX;
15274 machine_mode tp_mode = Pmode;
15277 /* Fall back to global dynamic model if tool chain cannot support local
15279 if (TARGET_SUN_TLS && !TARGET_64BIT
15280 && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
15281 && model == TLS_MODEL_LOCAL_DYNAMIC
15282 model = TLS_MODEL_GLOBAL_DYNAMIC;
15286 case TLS_MODEL_GLOBAL_DYNAMIC:
15287 dest = gen_reg_rtx (Pmode);
/* Obtain (or materialize) the PIC register for the GD call.  */
15291 if (flag_pic && !TARGET_PECOFF)
15292 pic = pic_offset_table_rtx;
15295 pic = gen_reg_rtx (Pmode);
15296 emit_insn (gen_set_got (pic));
/* GNU2 (TLSDESC) sequence: descriptor call, then add thread pointer.  */
15300 if (TARGET_GNU2_TLS)
15303 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
15305 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
15307 tp = get_thread_pointer (Pmode, true);
15308 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
15310 if (GET_MODE (x) != Pmode)
15311 x = gen_rtx_ZERO_EXTEND (Pmode, x);
15313 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
/* Classic __tls_get_addr call; result arrives in %rax on 64-bit.  */
15317 rtx caddr = ix86_tls_get_addr ();
15321 rtx rax = gen_rtx_REG (Pmode, AX_REG);
15326 (ix86_gen_tls_global_dynamic_64 (rax, x, caddr));
15327 insns = get_insns ();
15330 if (GET_MODE (x) != Pmode)
15331 x = gen_rtx_ZERO_EXTEND (Pmode, x);
15333 RTL_CONST_CALL_P (insns) = 1;
15334 emit_libcall_block (insns, dest, rax, x);
15337 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
15341 case TLS_MODEL_LOCAL_DYNAMIC:
15342 base = gen_reg_rtx (Pmode);
15347 pic = pic_offset_table_rtx;
15350 pic = gen_reg_rtx (Pmode);
15351 emit_insn (gen_set_got (pic));
/* GNU2 LD: resolve _TLS_MODULE_BASE_ via the descriptor mechanism.  */
15355 if (TARGET_GNU2_TLS)
15357 rtx tmp = ix86_tls_module_base ();
15360 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
15362 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
15364 tp = get_thread_pointer (Pmode, true);
15365 set_unique_reg_note (get_last_insn (), REG_EQUAL,
15366 gen_rtx_MINUS (Pmode, tmp, tp));
15370 rtx caddr = ix86_tls_get_addr ();
15374 rtx rax = gen_rtx_REG (Pmode, AX_REG);
15380 (ix86_gen_tls_local_dynamic_base_64 (rax, caddr));
15381 insns = get_insns ();
15384 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
15385 share the LD_BASE result with other LD model accesses. */
15386 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
15387 UNSPEC_TLS_LD_BASE);
15389 RTL_CONST_CALL_P (insns) = 1;
15390 emit_libcall_block (insns, base, rax, eqv);
15393 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
/* LD address = module base + @dtpoff offset of X.  */
15396 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
15397 off = gen_rtx_CONST (Pmode, off);
15399 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
15401 if (TARGET_GNU2_TLS)
15403 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
15405 if (GET_MODE (x) != Pmode)
15406 x = gen_rtx_ZERO_EXTEND (Pmode, x);
15408 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
15412 case TLS_MODEL_INITIAL_EXEC:
15415 if (TARGET_SUN_TLS && !TARGET_X32)
15417 /* The Sun linker took the AMD64 TLS spec literally
15418 and can only handle %rax as destination of the
15419 initial executable code sequence. */
15421 dest = gen_reg_rtx (DImode);
15422 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
15426 /* Generate DImode references to avoid %fs:(%reg32)
15427 problems and linker IE->LE relaxation bug. */
15430 type = UNSPEC_GOTNTPOFF;
15434 pic = pic_offset_table_rtx;
15435 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
15437 else if (!TARGET_ANY_GNU_TLS)
15439 pic = gen_reg_rtx (Pmode);
15440 emit_insn (gen_set_got (pic));
15441 type = UNSPEC_GOTTPOFF;
15446 type = UNSPEC_INDNTPOFF;
/* Load the TP offset for X from the GOT.  */
15449 off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
15450 off = gen_rtx_CONST (tp_mode, off);
15452 off = gen_rtx_PLUS (tp_mode, pic, off);
15453 off = gen_const_mem (tp_mode, off);
15454 set_mem_alias_set (off, ix86_GOT_alias_set ());
15456 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
15458 base = get_thread_pointer (tp_mode,
15459 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
15460 off = force_reg (tp_mode, off);
15461 return gen_rtx_PLUS (tp_mode, base, off);
/* Non-GNU TLS: address = TP - offset.  */
15465 base = get_thread_pointer (Pmode, true);
15466 dest = gen_reg_rtx (Pmode);
15467 emit_insn (ix86_gen_sub3 (dest, base, off));
15471 case TLS_MODEL_LOCAL_EXEC:
15472 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
15473 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
15474 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
15475 off = gen_rtx_CONST (Pmode, off);
15477 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
15479 base = get_thread_pointer (Pmode,
15480 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
15481 return gen_rtx_PLUS (Pmode, base, off);
15485 base = get_thread_pointer (Pmode, true);
15486 dest = gen_reg_rtx (Pmode);
15487 emit_insn (ix86_gen_sub3 (dest, base, off));
15492 gcc_unreachable ();
15498 /* Create or return the unique __imp_DECL dllimport symbol corresponding
15499 to symbol DECL if BEIMPORT is true. Otherwise create or return the
15500 unique refptr-DECL symbol corresponding to symbol DECL. */
/* GC-aware hash traits for the dllimport_map cache, keyed on the source
   decl; entries survive GC only while the decl itself is marked.  */
15502 struct dllimport_hasher : ggc_cache_ptr_hash<tree_map>
15504 static inline hashval_t hash (tree_map *m) { return m->hash; }
15506 equal (tree_map *a, tree_map *b)
15508 return a->base.from == b->base.from;
15512 keep_cache_entry (tree_map *&m)
15514 return ggc_marked_p (m->base.from);
15521 get_dllimport_decl (tree decl, bool beimport)
15523 struct tree_map *h, in;
15525 const char *prefix;
15526 size_t namelen, prefixlen;
15531 if (!dllimport_map)
15532 dllimport_map = hash_table<dllimport_hasher>::create_ggc (512);
15534 in.hash = htab_hash_pointer (decl);
15535 in.base.from = decl;
15536 tree_map **loc = dllimport_map->find_slot_with_hash (&in, in.hash, INSERT);
15541 *loc = h = ggc_alloc<tree_map> ();
15543 h->base.from = decl;
15544 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
15545 VAR_DECL, NULL, ptr_type_node);
15546 DECL_ARTIFICIAL (to) = 1;
15547 DECL_IGNORED_P (to) = 1;
15548 DECL_EXTERNAL (to) = 1;
15549 TREE_READONLY (to) = 1;
15551 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
15552 name = targetm.strip_name_encoding (name);
15554 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
15555 ? "*__imp_" : "*__imp__";
15557 prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
15558 namelen = strlen (name);
15559 prefixlen = strlen (prefix);
15560 imp_name = (char *) alloca (namelen + prefixlen + 1);
15561 memcpy (imp_name, prefix, prefixlen);
15562 memcpy (imp_name + prefixlen, name, namelen + 1);
15564 name = ggc_alloc_string (imp_name, namelen + prefixlen);
15565 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
15566 SET_SYMBOL_REF_DECL (rtl, to);
15567 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
15570 SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
15571 #ifdef SUB_TARGET_RECORD_STUB
15572 SUB_TARGET_RECORD_STUB (name);
15576 rtl = gen_const_mem (Pmode, rtl);
15577 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
15579 SET_DECL_RTL (to, rtl);
15580 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
15585 /* Expand SYMBOL into its corresponding far-addresse symbol.
15586 WANT_REG is true if we require the result be a register. */
15589 legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
/* SYMBOL must carry a decl; build/reuse its refptr indirection.  */
15594 gcc_assert (SYMBOL_REF_DECL (symbol));
15595 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);
15597 x = DECL_RTL (imp_decl);
15599 x = force_reg (Pmode, x);
15603 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
15604 true if we require the result be a register. */
15607 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
/* SYMBOL must carry a decl; build/reuse its __imp_ indirection.  */
15612 gcc_assert (SYMBOL_REF_DECL (symbol));
15613 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);
15615 x = DECL_RTL (imp_decl);
15617 x = force_reg (Pmode, x);
15621 /* Expand SYMBOL into its corresponding dllimport or refptr symbol. WANT_REG
15622 is true if we require the result be a register. */
/* PE-COFF only; returns NULL_RTX for other object formats (the elided
   return paths are not visible in this dump).  */
15625 legitimize_pe_coff_symbol (rtx addr, bool inreg)
15627 if (!TARGET_PECOFF)
/* dllimport attribute handling: plain symbols and sym+const forms.  */
15630 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
15632 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
15633 return legitimize_dllimport_symbol (addr, inreg);
15634 if (GET_CODE (addr) == CONST
15635 && GET_CODE (XEXP (addr, 0)) == PLUS
15636 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
15637 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
15639 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
15640 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
/* Outside large/medium models, external decls go through refptr.  */
15644 if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
15646 if (GET_CODE (addr) == SYMBOL_REF
15647 && !is_imported_p (addr)
15648 && SYMBOL_REF_EXTERNAL_P (addr)
15649 && SYMBOL_REF_DECL (addr))
15650 return legitimize_pe_coff_extern_decl (addr, inreg);
15652 if (GET_CODE (addr) == CONST
15653 && GET_CODE (XEXP (addr, 0)) == PLUS
15654 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
15655 && !is_imported_p (XEXP (XEXP (addr, 0), 0))
15656 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
15657 && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
15659 rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
15660 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
15665 /* Try machine-dependent ways of modifying an illegitimate address
15666 to be legitimate. If we find one, return the new, valid address.
15667 This macro is used in only one place: `memory_address' in explow.c.
15669 OLDX is the address as it was before break_out_memory_refs was called.
15670 In some cases it is useful to look at this to decide what needs to be done.
15672 It is always safe for this macro to do nothing. It exists to recognize
15673 opportunities to optimize the output.
15675 For the 80386, we handle X+REG by loading X into a register R and
15676 using R+REG. R will go in a general reg and indexing will be used.
15677 However, if REG is a broken-out memory address or multiplication,
15678 nothing needs to be done because REG can certainly go in a general reg.
15680 When -fpic is used, special handling is needed for symbolic references.
15681 See comments by legitimize_pic_address in i386.c for details. */
15684 ix86_legitimize_address (rtx x, rtx, machine_mode mode)
15686 bool changed = false;
/* TLS symbols get their dedicated legitimizer, both bare and inside a
   (const (plus sym off)) wrapper.  */
15689 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
15691 return legitimize_tls_address (x, (enum tls_model) log, false);
15692 if (GET_CODE (x) == CONST
15693 && GET_CODE (XEXP (x, 0)) == PLUS
15694 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
15695 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
15697 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
15698 (enum tls_model) log, false);
15699 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
/* PE/COFF dllimport / refptr symbols.  */
15702 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
15704 rtx tmp = legitimize_pe_coff_symbol (x, true);
/* PIC symbolic addresses.  */
15709 if (flag_pic && SYMBOLIC_CONST (x))
15710 return legitimize_pic_address (x, 0);
15713 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
15714 return machopic_indirect_data_reference (x, 0);
15717 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
15718 if (GET_CODE (x) == ASHIFT
15719 && CONST_INT_P (XEXP (x, 1))
15720 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
15723 log = INTVAL (XEXP (x, 1));
15724 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
15725 GEN_INT (1 << log));
15728 if (GET_CODE (x) == PLUS)
15730 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
15732 if (GET_CODE (XEXP (x, 0)) == ASHIFT
15733 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
15734 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
15737 log = INTVAL (XEXP (XEXP (x, 0), 1));
15738 XEXP (x, 0) = gen_rtx_MULT (Pmode,
15739 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
15740 GEN_INT (1 << log));
15743 if (GET_CODE (XEXP (x, 1)) == ASHIFT
15744 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
15745 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
15748 log = INTVAL (XEXP (XEXP (x, 1), 1));
15749 XEXP (x, 1) = gen_rtx_MULT (Pmode,
15750 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
15751 GEN_INT (1 << log));
15754 /* Put multiply first if it isn't already. */
15755 if (GET_CODE (XEXP (x, 1)) == MULT)
15757 std::swap (XEXP (x, 0), XEXP (x, 1));
15761 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
15762 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
15763 created by virtual register instantiation, register elimination, and
15764 similar optimizations. */
15765 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
15768 x = gen_rtx_PLUS (Pmode,
15769 gen_rtx_PLUS (Pmode, XEXP (x, 0),
15770 XEXP (XEXP (x, 1), 0)),
15771 XEXP (XEXP (x, 1), 1));
15775 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
15776 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
15777 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
15778 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
15779 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
15780 && CONSTANT_P (XEXP (x, 1)))
15783 rtx other = NULL_RTX;
/* Decide which of the two constants is the CONST_INT and which rtx
   absorbs it via plus_constant.  */
15785 if (CONST_INT_P (XEXP (x, 1)))
15787 constant = XEXP (x, 1);
15788 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
15790 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
15792 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
15793 other = XEXP (x, 1);
15801 x = gen_rtx_PLUS (Pmode,
15802 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
15803 XEXP (XEXP (XEXP (x, 0), 1), 0)),
15804 plus_constant (Pmode, other,
15805 INTVAL (constant)));
/* Stop as soon as the rewritten address is already legitimate.  */
15809 if (changed && ix86_legitimate_address_p (mode, x, false))
/* Force any remaining MULT sub-addresses into registers.  */
15812 if (GET_CODE (XEXP (x, 0)) == MULT)
15815 XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
15818 if (GET_CODE (XEXP (x, 1)) == MULT)
15821 XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
15825 && REG_P (XEXP (x, 1))
15826 && REG_P (XEXP (x, 0)))
15829 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
15832 x = legitimize_pic_address (x, 0);
15835 if (changed && ix86_legitimate_address_p (mode, x, false))
/* Finally, force the non-register half of the PLUS into a fresh
   pseudo, converting to Pmode if necessary.  */
15838 if (REG_P (XEXP (x, 0)))
15840 rtx temp = gen_reg_rtx (Pmode);
15841 rtx val = force_operand (XEXP (x, 1), temp);
15844 val = convert_to_mode (Pmode, val, 1);
15845 emit_move_insn (temp, val);
15848 XEXP (x, 1) = temp;
15852 else if (REG_P (XEXP (x, 1)))
15854 rtx temp = gen_reg_rtx (Pmode);
15855 rtx val = force_operand (XEXP (x, 0), temp);
15858 val = convert_to_mode (Pmode, val, 1);
15859 emit_move_insn (temp, val);
15862 XEXP (x, 0) = temp;
15870 /* Print an integer constant expression in assembler syntax. Addition
15871 and subtraction are the only arithmetic that may appear in these
15872 expressions. FILE is the stdio stream to write to, X is the rtx, and
15873 CODE is the operand print code from the output string. */
15876 output_pic_addr_const (FILE *file, rtx x, int code)
15880 switch (GET_CODE (x))
15883 gcc_assert (flag_pic);
/* SYMBOL_REF handling (Darwin stub indirection, @PLT suffix).  */
15888 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
15889 output_addr_const (file, x);
15892 const char *name = XSTR (x, 0);
15894 /* Mark the decl as referenced so that cgraph will
15895 output the function. */
15896 if (SYMBOL_REF_DECL (x))
15897 mark_decl_referenced (SYMBOL_REF_DECL (x));
15900 if (MACHOPIC_INDIRECT
15901 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
15902 name = machopic_indirection_name (x, /*stub_p=*/true);
15904 assemble_name (file, name);
/* 'P' requests a PLT reference for non-local symbols.  */
15906 if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
15907 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
15908 fputs ("@PLT", file);
/* CODE_LABEL: emit the internal label name.  */
15915 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
15916 assemble_name (asm_out_file, buf);
15920 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
15924 /* This used to output parentheses around the expression,
15925 but that does not work on the 386 (either ATT or BSD assembler). */
15926 output_pic_addr_const (file, XEXP (x, 0), code);
15930 /* We can't handle floating point constants;
15931 TARGET_PRINT_OPERAND must handle them. */
15932 output_operand_lossage ("floating constant misused");
15936 /* Some assemblers need integer constants to appear first. */
15937 if (CONST_INT_P (XEXP (x, 0)))
15939 output_pic_addr_const (file, XEXP (x, 0), code);
15941 output_pic_addr_const (file, XEXP (x, 1), code);
/* MINUS: operand 1 must be a CONST_INT; bracket per dialect.  */
15945 gcc_assert (CONST_INT_P (XEXP (x, 1)));
15946 output_pic_addr_const (file, XEXP (x, 1), code);
15948 output_pic_addr_const (file, XEXP (x, 0), code);
15954 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
15955 output_pic_addr_const (file, XEXP (x, 0), code);
15957 output_pic_addr_const (file, XEXP (x, 1), code);
15959 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
/* UNSPEC: print the wrapped operand, then the relocation suffix
   matching the unspec number.  */
15963 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
15965 bool f = i386_asm_output_addr_const_extra (file, x);
15970 gcc_assert (XVECLEN (x, 0) == 1);
15971 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
15972 switch (XINT (x, 1))
15975 fputs ("@GOT", file);
15977 case UNSPEC_GOTOFF:
15978 fputs ("@GOTOFF", file);
15980 case UNSPEC_PLTOFF:
15981 fputs ("@PLTOFF", file);
15984 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
15985 "(%rip)" : "[rip]", file);
15987 case UNSPEC_GOTPCREL:
15988 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
15989 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
15991 case UNSPEC_GOTTPOFF:
15992 /* FIXME: This might be @TPOFF in Sun ld too. */
15993 fputs ("@gottpoff", file);
15996 fputs ("@tpoff", file);
15998 case UNSPEC_NTPOFF:
16000 fputs ("@tpoff", file);
16002 fputs ("@ntpoff", file);
16004 case UNSPEC_DTPOFF:
16005 fputs ("@dtpoff", file);
16007 case UNSPEC_GOTNTPOFF:
16009 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
16010 "@gottpoff(%rip)": "@gottpoff[rip]", file);
16012 fputs ("@gotntpoff", file);
16014 case UNSPEC_INDNTPOFF:
16015 fputs ("@indntpoff", file);
16018 case UNSPEC_MACHOPIC_OFFSET:
16020 machopic_output_function_base_name (file);
16024 output_operand_lossage ("invalid UNSPEC as operand");
16030 output_operand_lossage ("invalid expression as operand");
16034 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
16035 We need to emit DTP-relative relocations. */
16037 static void ATTRIBUTE_UNUSED
16038 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
/* Emit the constant with an @dtpoff relocation; SIZE selects the
   directive (missing switch lines in this excerpt), padding with
   ", 0" in one case and rejecting unsupported sizes.  */
16040 fputs (ASM_LONG, file);
16041 output_addr_const (file, x);
16042 fputs ("@dtpoff", file);
16048 fputs (", 0", file);
16051 gcc_unreachable ();
16055 /* Return true if X is a representation of the PIC register. This copes
16056 with calls from ix86_find_base_term, where the register might have
16057 been replaced by a cselib value. */
16060 ix86_pic_register_p (rtx x)
/* cselib VALUEs compare against pic_offset_table_rtx structurally.  */
16062 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
16063 return (pic_offset_table_rtx
16064 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
16065 else if (!REG_P (x))
16067 else if (pic_offset_table_rtx)
16069 if (REGNO (x) == REGNO (pic_offset_table_rtx))
/* A hard reg may still stand for the PIC pseudo after allocation;
   check its original (pre-allocation) regno.  */
16071 if (HARD_REGISTER_P (x)
16072 && !HARD_REGISTER_P (pic_offset_table_rtx)
16073 && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
/* No pic_offset_table_rtx: fall back to the fixed PIC hard reg.  */
16078 return REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
16081 /* Helper function for ix86_delegitimize_address.
16082 Attempt to delegitimize TLS local-exec accesses. */
16085 ix86_delegitimize_tls_address (rtx orig_x)
16087 rtx x = orig_x, unspec;
16088 struct ix86_address addr;
/* Local-exec via segment override only exists with direct seg refs.  */
16090 if (!TARGET_TLS_DIRECT_SEG_REFS)
16094 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
/* Must decompose to <TLS seg>:disp with a CONST displacement.  */
16096 if (ix86_decompose_address (x, &addr) == 0
16097 || addr.seg != DEFAULT_TLS_SEG_REG
16098 || addr.disp == NULL_RTX
16099 || GET_CODE (addr.disp) != CONST)
16101 unspec = XEXP (addr.disp, 0);
16102 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
16103 unspec = XEXP (unspec, 0);
16104 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
/* Recover the bare symbol, then rebuild offset/index/base on top.  */
16106 x = XVECEXP (unspec, 0, 0);
16107 gcc_assert (GET_CODE (x) == SYMBOL_REF);
16108 if (unspec != XEXP (addr.disp, 0))
16109 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
16112 rtx idx = addr.index;
16113 if (addr.scale != 1)
16114 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
16115 x = gen_rtx_PLUS (Pmode, idx, x);
16118 x = gen_rtx_PLUS (Pmode, addr.base, x);
16119 if (MEM_P (orig_x))
16120 x = replace_equiv_address_nv (orig_x, x);
16124 /* In the name of slightly smaller debug output, and to cater to
16125 general assembler lossage, recognize PIC+GOTOFF and turn it back
16126 into a direct symbol reference.
16128 On Darwin, this is necessary to avoid a crash, because Darwin
16129 has a different PIC label for each routine but the DWARF debugging
16130 information is not associated with any particular routine, so it's
16131 necessary to remove references to the PIC label from RTL stored by
16132 the DWARF output code. */
16135 ix86_delegitimize_address (rtx x)
16137 rtx orig_x = delegitimize_mem_from_attrs (x);
16138 /* addend is NULL or some rtx if x is something+GOTOFF where
16139 something doesn't include the PIC register. */
16140 rtx addend = NULL_RTX;
16141 /* reg_addend is NULL or a multiple of some register. */
16142 rtx reg_addend = NULL_RTX;
16143 /* const_addend is NULL or a const_int. */
16144 rtx const_addend = NULL_RTX;
16145 /* This is the result, or NULL. */
16146 rtx result = NULL_RTX;
/* 64-bit: unwrap (const (plus (unspec PCREL) off)) back into
   symbol+offset form.  */
16155 if (GET_CODE (x) == CONST
16156 && GET_CODE (XEXP (x, 0)) == PLUS
16157 && GET_MODE (XEXP (x, 0)) == Pmode
16158 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
16159 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
16160 && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
16162 rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
16163 x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
16164 if (MEM_P (orig_x))
16165 x = replace_equiv_address_nv (orig_x, x);
/* Bare GOTPCREL/PCREL unspecs.  */
16169 if (GET_CODE (x) == CONST
16170 && GET_CODE (XEXP (x, 0)) == UNSPEC
16171 && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
16172 || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
16173 && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
16175 x = XVECEXP (XEXP (x, 0), 0, 0);
16176 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
16178 x = simplify_gen_subreg (GET_MODE (orig_x), x,
16186 if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
16187 return ix86_delegitimize_tls_address (orig_x);
16189 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
16190 and -mcmodel=medium -fpic. */
16193 if (GET_CODE (x) != PLUS
16194 || GET_CODE (XEXP (x, 1)) != CONST)
16195 return ix86_delegitimize_tls_address (orig_x);
/* Classify what accompanies the CONST: the PIC reg itself, PIC reg
   plus a scaled index, or something not involving the PIC reg.  */
16197 if (ix86_pic_register_p (XEXP (x, 0)))
16198 /* %ebx + GOT/GOTOFF */
16200 else if (GET_CODE (XEXP (x, 0)) == PLUS)
16202 /* %ebx + %reg * scale + GOT/GOTOFF */
16203 reg_addend = XEXP (x, 0);
16204 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
16205 reg_addend = XEXP (reg_addend, 1);
16206 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
16207 reg_addend = XEXP (reg_addend, 0);
16210 reg_addend = NULL_RTX;
16211 addend = XEXP (x, 0);
16215 addend = XEXP (x, 0);
/* Peel a constant offset off the CONST body.  */
16217 x = XEXP (XEXP (x, 1), 0);
16218 if (GET_CODE (x) == PLUS
16219 && CONST_INT_P (XEXP (x, 1)))
16221 const_addend = XEXP (x, 1);
16225 if (GET_CODE (x) == UNSPEC
16226 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
16227 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
16228 || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
16229 && !MEM_P (orig_x) && !addend)))
16230 result = XVECEXP (x, 0, 0);
16232 if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
16233 && !MEM_P (orig_x))
16234 result = XVECEXP (x, 0, 0);
16237 return ix86_delegitimize_tls_address (orig_x);
/* Reassemble result + const_addend + reg_addend.  */
16240 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
16242 result = gen_rtx_PLUS (Pmode, reg_addend, result);
16245 /* If the rest of original X doesn't involve the PIC register, add
16246 addend and subtract pic_offset_table_rtx. This can happen e.g.
16248 leal (%ebx, %ecx, 4), %ecx
16250 movl foo@GOTOFF(%ecx), %edx
16251 in which case we return (%ecx - %ebx) + foo
16252 or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if pseudo_pic_reg
16253 and reload has completed. */
16254 if (pic_offset_table_rtx
16255 && (!reload_completed || !ix86_use_pseudo_pic_reg ()))
16256 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
16257 pic_offset_table_rtx),
16259 else if (pic_offset_table_rtx && !TARGET_MACHO && !TARGET_VXWORKS_RTP)
16261 rtx tmp = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
16262 tmp = gen_rtx_MINUS (Pmode, copy_rtx (addend), tmp);
16263 result = gen_rtx_PLUS (Pmode, tmp, result);
/* Narrow to the original MEM's mode if needed.  */
16268 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
16270 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
16271 if (result == NULL_RTX)
16277 /* If X is a machine specific address (i.e. a symbol or label being
16278 referenced as a displacement from the GOT implemented using an
16279 UNSPEC), then return the base term. Otherwise return X. */
16282 ix86_find_base_term (rtx x)
16288 if (GET_CODE (x) != CONST)
16290 term = XEXP (x, 0);
/* Skip over a constant offset, if any.  */
16291 if (GET_CODE (term) == PLUS
16292 && CONST_INT_P (XEXP (term, 1)))
16293 term = XEXP (term, 0);
/* Only GOTPCREL/PCREL unspecs wrap a base symbol here.  */
16294 if (GET_CODE (term) != UNSPEC
16295 || (XINT (term, 1) != UNSPEC_GOTPCREL
16296 && XINT (term, 1) != UNSPEC_PCREL))
16299 return XVECEXP (term, 0, 0);
/* Otherwise defer to full delegitimization.  */
16302 return ix86_delegitimize_address (x);
/* Emit the condition-code suffix (e.g. "e", "a", "nbe") for CODE under
   flags mode MODE.  REVERSE inverts the condition; FP selects the
   fcmov-style spelling.  NOTE(review): most case labels are missing
   from this excerpt.  */
16306 put_condition_code (enum rtx_code code, machine_mode mode, bool reverse,
16307 bool fp, FILE *file)
16309 const char *suffix;
/* FP compares are first mapped onto integer condition codes.  */
16311 if (mode == CCFPmode || mode == CCFPUmode)
16313 code = ix86_fp_compare_code_to_integer (code);
16317 code = reverse_condition (code);
16368 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
16372 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
16373 Those same assemblers have the same but opposite lossage on cmov. */
16374 if (mode == CCmode)
16375 suffix = fp ? "nbe" : "a";
16377 gcc_unreachable ();
16393 gcc_unreachable ();
16397 if (mode == CCmode)
16399 else if (mode == CCCmode)
16400 suffix = fp ? "b" : "c";
16402 gcc_unreachable ();
16418 gcc_unreachable ();
16422 if (mode == CCmode)
16424 else if (mode == CCCmode)
16425 suffix = fp ? "nb" : "nc";
16427 gcc_unreachable ();
16430 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
16434 if (mode == CCmode)
16437 gcc_unreachable ();
/* Parity conditions.  */
16440 suffix = fp ? "u" : "p";
16443 suffix = fp ? "nu" : "np";
16446 gcc_unreachable ();
16448 fputs (suffix, file);
16451 /* Print the name of register X to FILE based on its machine mode and number.
16452 If CODE is 'w', pretend the mode is HImode.
16453 If CODE is 'b', pretend the mode is QImode.
16454 If CODE is 'k', pretend the mode is SImode.
16455 If CODE is 'q', pretend the mode is DImode.
16456 If CODE is 'x', pretend the mode is V4SFmode.
16457 If CODE is 't', pretend the mode is V8SFmode.
16458 If CODE is 'g', pretend the mode is V16SFmode.
16459 If CODE is 'h', pretend the reg is the 'high' byte register.
16460 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
16461 If CODE is 'd', duplicate the operand for AVX instruction.
16465 print_reg (rtx x, int code, FILE *file)
16469 unsigned int regno;
/* AT&T syntax prefixes registers with '%'.  */
16472 if (ASSEMBLER_DIALECT == ASM_ATT)
16477 gcc_assert (TARGET_64BIT);
16478 fputs ("rip", file);
16482 if (code == 'y' && STACK_TOP_P (x))
16484 fputs ("st(0)", file);
/* Translate the override code into an operand size in bytes
   (assignments to msize are missing from this excerpt).  */
16490 else if (code == 'b')
16492 else if (code == 'k')
16494 else if (code == 'q')
16496 else if (code == 'h')
16498 else if (code == 'x')
16500 else if (code == 't')
16502 else if (code == 'g')
16505 msize = GET_MODE_SIZE (GET_MODE (x));
16507 regno = true_regnum (x);
/* These pseudo-registers should never reach output.  */
16509 gcc_assert (regno != ARG_POINTER_REGNUM
16510 && regno != FRAME_POINTER_REGNUM
16511 && regno != FLAGS_REG
16512 && regno != FPSR_REG
16513 && regno != FPCR_REG);
16515 duplicated = code == 'd' && TARGET_AVX;
/* Pick the register-name table by size.  */
16521 if (LEGACY_INT_REGNO_P (regno))
16522 putc (msize == 8 && TARGET_64BIT ? 'r' : 'e', file);
16527 reg = hi_reg_name[regno];
16530 if (regno >= ARRAY_SIZE (qi_reg_name))
16532 reg = qi_reg_name[regno];
16535 if (regno >= ARRAY_SIZE (qi_high_reg_name))
16537 reg = qi_high_reg_name[regno];
/* 32/64-byte: SSE regs printed as ymm/zmm via a prefix letter.  */
16541 if (SSE_REGNO_P (regno))
16543 gcc_assert (!duplicated);
16544 putc (msize == 32 ? 'y' : 'z', file);
16545 reg = hi_reg_name[regno] + 1;
16550 gcc_unreachable ();
16555 /* Irritatingly, AMD extended registers use
16556 different naming convention: "r%d[bwd]" */
16557 if (REX_INT_REGNO_P (regno))
16559 gcc_assert (TARGET_64BIT);
16563 error ("extended registers have no high halves");
16578 error ("unsupported operand size for extended register");
/* 'd' duplicates the operand (AVX 3-operand forms).  */
16586 if (ASSEMBLER_DIALECT == ASM_ATT)
16587 fprintf (file, ", %%%s", reg);
16589 fprintf (file, ", %s", reg);
16593 /* Meaning of CODE:
16594 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
16595 C -- print opcode suffix for set/cmov insn.
16596 c -- like C, but print reversed condition
16597 F,f -- likewise, but for floating-point.
16598 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
16600 R -- print embeded rounding and sae.
16601 r -- print only sae.
16602 z -- print the opcode suffix for the size of the current operand.
16603 Z -- likewise, with special suffixes for x87 instructions.
16604 * -- print a star (in certain assembler syntax)
16605 A -- print an absolute memory reference.
16606 E -- print address with DImode register names if TARGET_64BIT.
16607 w -- print the operand as if it's a "word" (HImode) even if it isn't.
16608 s -- print a shift double count, followed by the assemblers argument
16610 b -- print the QImode name of the register for the indicated operand.
16611 %b0 would print %al if operands[0] is reg 0.
16612 w -- likewise, print the HImode name of the register.
16613 k -- likewise, print the SImode name of the register.
16614 q -- likewise, print the DImode name of the register.
16615 x -- likewise, print the V4SFmode name of the register.
16616 t -- likewise, print the V8SFmode name of the register.
16617 g -- likewise, print the V16SFmode name of the register.
16618 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
16619 y -- print "st(0)" instead of "st" as a register.
16620 d -- print duplicated register operand for AVX instruction.
16621 D -- print condition for SSE cmp instruction.
16622 P -- if PIC, print an @PLT suffix.
16623 p -- print raw symbol name.
16624 X -- don't print any sort of PIC '@' suffix for a symbol.
16625 & -- print some in-use local-dynamic symbol name.
16626 H -- print a memory address offset by 8; used for sse high-parts
16627 Y -- print condition for XOP pcom* instruction.
16628 + -- print a branch hint as 'cs' or 'ds' prefix
16629 ; -- print a semicolon (after prefixes due to bug in older gas).
16630 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
16631 @ -- print a segment register of thread base pointer load
16632 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
16633 ! -- print MPX prefix for jxx/call/ret instructions if required.
16637 ix86_print_operand (FILE *file, rtx x, int code)
/* The big dispatch on CODE (many case labels are missing from this
   excerpt; each fragment below corresponds to one operand code as
   documented in the table above).  */
16644 switch (ASSEMBLER_DIALECT)
16651 /* Intel syntax. For absolute addresses, registers should not
16652 be surrounded by braces. */
16656 ix86_print_operand (file, x, 0);
16663 gcc_unreachable ();
16666 ix86_print_operand (file, x, 0);
/* 'E': force 64-bit address printing via UNSPEC_LEA_ADDR.  */
16670 /* Wrap address in an UNSPEC to declare special handling. */
16672 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
16674 output_address (VOIDmode, x);
/* L,W,B,Q,S,T: explicit AT&T size suffixes.  */
16678 if (ASSEMBLER_DIALECT == ASM_ATT)
16683 if (ASSEMBLER_DIALECT == ASM_ATT)
16688 if (ASSEMBLER_DIALECT == ASM_ATT)
16693 if (ASSEMBLER_DIALECT == ASM_ATT)
16698 if (ASSEMBLER_DIALECT == ASM_ATT)
16703 if (ASSEMBLER_DIALECT == ASM_ATT)
16708 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
16709 if (ASSEMBLER_DIALECT != ASM_ATT)
16712 switch (GET_MODE_SIZE (GET_MODE (x)))
16727 output_operand_lossage
16728 ("invalid operand size for operand code 'O'")
16737 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
16739 /* Opcodes don't get size suffixes if using Intel opcodes. */
16740 if (ASSEMBLER_DIALECT == ASM_INTEL)
16743 switch (GET_MODE_SIZE (GET_MODE (x)))
16762 output_operand_lossage
16763 ("invalid operand size for operand code 'z'");
16768 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
16770 (0, "non-integer operand used with operand code 'z'");
16774 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
16775 if (ASSEMBLER_DIALECT == ASM_INTEL)
16778 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
16780 switch (GET_MODE_SIZE (GET_MODE (x)))
16783 #ifdef HAVE_AS_IX86_FILDS
16793 #ifdef HAVE_AS_IX86_FILDQ
16796 fputs ("ll", file);
16804 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
16806 /* 387 opcodes don't get size suffixes
16807 if the operands are registers. */
16808 if (STACK_REG_P (x))
16811 switch (GET_MODE_SIZE (GET_MODE (x)))
16832 output_operand_lossage
16833 ("invalid operand type used with operand code 'Z'");
16837 output_operand_lossage
16838 ("invalid operand size for operand code 'Z'");
/* 's': shift-double count.  */
16857 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
16859 ix86_print_operand (file, x, 0);
16860 fputs (", ", file);
/* 'Y': XOP pcom* condition names.  */
16865 switch (GET_CODE (x))
16868 fputs ("neq", file);
16871 fputs ("eq", file);
16875 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
16879 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
16883 fputs ("le", file);
16887 fputs ("lt", file);
16890 fputs ("unord", file);
16893 fputs ("ord", file);
16896 fputs ("ueq", file);
16899 fputs ("nlt", file);
16902 fputs ("nle", file);
16905 fputs ("ule", file);
16908 fputs ("ult", file);
16911 fputs ("une", file);
16914 output_operand_lossage ("operand is not a condition code, "
16915 "invalid operand code 'Y'");
16921 /* Little bit of braindamage here. The SSE compare instructions
16922 does use completely different names for the comparisons that the
16923 fp conditional moves. */
16924 switch (GET_CODE (x))
16929 fputs ("eq_us", file);
16933 fputs ("eq", file);
16938 fputs ("nge", file);
16942 fputs ("lt", file);
16947 fputs ("ngt", file);
16951 fputs ("le", file);
16954 fputs ("unord", file);
16959 fputs ("neq_oq", file);
16963 fputs ("neq", file);
16968 fputs ("ge", file);
16972 fputs ("nlt", file);
16977 fputs ("gt", file);
16981 fputs ("nle", file);
16984 fputs ("ord", file);
16987 output_operand_lossage ("operand is not a condition code, "
16988 "invalid operand code 'D'");
/* 'C'/'c'/'F'/'f': set/cmov condition suffixes via
   put_condition_code.  */
16995 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
16996 if (ASSEMBLER_DIALECT == ASM_ATT)
17002 if (!COMPARISON_P (x))
17004 output_operand_lossage ("operand is not a condition code, "
17005 "invalid operand code '%c'", code);
17008 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
17009 code == 'c' || code == 'f',
17010 code == 'F' || code == 'f',
/* 'H': address offset by 8 for SSE high parts.  */
17015 if (!offsettable_memref_p (x))
17017 output_operand_lossage ("operand is not an offsettable memory "
17018 "reference, invalid operand code 'H'");
17021 /* It doesn't actually matter what mode we use here, as we're
17022 only going to use this for printing. */
17023 x = adjust_address_nv (x, DImode, 8);
17024 /* Output 'qword ptr' for intel assembler dialect. */
17025 if (ASSEMBLER_DIALECT == ASM_INTEL)
/* HLE xacquire/xrelease prefixes, emitted as raw bytes when the
   assembler lacks support.  */
17030 gcc_assert (CONST_INT_P (x));
17032 if (INTVAL (x) & IX86_HLE_ACQUIRE)
17033 #ifdef HAVE_AS_IX86_HLE
17034 fputs ("xacquire ", file);
17036 fputs ("\n" ASM_BYTE "0xf2\n\t", file);
17038 else if (INTVAL (x) & IX86_HLE_RELEASE)
17039 #ifdef HAVE_AS_IX86_HLE
17040 fputs ("xrelease ", file);
17042 fputs ("\n" ASM_BYTE "0xf3\n\t", file);
17044 /* We do not want to print value of the operand. */
/* AVX-512 zero-masking marker.  */
17048 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
17049 fputs ("{z}", file);
/* 'r': SAE-only marker.  */
17053 gcc_assert (CONST_INT_P (x));
17054 gcc_assert (INTVAL (x) == ROUND_SAE);
17056 if (ASSEMBLER_DIALECT == ASM_INTEL)
17057 fputs (", ", file);
17059 fputs ("{sae}", file);
17061 if (ASSEMBLER_DIALECT == ASM_ATT)
17062 fputs (", ", file);
/* 'R': embedded rounding + SAE.  */
17067 gcc_assert (CONST_INT_P (x));
17069 if (ASSEMBLER_DIALECT == ASM_INTEL)
17070 fputs (", ", file);
17072 switch (INTVAL (x))
17074 case ROUND_NEAREST_INT | ROUND_SAE:
17075 fputs ("{rn-sae}", file);
17077 case ROUND_NEG_INF | ROUND_SAE:
17078 fputs ("{rd-sae}", file);
17080 case ROUND_POS_INF | ROUND_SAE:
17081 fputs ("{ru-sae}", file);
17083 case ROUND_ZERO | ROUND_SAE:
17084 fputs ("{rz-sae}", file);
17087 gcc_unreachable ();
17090 if (ASSEMBLER_DIALECT == ASM_ATT)
17091 fputs (", ", file);
17096 if (ASSEMBLER_DIALECT == ASM_ATT)
/* '&': some in-use local-dynamic TLS symbol name.  */
17102 const char *name = get_some_local_dynamic_name ();
17104 output_operand_lossage ("'%%&' used without any "
17105 "local dynamic TLS references");
17107 assemble_name (file, name);
/* '+': emit ds/cs branch hints only where static prediction would
   guess wrong.  */
17116 || optimize_function_for_size_p (cfun)
17117 || !TARGET_BRANCH_PREDICTION_HINTS)
17120 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
17123 int pred_val = XINT (x, 0);
17125 if (pred_val < REG_BR_PROB_BASE * 45 / 100
17126 || pred_val > REG_BR_PROB_BASE * 55 / 100)
17128 bool taken = pred_val > REG_BR_PROB_BASE / 2;
17130 = final_forward_branch_p (current_output_insn) == 0;
17132 /* Emit hints only in the case default branch prediction
17133 heuristics would fail. */
17134 if (taken != cputaken)
17136 /* We use 3e (DS) prefix for taken branches and
17137 2e (CS) prefix for not taken branches. */
17139 fputs ("ds ; ", file);
17141 fputs ("cs ; ", file);
17149 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
/* '@': TLS base segment register (fs on 64-bit user code, else gs).  */
17155 if (ASSEMBLER_DIALECT == ASM_ATT)
17158 /* The kernel uses a different segment register for performance
17159 reasons; a system call would not have to trash the userspace
17160 segment register, which would be expensive. */
17161 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
17162 fputs ("fs", file);
17164 fputs ("gs", file);
17168 putc (TARGET_AVX2 ? 'i' : 'f', file);
17172 if (TARGET_64BIT && Pmode != word_mode)
17173 fputs ("addr32 ", file);
17177 if (ix86_bnd_prefixed_insn_p (current_output_insn))
17178 fputs ("bnd ", file);
17182 output_operand_lossage ("invalid operand code '%c'", code);
/* Non-code operands: registers, memory, constants.  */
17187 print_reg (x, code, file);
17189 else if (MEM_P (x))
17191 rtx addr = XEXP (x, 0);
17193 /* No `byte ptr' prefix for call instructions ... */
17194 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
17196 machine_mode mode = GET_MODE (x);
17199 /* Check for explicit size override codes. */
17202 else if (code == 'w')
17204 else if (code == 'k')
17206 else if (code == 'q')
17208 else if (code == 'x')
17210 else if (mode == BLKmode)
17211 /* ... or BLKmode operands, when not overridden. */
17214 switch (GET_MODE_SIZE (mode))
17216 case 1: size = "BYTE"; break;
17217 case 2: size = "WORD"; break;
17218 case 4: size = "DWORD"; break;
17219 case 8: size = "QWORD"; break;
17220 case 12: size = "TBYTE"; break;
17222 if (mode == XFmode)
17227 case 32: size = "YMMWORD"; break;
17228 case 64: size = "ZMMWORD"; break;
17230 gcc_unreachable ();
17234 fputs (size, file);
17235 fputs (" PTR ", file);
17239 if (this_is_asm_operands && ! address_operand (addr, VOIDmode))
17240 output_operand_lossage ("invalid constraints for operand");
17242 ix86_print_operand_address_as
17243 (file, addr, MEM_ADDR_SPACE (x), code == 'p' || code == 'P');
/* SFmode immediates: print the 32-bit bit pattern (sign-extended
   under AT&T).  */
17246 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == SFmode)
17250 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
17252 if (ASSEMBLER_DIALECT == ASM_ATT)
17254 /* Sign extend 32bit SFmode immediate to 8 bytes. */
17256 fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
17257 (unsigned long long) (int) l);
17259 fprintf (file, "0x%08x", (unsigned int) l);
17262 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == DFmode)
17266 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), l);
17268 if (ASSEMBLER_DIALECT == ASM_ATT)
17270 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
17273 /* These float cases don't actually occur as immediate operands. */
17274 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == XFmode)
17278 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
17279 fputs (dstr, file);
17284 /* We have patterns that allow zero sets of memory, for instance.
17285 In 64-bit mode, we should probably support all 8-byte vectors,
17286 since we can in fact encode that into an immediate. */
17287 if (GET_CODE (x) == CONST_VECTOR)
17289 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
/* '$' / 'OFFSET FLAT:' prefixes for immediates, unless suppressed
   by 'P'/'p'.  */
17293 if (code != 'P' && code != 'p')
17295 if (CONST_INT_P (x))
17297 if (ASSEMBLER_DIALECT == ASM_ATT)
17300 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
17301 || GET_CODE (x) == LABEL_REF)
17303 if (ASSEMBLER_DIALECT == ASM_ATT)
17306 fputs ("OFFSET FLAT:", file);
17309 if (CONST_INT_P (x))
17310 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
17311 else if (flag_pic || MACHOPIC_INDIRECT)
17312 output_pic_addr_const (file, x, code);
17314 output_addr_const (file, x);
/* NOTE(review): fragmentary excerpt; leading numbers are original line numbers.  */
/* Predicate: return true iff CODE is one of the punctuation characters
   that operand printing handles specially ('@' '*' '+' '&' ';' '~' '^' '!').  */
17319 ix86_print_operand_punct_valid_p (unsigned char code)
17321 return (code == '@' || code == '*' || code == '+' || code == '&'
17322 || code == ';' || code == '~' || code == '^' || code == '!');
/* NOTE(review): fragmentary excerpt; leading numbers are original line numbers.  */
17325 /* Print a memory operand whose address is ADDR. */
/* AS is the operand's address space; NO_RIP suppresses the RIP-relative
   form even in 64-bit mode.  The address is first decomposed into
   base/index/disp/scale via ix86_decompose_address, after unwrapping any
   of the UNSPEC wrappers handled below.  */
17328 ix86_print_operand_address_as (FILE *file, rtx addr,
17329 addr_space_t as, bool no_rip)
17331 struct ix86_address parts;
17332 rtx base, index, disp;
/* VSIB (vector-indexed) addresses: the real address is operand 0 of the
   UNSPEC; the vector index and scale are operands 1 and 2.  */
17338 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
17340 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
17341 gcc_assert (parts.index == NULL_RTX);
17342 parts.index = XVECEXP (addr, 0, 1);
17343 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
17344 addr = XVECEXP (addr, 0, 0);
/* LEA address wrapper -- 64-bit only per the assert.  */
17347 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
17349 gcc_assert (TARGET_64BIT);
17350 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
/* MPX BNDMK address: combine the two UNSPEC operands into base+index.  */
17353 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDMK_ADDR)
17355 ok = ix86_decompose_address (XVECEXP (addr, 0, 1), &parts);
17356 gcc_assert (parts.base == NULL_RTX || parts.index == NULL_RTX);
17357 if (parts.base != NULL_RTX)
17359 parts.index = parts.base;
17362 parts.base = XVECEXP (addr, 0, 0);
17363 addr = XVECEXP (addr, 0, 0);
/* MPX BNDLDX address: operand 1 supplies the index.  */
17365 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDLDX_ADDR)
17367 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
17368 gcc_assert (parts.index == NULL_RTX);
17369 parts.index = XVECEXP (addr, 0, 1);
17370 addr = XVECEXP (addr, 0, 0);
/* Plain address: decompose directly.  */
17373 ok = ix86_decompose_address (addr, &parts);
17378 index = parts.index;
17380 scale = parts.scale;
17382 if (ADDR_SPACE_GENERIC_P (as))
17385 gcc_assert (ADDR_SPACE_GENERIC_P (parts.seg));
/* Non-generic address space: emit the %fs:/%gs: segment override
   (spelling depends on AT&T vs Intel dialect).  */
17387 if (!ADDR_SPACE_GENERIC_P (as))
17389 const char *string;
17391 if (as == ADDR_SPACE_SEG_TLS)
17392 as = DEFAULT_TLS_SEG_REG;
17393 if (as == ADDR_SPACE_SEG_FS)
17394 string = (ASSEMBLER_DIALECT == ASM_ATT ? "%fs:" : "fs:");
17395 else if (as == ADDR_SPACE_SEG_GS)
17396 string = (ASSEMBLER_DIALECT == ASM_ATT ? "%gs:" : "gs:");
17398 gcc_unreachable ();
17399 fputs (string, file);
17402 /* Use one byte shorter RIP relative addressing for 64bit mode. */
17403 if (TARGET_64BIT && !base && !index && !no_rip)
/* Strip a CONST (PLUS sym const_int) wrapper to find the symbol; only
   label refs and non-TLS symbol refs qualify for %rip addressing.  */
17407 if (GET_CODE (disp) == CONST
17408 && GET_CODE (XEXP (disp, 0)) == PLUS
17409 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
17410 symbol = XEXP (XEXP (disp, 0), 0);
17412 if (GET_CODE (symbol) == LABEL_REF
17413 || (GET_CODE (symbol) == SYMBOL_REF
17414 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
17418 if (!base && !index)
17420 /* Displacement only requires special attention. */
17421 if (CONST_INT_P (disp))
/* Intel dialect needs an explicit ds: for a bare constant address.  */
17423 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == ADDR_SPACE_GENERIC)
17424 fputs ("ds:", file);
17425 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
17428 output_pic_addr_const (file, disp, 0);
17430 output_addr_const (file, disp);
17434 /* Print SImode register names to force addr32 prefix. */
17435 if (SImode_address_operand (addr, VOIDmode))
17439 gcc_assert (TARGET_64BIT);
17440 switch (GET_CODE (addr))
17443 gcc_assert (GET_MODE (addr) == SImode);
17444 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
17448 gcc_assert (GET_MODE (addr) == DImode);
17451 gcc_unreachable ();
17454 gcc_assert (!code);
17460 && CONST_INT_P (disp)
17461 && INTVAL (disp) < -16*1024*1024)
17463 /* X32 runs in 64-bit mode, where displacement, DISP, in
17464 address DISP(%r64), is encoded as 32-bit immediate sign-
17465 extended from 32-bit to 64-bit. For -0x40000300(%r64),
17466 address is %r64 + 0xffffffffbffffd00. When %r64 <
17467 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
17468 which is invalid for x32. The correct address is %r64
17469 - 0x40000300 == 0xf7ffdd64. To properly encode
17470 -0x40000300(%r64) for x32, we zero-extend negative
17471 displacement by forcing addr32 prefix which truncates
17472 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
17473 zero-extend all negative displacements, including -1(%rsp).
17474 However, for small negative displacements, sign-extension
17475 won't cause overflow. We only zero-extend negative
17476 displacements if they < -16*1024*1024, which is also used
17477 to check legitimate address displacements for PIC. */
/* AT&T syntax: disp(base,index,scale).  */
17481 if (ASSEMBLER_DIALECT == ASM_ATT)
17486 output_pic_addr_const (file, disp, 0);
17487 else if (GET_CODE (disp) == LABEL_REF)
17488 output_asm_label (disp);
17490 output_addr_const (file, disp);
17495 print_reg (base, code, file);
/* VSIB index registers are printed without a size-override code.  */
17499 print_reg (index, vsib ? 0 : code, file);
17500 if (scale != 1 || vsib)
17501 fprintf (file, ",%d", scale);
/* Intel syntax: [base+index*scale+disp].  */
17507 rtx offset = NULL_RTX;
17511 /* Pull out the offset of a symbol; print any symbol itself. */
17512 if (GET_CODE (disp) == CONST
17513 && GET_CODE (XEXP (disp, 0)) == PLUS
17514 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
17516 offset = XEXP (XEXP (disp, 0), 1);
17517 disp = gen_rtx_CONST (VOIDmode,
17518 XEXP (XEXP (disp, 0), 0));
17522 output_pic_addr_const (file, disp, 0);
17523 else if (GET_CODE (disp) == LABEL_REF)
17524 output_asm_label (disp);
17525 else if (CONST_INT_P (disp))
17528 output_addr_const (file, disp);
17534 print_reg (base, code, file);
17537 if (INTVAL (offset) >= 0)
17539 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
17543 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
17550 print_reg (index, vsib ? 0 : code, file);
17551 if (scale != 1 || vsib)
17552 fprintf (file, "*%d", scale);
/* NOTE(review): fragmentary excerpt; leading numbers are original line numbers.  */
/* Thin wrapper: print ADDR in the generic address space, allowing RIP-relative
   form (no_rip == false).  */
17560 ix86_print_operand_address (FILE *file, machine_mode /*mode*/, rtx addr)
17562 ix86_print_operand_address_as (file, addr, ADDR_SPACE_GENERIC, false);
/* NOTE(review): fragmentary excerpt; leading numbers are original line numbers.  */
17565 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
/* Handles the target-specific UNSPEC address constants (TLS relocation
   suffixes such as @gottpoff/@tpoff/@ntpoff/@dtpoff, Mach-O PIC offsets,
   and split-stack checks).  Non-UNSPEC rtx are rejected early.  */
17568 i386_asm_output_addr_const_extra (FILE *file, rtx x)
17572 if (GET_CODE (x) != UNSPEC)
17575 op = XVECEXP (x, 0, 0);
17576 switch (XINT (x, 1))
17578 case UNSPEC_GOTTPOFF:
17579 output_addr_const (file, op);
17580 /* FIXME: This might be @TPOFF in Sun ld. */
17581 fputs ("@gottpoff", file);
17584 output_addr_const (file, op);
17585 fputs ("@tpoff", file);
17587 case UNSPEC_NTPOFF:
17588 output_addr_const (file, op);
/* Suffix differs by target; the visible branches emit @tpoff or @ntpoff.  */
17590 fputs ("@tpoff", file);
17592 fputs ("@ntpoff", file);
17594 case UNSPEC_DTPOFF:
17595 output_addr_const (file, op);
17596 fputs ("@dtpoff", file);
17598 case UNSPEC_GOTNTPOFF:
17599 output_addr_const (file, op);
/* 64-bit uses a RIP-relative @gottpoff; spelling depends on dialect.  */
17601 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
17602 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
17604 fputs ("@gotntpoff", file);
17606 case UNSPEC_INDNTPOFF:
17607 output_addr_const (file, op);
17608 fputs ("@indntpoff", file);
17611 case UNSPEC_MACHOPIC_OFFSET:
17612 output_addr_const (file, op);
17614 machopic_output_function_base_name (file);
/* Split-stack guard: emit the TLS slot as %fs:offset (64-bit) or
   %gs:offset (32-bit); only valid when -fsplit-stack is on.  */
17618 case UNSPEC_STACK_CHECK:
17622 gcc_assert (flag_split_stack);
17624 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
17625 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
17627 gcc_unreachable ();
17630 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
/* NOTE(review): fragmentary excerpt; leading numbers are original line numbers.  */
17641 /* Split one or more double-mode RTL references into pairs of half-mode
17642 references. The RTL can be REG, offsettable MEM, integer constant, or
17643 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
17644 split and "num" is its length. lo_half and hi_half are output arrays
17645 that parallel "operands". */
17648 split_double_mode (machine_mode mode, rtx operands[],
17649 int num, rtx lo_half[], rtx hi_half[])
17651 machine_mode half_mode;
/* Half of TImode is DImode; half of DImode is SImode.  */
17657 half_mode = DImode;
17660 half_mode = SImode;
17663 gcc_unreachable ();
/* Byte offset of the high half within the double-mode value.  */
17666 byte = GET_MODE_SIZE (half_mode);
17670 rtx op = operands[num];
17672 /* simplify_subreg refuse to split volatile memory addresses,
17673 but we still have to handle it. */
/* MEM case: address arithmetic via adjust_address.  */
17676 lo_half[num] = adjust_address (op, half_mode, 0);
17677 hi_half[num] = adjust_address (op, half_mode, byte);
/* Non-MEM case: subregs.  VOIDmode operands (constants) use MODE.  */
17681 lo_half[num] = simplify_gen_subreg (half_mode, op,
17682 GET_MODE (op) == VOIDmode
17683 ? mode : GET_MODE (op), 0);
17684 hi_half[num] = simplify_gen_subreg (half_mode, op,
17685 GET_MODE (op) == VOIDmode
17686 ? mode : GET_MODE (op), byte);
/* NOTE(review): fragmentary excerpt; leading numbers are original line numbers.  */
17691 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
17692 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
17693 is the expression of the binary operation. The output may either be
17694 emitted here, or returned to the caller, like all output_* functions.
17696 There is no guarantee that the operands are the same mode, as they
17697 might be within FLOAT or FLOAT_EXTEND expressions. */
17699 #ifndef SYSV386_COMPAT
17700 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
17701 wants to fix the assemblers because that causes incompatibility
17702 with gcc. No-one wants to fix gcc because that causes
17703 incompatibility with assemblers... You can use the option of
17704 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
17705 #define SYSV386_COMPAT 1
/* Returns the assembler template (possibly built in a static buffer) for
   the x87 or SSE scalar form of the operation.  */
17709 output_387_binary_op (rtx insn, rtx *operands)
17711 static char buf[40];
17714 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
17716 /* Even if we do not want to check the inputs, this documents input
17717 constraints. Which helps in understanding the following code. */
/* x87 form requires the destination to match one source and the top of
   stack to be involved; otherwise the operands must be SSE registers.  */
17720 if (STACK_REG_P (operands[0])
17721 && ((REG_P (operands[1])
17722 && REGNO (operands[0]) == REGNO (operands[1])
17723 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
17724 || (REG_P (operands[2])
17725 && REGNO (operands[0]) == REGNO (operands[2])
17726 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
17727 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
17730 gcc_assert (is_sse);
/* Select mnemonic roots by operation; integer-mode operands use the
   fi* (integer-operand) variants -- TODO confirm, interior lines missing.  */
17733 switch (GET_CODE (operands[3]))
17736 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
17737 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
17745 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
17746 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
17754 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
17755 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
17763 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
17764 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
17772 gcc_unreachable ();
/* SSE path: build e.g. "addss"/"addsd" with 2- or 3-operand template.  */
17779 strcpy (buf, ssep);
17780 if (GET_MODE (operands[0]) == SFmode)
17781 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
17783 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
17787 strcpy (buf, ssep + 1);
17788 if (GET_MODE (operands[0]) == SFmode)
17789 strcat (buf, "ss\t{%2, %0|%0, %2}");
17791 strcat (buf, "sd\t{%2, %0|%0, %2}");
/* x87 path: pick the template suffix P based on which operand is at the
   top of stack, whether an operand dies, and SYSV386_COMPAT quirks.  */
17797 switch (GET_CODE (operands[3]))
/* Commutative ops (PLUS/MULT): canonicalize so dest == operands[1].  */
17801 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
17802 std::swap (operands[1], operands[2]);
17804 /* know operands[0] == operands[1]. */
17806 if (MEM_P (operands[2]))
17812 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
17814 if (STACK_TOP_P (operands[0]))
17815 /* How is it that we are storing to a dead operand[2]?
17816 Well, presumably operands[1] is dead too. We can't
17817 store the result to st(0) as st(0) gets popped on this
17818 instruction. Instead store to operands[2] (which I
17819 think has to be st(1)). st(1) will be popped later.
17820 gcc <= 2.8.1 didn't have this check and generated
17821 assembly code that the Unixware assembler rejected. */
17822 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
17824 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
17828 if (STACK_TOP_P (operands[0]))
17829 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
17831 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
/* Non-commutative ops (MINUS/DIV): memory-operand and dead-register
   cases, with the fsub{r}/fdiv{r} direction reversal described below.  */
17836 if (MEM_P (operands[1]))
17842 if (MEM_P (operands[2]))
17848 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
17851 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
17852 derived assemblers, confusingly reverse the direction of
17853 the operation for fsub{r} and fdiv{r} when the
17854 destination register is not st(0). The Intel assembler
17855 doesn't have this brain damage. Read !SYSV386_COMPAT to
17856 figure out what the hardware really does. */
17857 if (STACK_TOP_P (operands[0]))
17858 p = "{p\t%0, %2|rp\t%2, %0}";
17860 p = "{rp\t%2, %0|p\t%0, %2}";
17862 if (STACK_TOP_P (operands[0]))
17863 /* As above for fmul/fadd, we can't store to st(0). */
17864 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
17866 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
17871 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
17874 if (STACK_TOP_P (operands[0]))
17875 p = "{rp\t%0, %1|p\t%1, %0}";
17877 p = "{p\t%1, %0|rp\t%0, %1}";
17879 if (STACK_TOP_P (operands[0]))
17880 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
17882 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
17887 if (STACK_TOP_P (operands[0]))
17889 if (STACK_TOP_P (operands[1]))
17890 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
17892 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
17895 else if (STACK_TOP_P (operands[1]))
17898 p = "{\t%1, %0|r\t%0, %1}";
17900 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
17906 p = "{r\t%2, %0|\t%0, %2}";
17908 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
17914 gcc_unreachable ();
/* NOTE(review): fragmentary excerpt; leading numbers are original line numbers.  */
17921 /* Check if a 256bit AVX register is referenced inside of EXP. */
/* Looks through a SUBREG, then tests for a REG whose mode is a 256-bit
   AVX (or OImode) mode.  */
17924 ix86_check_avx256_register (const_rtx exp)
17926 if (SUBREG_P (exp))
17927 exp = SUBREG_REG (exp);
17929 return (REG_P (exp)
17930 && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp)));
/* NOTE(review): fragmentary excerpt; leading numbers are original line numbers.  */
17933 /* Return needed mode for entity in optimize_mode_switching pass. */
/* For the AVX upper-128 entity: CALL insns whose argument list uses no
   256-bit register need CLEAN; any insn touching a 256-bit AVX register
   needs DIRTY; otherwise ANY.  */
17936 ix86_avx_u128_mode_needed (rtx_insn *insn)
17942 /* Needed mode is set to AVX_U128_CLEAN if there are
17943 no 256bit modes used in function arguments. */
17944 for (link = CALL_INSN_FUNCTION_USAGE (insn);
17946 link = XEXP (link, 1))
17948 if (GET_CODE (XEXP (link, 0)) == USE)
17950 rtx arg = XEXP (XEXP (link, 0), 0);
17952 if (ix86_check_avx256_register (arg))
17953 return AVX_U128_DIRTY;
17957 return AVX_U128_CLEAN;
17960 /* Require DIRTY mode if a 256bit AVX register is referenced. Hardware
17961 changes state only when a 256bit register is written to, but we need
17962 to prevent the compiler from moving optimal insertion point above
17963 eventual read from 256bit register. */
17964 subrtx_iterator::array_type array;
17965 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
17966 if (ix86_check_avx256_register (*iter))
17967 return AVX_U128_DIRTY;
17969 return AVX_U128_ANY;
/* NOTE(review): fragmentary excerpt; leading numbers are original line numbers.  */
17972 /* Return mode that i387 must be switched into
17973 prior to the execution of insn. */
/* Calls and asm statements clobber the control word (UNINITIALIZED);
   unrecognizable insns impose no requirement (ANY); otherwise the insn's
   i387_cw attribute selects TRUNC/FLOOR/CEIL/MASK_PM.  */
17976 ix86_i387_mode_needed (int entity, rtx_insn *insn)
17978 enum attr_i387_cw mode;
17980 /* The mode UNINITIALIZED is used to store control word after a
17981 function call or ASM pattern. The mode ANY specify that function
17982 has no requirements on the control word and make no changes in the
17983 bits we are interested in. */
17986 || (NONJUMP_INSN_P (insn)
17987 && (asm_noperands (PATTERN (insn)) >= 0
17988 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
17989 return I387_CW_UNINITIALIZED;
17991 if (recog_memoized (insn) < 0)
17992 return I387_CW_ANY;
17994 mode = get_attr_i387_cw (insn);
/* ENTITY selects which rounding-mode entity is being asked about;
   interior lines (the entity checks) are missing from this excerpt.  */
17999 if (mode == I387_CW_TRUNC)
18004 if (mode == I387_CW_FLOOR)
18009 if (mode == I387_CW_CEIL)
18014 if (mode == I387_CW_MASK_PM)
18019 gcc_unreachable ();
18022 return I387_CW_ANY;
/* NOTE(review): fragmentary excerpt; leading numbers are original line numbers.  */
18025 /* Return mode that entity must be switched into
18026 prior to the execution of insn. */
/* TARGET_MODE_NEEDED dispatcher: AVX_U128 entity goes to the AVX helper,
   i387 control-word entities to ix86_i387_mode_needed.  */
18029 ix86_mode_needed (int entity, rtx_insn *insn)
18034 return ix86_avx_u128_mode_needed (insn);
18039 return ix86_i387_mode_needed (entity, insn);
18041 gcc_unreachable ();
/* NOTE(review): fragmentary excerpt; leading numbers are original line numbers.  */
18046 /* Check if a 256bit AVX register is referenced in stores. */
/* note_stores callback: sets *(bool *)DATA when DEST is a 256-bit AVX reg.  */
18049 ix86_check_avx256_stores (rtx dest, const_rtx, void *data)
18051 if (ix86_check_avx256_register (dest))
18053 bool *used = (bool *) data;
/* NOTE(review): fragmentary excerpt; leading numbers are original line numbers.  */
18058 /* Calculate mode of upper 128bit AVX registers after the insn. */
/* vzeroupper/vzeroall leave the state CLEAN; after a call the state is
   CLEAN unless the return value lives in a 256-bit register; other insns
   keep MODE unchanged.  */
18061 ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
18063 rtx pat = PATTERN (insn);
18065 if (vzeroupper_operation (pat, VOIDmode)
18066 || vzeroall_operation (pat, VOIDmode))
18067 return AVX_U128_CLEAN;
18069 /* We know that state is clean after CALL insn if there are no
18070 256bit registers used in the function return register. */
18073 bool avx_reg256_found = false;
18074 note_stores (pat, ix86_check_avx256_stores, &avx_reg256_found);
18076 return avx_reg256_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
18079 /* Otherwise, return current mode. Remember that if insn
18080 references AVX 256bit registers, the mode was already changed
18081 to DIRTY from MODE_NEEDED. */
/* NOTE(review): fragmentary excerpt; leading numbers are original line numbers.  */
18085 /* Return the mode that an insn results in. */
/* TARGET_MODE_AFTER dispatcher; only the AVX_U128 branch is visible here.  */
18088 ix86_mode_after (int entity, int mode, rtx_insn *insn)
18093 return ix86_avx_u128_mode_after (mode, insn);
18100 gcc_unreachable ();
/* NOTE(review): fragmentary excerpt; leading numbers are original line numbers.  */
/* Mode assumed for the AVX upper-128 entity at function entry: DIRTY if
   any incoming argument arrives in a 256-bit register, else CLEAN.  */
18105 ix86_avx_u128_mode_entry (void)
18109 /* Entry mode is set to AVX_U128_DIRTY if there are
18110 256bit modes used in function arguments. */
18111 for (arg = DECL_ARGUMENTS (current_function_decl); arg;
18112 arg = TREE_CHAIN (arg))
18114 rtx incoming = DECL_INCOMING_RTL (arg);
18116 if (incoming && ix86_check_avx256_register (incoming))
18117 return AVX_U128_DIRTY;
18120 return AVX_U128_CLEAN;
/* NOTE(review): fragmentary excerpt; leading numbers are original line numbers.  */
18123 /* Return a mode that ENTITY is assumed to be
18124 switched to at function entry. */
/* TARGET_MODE_ENTRY dispatcher: AVX_U128 uses the helper above; the i387
   control-word entities are unconstrained at entry (ANY).  */
18127 ix86_mode_entry (int entity)
18132 return ix86_avx_u128_mode_entry ();
18137 return I387_CW_ANY;
18139 gcc_unreachable ();
/* NOTE(review): fragmentary excerpt; leading numbers are original line numbers.  */
/* Mode assumed for the AVX upper-128 entity at function exit: DIRTY if
   the return value lives in a 256-bit register, else CLEAN.  */
18144 ix86_avx_u128_mode_exit (void)
18146 rtx reg = crtl->return_rtx;
18148 /* Exit mode is set to AVX_U128_DIRTY if there are
18149 256bit modes used in the function return register. */
18150 if (reg && ix86_check_avx256_register (reg))
18151 return AVX_U128_DIRTY;
18153 return AVX_U128_CLEAN;
/* NOTE(review): fragmentary excerpt; leading numbers are original line numbers.  */
18156 /* Return a mode that ENTITY is assumed to be
18157 switched to at function exit. */
/* TARGET_MODE_EXIT dispatcher, mirroring ix86_mode_entry.  */
18160 ix86_mode_exit (int entity)
18165 return ix86_avx_u128_mode_exit ();
18170 return I387_CW_ANY;
18172 gcc_unreachable ();
18177 ix86_mode_priority (int, int n)
/* NOTE(review): fragmentary excerpt; leading numbers are original line numbers.  */
18182 /* Output code to initialize control word copies used by trunc?f?i and
18183 rounding patterns. CURRENT_MODE is set to current control word,
18184 while NEW_MODE is set to new control word. */
/* Stores the current CW with fnstcw, tweaks the rounding-control bits
   (bits 10-11) / precision-mask bit in a scratch HImode register, and
   stores the result into a dedicated stack slot for the requested MODE.  */
18187 emit_i387_cw_initialization (int mode)
18189 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
18192 enum ix86_stack_slot slot;
18194 rtx reg = gen_reg_rtx (HImode);
18196 emit_insn (gen_x86_fnstcw_1 (stored_mode));
18197 emit_move_insn (reg, copy_rtx (stored_mode));
/* Two strategies: explicit and/or masking (this path), or the shorter
   insvsi bit-field insert used on targets without partial-reg stalls.  */
18199 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
18200 || optimize_insn_for_size_p ())
18204 case I387_CW_TRUNC:
18205 /* round toward zero (truncate) */
18206 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
18207 slot = SLOT_CW_TRUNC;
18210 case I387_CW_FLOOR:
18211 /* round down toward -oo */
18212 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
18213 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
18214 slot = SLOT_CW_FLOOR;
18218 /* round up toward +oo */
18219 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
18220 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
18221 slot = SLOT_CW_CEIL;
18224 case I387_CW_MASK_PM:
18225 /* mask precision exception for nearbyint() */
18226 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
18227 slot = SLOT_CW_MASK_PM;
18231 gcc_unreachable ();
/* insvsi variant: insert the 2-bit rounding-control field directly.  */
18238 case I387_CW_TRUNC:
18239 /* round toward zero (truncate) */
18240 emit_insn (gen_insvsi_1 (reg, GEN_INT (0xc)));
18241 slot = SLOT_CW_TRUNC;
18244 case I387_CW_FLOOR:
18245 /* round down toward -oo */
18246 emit_insn (gen_insvsi_1 (reg, GEN_INT (0x4)));
18247 slot = SLOT_CW_FLOOR;
18251 /* round up toward +oo */
18252 emit_insn (gen_insvsi_1 (reg, GEN_INT (0x8)));
18253 slot = SLOT_CW_CEIL;
18256 case I387_CW_MASK_PM:
18257 /* mask precision exception for nearbyint() */
18258 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
18259 slot = SLOT_CW_MASK_PM;
18263 gcc_unreachable ();
18267 gcc_assert (slot < MAX_386_STACK_LOCALS);
18269 new_mode = assign_386_stack_local (HImode, slot);
18270 emit_move_insn (new_mode, reg);
/* NOTE(review): fragmentary excerpt; leading numbers are original line numbers.  */
18273 /* Emit vzeroupper. */
/* Suppresses the insertion when any call-saved SSE register (including
   the REX set) is live at this point, since vzeroupper would clobber its
   upper half.  */
18276 ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live)
18280 /* Cancel automatic vzeroupper insertion if there are
18281 live call-saved SSE registers at the insertion point. */
18283 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
18284 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
18288 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
18289 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
18292 emit_insn (gen_avx_vzeroupper ());
/* NOTE(review): fragmentary excerpt; leading numbers are original line numbers.  */
18295 /* Generate one or more insns to set ENTITY to MODE. */
18297 /* Generate one or more insns to set ENTITY to MODE. HARD_REG_LIVE
18298 is the set of hard registers live at the point where the insn(s)
18299 are to be inserted. */
/* TARGET_MODE_EMIT: vzeroupper for a CLEAN AVX_U128 transition, control-
   word initialization for concrete i387 rounding modes.  */
18302 ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
18303 HARD_REG_SET regs_live)
18308 if (mode == AVX_U128_CLEAN)
18309 ix86_avx_emit_vzeroupper (regs_live);
18315 if (mode != I387_CW_ANY
18316 && mode != I387_CW_UNINITIALIZED)
18317 emit_i387_cw_initialization (mode);
18320 gcc_unreachable ();
/* NOTE(review): fragmentary excerpt; leading numbers are original line numbers.  */
18324 /* Output code for INSN to convert a float to a signed int. OPERANDS
18325 are the insn operands. The output may be [HSD]Imode and the input
18326 operand may be [SDX]Fmode. */
/* FISTTP is the SSE3 truncating store; otherwise the control word is
   swapped around a fist/fistp via operands[2]/[3] stack slots.  */
18329 output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp)
18331 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
18332 int dimode_p = GET_MODE (operands[0]) == DImode;
18333 int round_mode = get_attr_i387_cw (insn);
18335 /* Jump through a hoop or two for DImode, since the hardware has no
18336 non-popping instruction. We used to do this a different way, but
18337 that was somewhat fragile and broke with post-reload splitters. */
18338 if ((dimode_p || fisttp) && !stack_top_dies)
18339 output_asm_insn ("fld\t%y1", operands);
18341 gcc_assert (STACK_TOP_P (operands[1]));
18342 gcc_assert (MEM_P (operands[0]));
18343 gcc_assert (GET_MODE (operands[1]) != TFmode);
18346 output_asm_insn ("fisttp%Z0\t%0", operands);
/* Non-fisttp path: set the rounding CW, store, restore the CW.  */
18349 if (round_mode != I387_CW_ANY)
18350 output_asm_insn ("fldcw\t%3", operands);
18351 if (stack_top_dies || dimode_p)
18352 output_asm_insn ("fistp%Z0\t%0", operands);
18354 output_asm_insn ("fist%Z0\t%0", operands);
18355 if (round_mode != I387_CW_ANY)
18356 output_asm_insn ("fldcw\t%2", operands);
/* NOTE(review): fragmentary excerpt; leading numbers are original line numbers.  */
18362 /* Output code for x87 ffreep insn. The OPNO argument, which may only
18363 have the values zero or one, indicates the ffreep insn's operand
18364 from the OPERANDS array. */
18366 static const char *
18367 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
18369 if (TARGET_USE_FFREEP)
18370 #ifdef HAVE_AS_IX86_FFREEP
18371 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
/* Assembler lacks ffreep: emit the raw 0xdf 0xc0+i opcode bytes via
   ASM_SHORT into a static buffer keyed by the stack register number.  */
18374 static char retval[32];
18375 int regno = REGNO (operands[opno]);
18377 gcc_assert (STACK_REGNO_P (regno));
18379 regno -= FIRST_STACK_REG;
18381 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
/* Fallback when ffreep is not wanted at all: plain popping store.  */
18386 return opno ? "fstp\t%y1" : "fstp\t%y0";
/* NOTE(review): fragmentary excerpt; leading numbers are original line numbers.  */
18390 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
18391 should be used. UNORDERED_P is true when fucom should be used. */
18394 output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
18396 int stack_top_dies;
18397 rtx cmp_op0, cmp_op1;
18398 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
/* Operand layout differs between the eflags (fcomi/ucomis*) form and the
   fnstsw form; pick the actual comparison operands accordingly.  */
18402 cmp_op0 = operands[0];
18403 cmp_op1 = operands[1];
18407 cmp_op0 = operands[1];
18408 cmp_op1 = operands[2];
/* SSE scalar compares: (v)ucomis/(v)comis, selected by mode and
   orderedness.  */
18413 if (GET_MODE (operands[0]) == SFmode)
18415 return "%vucomiss\t{%1, %0|%0, %1}";
18417 return "%vcomiss\t{%1, %0|%0, %1}";
18420 return "%vucomisd\t{%1, %0|%0, %1}";
18422 return "%vcomisd\t{%1, %0|%0, %1}";
18425 gcc_assert (STACK_TOP_P (cmp_op0));
18427 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
/* Compare against zero: ftst, freeing st(0) afterwards if it dies.  */
18429 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
18431 if (stack_top_dies)
18433 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
18434 return output_387_ffreep (operands, 1);
18437 return "ftst\n\tfnstsw\t%0";
18440 if (STACK_REG_P (cmp_op1)
18442 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
18443 && REGNO (cmp_op1) != FIRST_STACK_REG)
18445 /* If both the top of the 387 stack dies, and the other operand
18446 is also a stack register that dies, then this must be a
18447 `fcompp' float compare */
18451 /* There is no double popping fcomi variant. Fortunately,
18452 eflags is immune from the fstp's cc clobbering. */
18454 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
18456 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
18457 return output_387_ffreep (operands, 0);
18462 return "fucompp\n\tfnstsw\t%0";
18464 return "fcompp\n\tfnstsw\t%0";
18469 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
18471 static const char * const alt[16] =
18473 "fcom%Z2\t%y2\n\tfnstsw\t%0",
18474 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
18475 "fucom%Z2\t%y2\n\tfnstsw\t%0",
18476 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
18478 "ficom%Z2\t%y2\n\tfnstsw\t%0",
18479 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
18483 "fcomi\t{%y1, %0|%0, %y1}",
18484 "fcomip\t{%y1, %0|%0, %y1}",
18485 "fucomi\t{%y1, %0|%0, %y1}",
18486 "fucomip\t{%y1, %0|%0, %y1}",
/* Build the 4-bit index into ALT from the flags per the comment above.  */
18497 mask = eflags_p << 3;
18498 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
18499 mask |= unordered_p << 1;
18500 mask |= stack_top_dies;
18502 gcc_assert (mask < 16);
/* NOTE(review): fragmentary excerpt; leading numbers are original line numbers.  */
/* Emit one absolute jump-table entry: ".long/.quad LNNN".  The .quad
   directive path is visible but its guarding condition is missing here.  */
18511 ix86_output_addr_vec_elt (FILE *file, int value)
18513 const char *directive = ASM_LONG;
18517 directive = ASM_QUAD;
18519 gcc_assert (!TARGET_64BIT);
18522 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
/* NOTE(review): fragmentary excerpt; leading numbers are original line numbers.  */
/* Emit one PIC jump-table entry as a difference of labels, a @GOTOFF
   reference, or a GOT-relative expression depending on target.  */
18526 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
18528 const char *directive = ASM_LONG;
18531 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
18532 directive = ASM_QUAD;
18534 gcc_assert (!TARGET_64BIT);
18536 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
18537 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
18538 fprintf (file, "%s%s%d-%s%d\n",
18539 directive, LPREFIX, value, LPREFIX, rel);
18540 else if (HAVE_AS_GOTOFF_IN_DATA)
18541 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
/* Mach-O: label minus the PIC function base.  */
18543 else if (TARGET_MACHO)
18545 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
18546 machopic_output_function_base_name (file);
/* Fallback: express the entry relative to _GLOBAL_OFFSET_TABLE_.  */
18551 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
18552 GOT_SYMBOL_NAME, LPREFIX, value);
/* NOTE(review): fragmentary excerpt; leading numbers are original line numbers.  */
18555 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
/* Emits a zero-set of DEST; only valid post-reload since it widens
   narrow registers to SImode to avoid prefix bytes.  The xor form adds a
   FLAGS_REG clobber since xor sets the flags.  */
18559 ix86_expand_clear (rtx dest)
18563 /* We play register width games, which are only valid after reload. */
18564 gcc_assert (reload_completed);
18566 /* Avoid HImode and its attendant prefix byte. */
18567 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
18568 dest = gen_rtx_REG (SImode, REGNO (dest));
18569 tmp = gen_rtx_SET (dest, const0_rtx);
18571 if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
18573 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18574 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
/* NOTE(review): fragmentary excerpt; leading numbers are original line numbers.  */
18580 /* X is an unchanging MEM. If it is a constant pool reference, return
18581 the constant pool rtx, else NULL. */
/* Delegitimizes the address first so PIC-wrapped pool references are
   recognized.  */
18584 maybe_get_pool_constant (rtx x)
18586 x = ix86_delegitimize_address (XEXP (x, 0));
18588 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
18589 return get_pool_constant (x);
/* NOTE(review): fragmentary excerpt; leading numbers are original line numbers.  */
/* Expand a scalar move of the given MODE, legitimizing TLS symbols,
   PE-COFF dllimport references, and PIC addresses, and forcing operands
   into registers where the target patterns require it.  */
18595 ix86_expand_move (machine_mode mode, rtx operands[])
18598 enum tls_model model;
/* Bare SYMBOL_REF source: handle TLS models and dllimport stubs.  */
18603 if (GET_CODE (op1) == SYMBOL_REF)
18607 model = SYMBOL_REF_TLS_MODEL (op1);
18610 op1 = legitimize_tls_address (op1, model, true);
18611 op1 = force_operand (op1, op0);
18614 op1 = convert_to_mode (mode, op1, 1);
18616 else if ((tmp = legitimize_pe_coff_symbol (op1, false)) != NULL_RTX)
/* CONST (PLUS symbol addend): legitimize the symbol, then re-add the
   addend with expand_simple_binop.  */
18619 else if (GET_CODE (op1) == CONST
18620 && GET_CODE (XEXP (op1, 0)) == PLUS
18621 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
18623 rtx addend = XEXP (XEXP (op1, 0), 1);
18624 rtx symbol = XEXP (XEXP (op1, 0), 0);
18627 model = SYMBOL_REF_TLS_MODEL (symbol);
18629 tmp = legitimize_tls_address (symbol, model, true);
18631 tmp = legitimize_pe_coff_symbol (symbol, true);
18635 tmp = force_operand (tmp, NULL);
18636 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
18637 op0, 1, OPTAB_DIRECT);
18640 op1 = convert_to_mode (mode, tmp, 1);
/* PIC / Mach-O indirect handling for symbolic operands.  */
18644 if ((flag_pic || MACHOPIC_INDIRECT)
18645 && symbolic_operand (op1, mode))
18647 if (TARGET_MACHO && !TARGET_64BIT)
18650 /* dynamic-no-pic */
18651 if (MACHOPIC_INDIRECT)
18653 rtx temp = (op0 && REG_P (op0) && mode == Pmode)
18654 ? op0 : gen_reg_rtx (Pmode);
18655 op1 = machopic_indirect_data_reference (op1, temp);
18657 op1 = machopic_legitimize_pic_address (op1, mode,
18658 temp == op1 ? 0 : temp);
18660 if (op0 != op1 && GET_CODE (op0) != MEM)
18662 rtx insn = gen_rtx_SET (op0, op1);
18666 if (GET_CODE (op0) == MEM)
18667 op1 = force_reg (Pmode, op1);
18671 if (GET_CODE (temp) != REG)
18672 temp = gen_reg_rtx (Pmode);
18673 temp = legitimize_pic_address (op1, temp);
18678 /* dynamic-no-pic */
18684 op1 = force_reg (mode, op1);
18685 else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
18687 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
18688 op1 = legitimize_pic_address (op1, reg);
18691 op1 = convert_to_mode (mode, op1, 1);
/* MEM destination: most patterns cannot take a MEM source too; force
   the source into a register except for valid pushes.  */
18698 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
18699 || !push_operand (op0, mode))
18701 op1 = force_reg (mode, op1);
18703 if (push_operand (op0, mode)
18704 && ! general_no_elim_operand (op1, mode))
18705 op1 = copy_to_mode_reg (mode, op1);
18707 /* Force large constants in 64bit compilation into register
18708 to get them CSEed. */
18709 if (can_create_pseudo_p ()
18710 && (mode == DImode) && TARGET_64BIT
18711 && immediate_operand (op1, mode)
18712 && !x86_64_zext_immediate_operand (op1, VOIDmode)
18713 && !register_operand (op0, mode)
18715 op1 = copy_to_mode_reg (mode, op1);
18717 if (can_create_pseudo_p ()
18718 && CONST_DOUBLE_P (op1))
18720 /* If we are loading a floating point constant to a register,
18721 force the value to memory now, since we'll get better code
18722 out the back end. */
18724 op1 = validize_mem (force_const_mem (mode, op1));
18725 if (!register_operand (op0, mode))
18727 rtx temp = gen_reg_rtx (mode);
18728 emit_insn (gen_rtx_SET (temp, op1));
18729 emit_move_insn (op0, temp);
/* Finally emit the (possibly rewritten) move.  */
18735 emit_insn (gen_rtx_SET (op0, op1));
/* NOTE(review): fragmentary excerpt; leading numbers are original line numbers.  */
/* Expand a vector move: resolve pushes, spill non-zero constants to the
   constant pool, and route under-aligned SSE memory operands through the
   misaligned-move expander.  */
18739 ix86_expand_vector_move (machine_mode mode, rtx operands[])
18741 rtx op0 = operands[0], op1 = operands[1];
18742 /* Use GET_MODE_BITSIZE instead of GET_MODE_ALIGNMENT for IA MCU
18743 psABI since the biggest alignment is 4 byte for IA MCU psABI. */
18744 unsigned int align = (TARGET_IAMCU
18745 ? GET_MODE_BITSIZE (mode)
18746 : GET_MODE_ALIGNMENT (mode));
18748 if (push_operand (op0, VOIDmode))
18749 op0 = emit_move_resolve_push (mode, op0);
18751 /* Force constants other than zero into memory. We do not know how
18752 the instructions used to build constants modify the upper 64 bits
18753 of the register, once we have that information we may be able
18754 to handle some of them more efficiently. */
18755 if (can_create_pseudo_p ()
18756 && register_operand (op0, mode)
18757 && (CONSTANT_P (op1)
18759 && CONSTANT_P (SUBREG_REG (op1))))
18760 && !standard_sse_constant_p (op1))
18761 op1 = validize_mem (force_const_mem (mode, op1));
18763 /* We need to check memory alignment for SSE mode since attribute
18764 can make operands unaligned. */
18765 if (can_create_pseudo_p ()
18766 && SSE_REG_MODE_P (mode)
18767 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
18768 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
18772 /* ix86_expand_vector_move_misalign() does not like constants ... */
18773 if (CONSTANT_P (op1)
18775 && CONSTANT_P (SUBREG_REG (op1))))
18776 op1 = validize_mem (force_const_mem (mode, op1));
18778 /* ... nor both arguments in memory. */
18779 if (!register_operand (op0, mode)
18780 && !register_operand (op1, mode))
18781 op1 = force_reg (mode, op1);
18783 tmp[0] = op0; tmp[1] = op1;
18784 ix86_expand_vector_move_misalign (mode, tmp);
18788 /* Make operand1 a register if it isn't already. */
18789 if (can_create_pseudo_p ()
18790 && !register_operand (op0, mode)
18791 && !register_operand (op1, mode))
18793 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
/* Aligned / register case: a plain SET suffices.  */
18797 emit_insn (gen_rtx_SET (op0, op1));
18800 /* Split 32-byte AVX unaligned load and store if needed. */
/* OP0/OP1 are the destination/source of a 32-byte vector move; the
   mode of OP0 selects the per-mode extract/load/store generators.  */
18803 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
18806 rtx (*extract) (rtx, rtx, rtx);
18807 rtx (*load_unaligned) (rtx, rtx);
18808 rtx (*store_unaligned) (rtx, rtx);
18811 switch (GET_MODE (op0))
18814 gcc_unreachable ();
18816 extract = gen_avx_vextractf128v32qi;
18817 load_unaligned = gen_avx_loaddquv32qi;
18818 store_unaligned = gen_avx_storedquv32qi;
18822 extract = gen_avx_vextractf128v8sf;
18823 load_unaligned = gen_avx_loadups256;
18824 store_unaligned = gen_avx_storeups256;
18828 extract = gen_avx_vextractf128v4df;
18829 load_unaligned = gen_avx_loadupd256;
18830 store_unaligned = gen_avx_storeupd256;
/* Load path: when tuning asks for it, load the two 16-byte halves
   separately and recombine with VEC_CONCAT (vinsertf128).  */
18837 if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD
18838 && optimize_insn_for_speed_p ())
18840 rtx r = gen_reg_rtx (mode);
18841 m = adjust_address (op1, mode, 0);
18842 emit_move_insn (r, m);
18843 m = adjust_address (op1, mode, 16);
18844 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
18845 emit_move_insn (op0, r);
18847 /* Normal *mov<mode>_internal pattern will handle
18848 unaligned loads just fine if misaligned_operand
18849 is true, and without the UNSPEC it can be combined
18850 with arithmetic instructions. */
18851 else if (misaligned_operand (op1, GET_MODE (op1)))
18852 emit_insn (gen_rtx_SET (op0, op1));
18854 emit_insn (load_unaligned (op0, op1));
/* Store path: mirror of the above, splitting into two vextractf128
   stores when tuning prefers it.  */
18856 else if (MEM_P (op0))
18858 if (TARGET_AVX256_SPLIT_UNALIGNED_STORE
18859 && optimize_insn_for_speed_p ())
18861 m = adjust_address (op0, mode, 0);
18862 emit_insn (extract (m, op1, const0_rtx));
18863 m = adjust_address (op0, mode, 16);
18864 emit_insn (extract (m, op1, const1_rtx));
18867 emit_insn (store_unaligned (op0, op1));
18870 gcc_unreachable ();
18873 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
18874 straight to ix86_expand_vector_move. */
18875 /* Code generation for scalar reg-reg moves of single and double precision data:
18876 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
18880 if (x86_sse_partial_reg_dependency == true)
18885 Code generation for scalar loads of double precision data:
18886 if (x86_sse_split_regs == true)
18887 movlpd mem, reg (gas syntax)
18891 Code generation for unaligned packed loads of single precision data
18892 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
18893 if (x86_sse_unaligned_move_optimal)
18896 if (x86_sse_partial_reg_dependency == true)
18908 Code generation for unaligned packed loads of double precision data
18909 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
18910 if (x86_sse_unaligned_move_optimal)
18913 if (x86_sse_split_regs == true)
18926 ix86_expand_vector_move_misalign (machine_mode mode, rtx operands[])
18928 rtx op0, op1, orig_op0 = NULL_RTX, m;
18929 rtx (*load_unaligned) (rtx, rtx);
18930 rtx (*store_unaligned) (rtx, rtx);
/* 64-byte (AVX-512) case: integer vectors are canonicalized to
   V16SImode before using the dqu/ups/upd 512-bit patterns.  */
18935 if (GET_MODE_SIZE (mode) == 64)
18937 switch (GET_MODE_CLASS (mode))
18939 case MODE_VECTOR_INT:
18941 if (GET_MODE (op0) != V16SImode)
18946 op0 = gen_reg_rtx (V16SImode);
18949 op0 = gen_lowpart (V16SImode, op0);
18951 op1 = gen_lowpart (V16SImode, op1);
18954 case MODE_VECTOR_FLOAT:
18955 switch (GET_MODE (op0))
18958 gcc_unreachable ();
18960 load_unaligned = gen_avx512f_loaddquv16si;
18961 store_unaligned = gen_avx512f_storedquv16si;
18964 load_unaligned = gen_avx512f_loadups512;
18965 store_unaligned = gen_avx512f_storeups512;
18968 load_unaligned = gen_avx512f_loadupd512;
18969 store_unaligned = gen_avx512f_storeupd512;
18974 emit_insn (load_unaligned (op0, op1));
18975 else if (MEM_P (op0))
18976 emit_insn (store_unaligned (op0, op1));
18978 gcc_unreachable ();
/* If op0 was retyped above, copy the result back into the
   caller's original destination mode.  */
18980 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
18984 gcc_unreachable ();
/* 32-byte (AVX) case: integer vectors go through V32QImode; float
   vectors defer to the AVX-256 split helper above.  */
18991 && GET_MODE_SIZE (mode) == 32)
18993 switch (GET_MODE_CLASS (mode))
18995 case MODE_VECTOR_INT:
18997 if (GET_MODE (op0) != V32QImode)
19002 op0 = gen_reg_rtx (V32QImode);
19005 op0 = gen_lowpart (V32QImode, op0);
19007 op1 = gen_lowpart (V32QImode, op1);
19010 case MODE_VECTOR_FLOAT:
19011 ix86_avx256_split_vector_move_misalign (op0, op1);
19013 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
19017 gcc_unreachable ();
19025 /* Normal *mov<mode>_internal pattern will handle
19026 unaligned loads just fine if misaligned_operand
19027 is true, and without the UNSPEC it can be combined
19028 with arithmetic instructions. */
19030 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
19031 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
19032 && misaligned_operand (op1, GET_MODE (op1)))
19033 emit_insn (gen_rtx_SET (op0, op1));
19034 /* ??? If we have typed data, then it would appear that using
19035 movdqu is the only way to get unaligned data loaded with
19037 else if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
19039 if (GET_MODE (op0) != V16QImode)
19042 op0 = gen_reg_rtx (V16QImode);
19044 op1 = gen_lowpart (V16QImode, op1);
19045 /* We will eventually emit movups based on insn attributes. */
19046 emit_insn (gen_sse2_loaddquv16qi (op0, op1));
19048 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
/* 16-byte V2DF load: either one movupd, or a low/high pair of
   loadlpd/loadhpd to dodge partial-register dependencies.  */
19050 else if (TARGET_SSE2 && mode == V2DFmode)
19055 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
19056 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
19057 || optimize_insn_for_size_p ())
19059 /* We will eventually emit movups based on insn attributes. */
19060 emit_insn (gen_sse2_loadupd (op0, op1));
19064 /* When SSE registers are split into halves, we can avoid
19065 writing to the top half twice. */
19066 if (TARGET_SSE_SPLIT_REGS)
19068 emit_clobber (op0);
19073 /* ??? Not sure about the best option for the Intel chips.
19074 The following would seem to satisfy; the register is
19075 entirely cleared, breaking the dependency chain. We
19076 then store to the upper half, with a dependency depth
19077 of one. A rumor has it that Intel recommends two movsd
19078 followed by an unpacklpd, but this is unconfirmed. And
19079 given that the dependency depth of the unpacklpd would
19080 still be one, I'm not sure why this would be better. */
19081 zero = CONST0_RTX (V2DFmode);
19084 m = adjust_address (op1, DFmode, 0);
19085 emit_insn (gen_sse2_loadlpd (op0, zero, m));
19086 m = adjust_address (op1, DFmode, 8);
19087 emit_insn (gen_sse2_loadhpd (op0, op0, m));
/* 16-byte V4SF load: movups, or a loadlps/loadhps half pair.  */
19094 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
19095 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
19096 || optimize_insn_for_size_p ())
19098 if (GET_MODE (op0) != V4SFmode)
19101 op0 = gen_reg_rtx (V4SFmode);
19103 op1 = gen_lowpart (V4SFmode, op1);
19104 emit_insn (gen_sse_loadups (op0, op1));
19106 emit_move_insn (orig_op0,
19107 gen_lowpart (GET_MODE (orig_op0), op0));
19111 if (mode != V4SFmode)
19112 t = gen_reg_rtx (V4SFmode);
19116 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
19117 emit_move_insn (t, CONST0_RTX (V4SFmode))
19121 m = adjust_address (op1, V2SFmode, 0);
19122 emit_insn (gen_sse_loadlps (t, t, m));
19123 m = adjust_address (op1, V2SFmode, 8);
19124 emit_insn (gen_sse_loadhps (t, t, m));
19125 if (mode != V4SFmode)
19126 emit_move_insn (op0, gen_lowpart (mode, t));
/* Store side: same strategy choices as the load side, using the
   storedqu/storeupd/storeups patterns or half-store pairs.  */
19129 else if (MEM_P (op0))
19131 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
19133 op0 = gen_lowpart (V16QImode, op0);
19134 op1 = gen_lowpart (V16QImode, op1);
19135 /* We will eventually emit movups based on insn attributes. */
19136 emit_insn (gen_sse2_storedquv16qi (op0, op1));
19138 else if (TARGET_SSE2 && mode == V2DFmode)
19141 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
19142 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
19143 || optimize_insn_for_size_p ())
19144 /* We will eventually emit movups based on insn attributes. */
19145 emit_insn (gen_sse2_storeupd (op0, op1));
19148 m = adjust_address (op0, DFmode, 0);
19149 emit_insn (gen_sse2_storelpd (m, op1));
19150 m = adjust_address (op0, DFmode, 8);
19151 emit_insn (gen_sse2_storehpd (m, op1));
19156 if (mode != V4SFmode)
19157 op1 = gen_lowpart (V4SFmode, op1);
19160 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
19161 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
19162 || optimize_insn_for_size_p ())
19164 op0 = gen_lowpart (V4SFmode, op0);
19165 emit_insn (gen_sse_storeups (op0, op1));
19169 m = adjust_address (op0, V2SFmode, 0);
19170 emit_insn (gen_sse_storelps (m, op1));
19171 m = adjust_address (op0, V2SFmode, 8);
19172 emit_insn (gen_sse_storehps (m, op1));
19177 gcc_unreachable ();
19180 /* Helper function of ix86_fixup_binary_operands to canonicalize
19181 operand order. Returns true if the operands should be swapped. */
/* OPERANDS is {dst, src1, src2}; only commutative CODEs may swap.  */
19184 ix86_swap_binary_operands_p (enum rtx_code code, machine_mode mode,
19187 rtx dst = operands[0];
19188 rtx src1 = operands[1];
19189 rtx src2 = operands[2];
19191 /* If the operation is not commutative, we can't do anything. */
19192 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
19195 /* Highest priority is that src1 should match dst. */
19196 if (rtx_equal_p (dst, src1))
19198 if (rtx_equal_p (dst, src2))
19201 /* Next highest priority is that immediate constants come second. */
19202 if (immediate_operand (src2, mode))
19204 if (immediate_operand (src1, mode))
19207 /* Lowest priority is that memory references should come second. */
19217 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
19218 destination to use for the operation. If different from the true
19219 destination in operands[0], a copy operation will be required. */
19222 ix86_fixup_binary_operands (enum rtx_code code, machine_mode mode,
19225 rtx dst = operands[0];
19226 rtx src1 = operands[1];
19227 rtx src2 = operands[2];
19229 /* Canonicalize operand order. */
19230 if (ix86_swap_binary_operands_p (code, mode, operands))
19232 /* It is invalid to swap operands of different modes. */
19233 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
19235 std::swap (src1, src2);
19238 /* Both source operands cannot be in memory. */
19239 if (MEM_P (src1) && MEM_P (src2))
19241 /* Optimization: Only read from memory once. */
19242 if (rtx_equal_p (src1, src2))
19244 src2 = force_reg (mode, src2);
19247 else if (rtx_equal_p (dst, src1))
19248 src2 = force_reg (mode, src2);
19250 src1 = force_reg (mode, src1);
19253 /* If the destination is memory, and we do not have matching source
19254 operands, do things in registers. */
19255 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
19256 dst = gen_reg_rtx (mode);
19258 /* Source 1 cannot be a constant. */
19259 if (CONSTANT_P (src1))
19260 src1 = force_reg (mode, src1);
19262 /* Source 1 cannot be a non-matching memory. */
19263 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
19264 src1 = force_reg (mode, src1);
19266 /* Improve address combine. */
/* Forcing src2 of an integer-mode op into a register helps later
   address-combining; the guarding condition here was partly lost
   in extraction -- TODO confirm against full source.  */
19268 && GET_MODE_CLASS (mode) == MODE_INT
19270 src2 = force_reg (mode, src2);
19272 operands[1] = src1;
19273 operands[2] = src2;
19277 /* Similarly, but assume that the destination has already been
19278 set up properly. */
/* Asserts that fixing up the operands did not need a fresh
   destination register.  */
19281 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
19282 machine_mode mode, rtx operands[])
19284 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
19285 gcc_assert (dst == operands[0]);
19288 /* Attempt to expand a binary operator. Make the expansion closer to the
19289 actual machine, then just general_operand, which will allow 3 separate
19290 memory references (one output, two input) in a single insn. */
19293 ix86_expand_binary_operator (enum rtx_code code, machine_mode mode,
19296 rtx src1, src2, dst, op, clob;
19298 dst = ix86_fixup_binary_operands (code, mode, operands);
19299 src1 = operands[1];
19300 src2 = operands[2];
19302 /* Emit the instruction. */
19304 op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, mode, src1, src2));
19306 if (reload_completed
19308 && !rtx_equal_p (dst, src1))
19310 /* This is going to be an LEA; avoid splitting it later. */
/* Otherwise attach a FLAGS_REG clobber, since most x86 ALU
   instructions set the condition codes.  */
19315 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
19316 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
19319 /* Fix up the destination if needed. */
19320 if (dst != operands[0])
19321 emit_move_insn (operands[0], dst);
19324 /* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
19325 the given OPERANDS. */
19328 ix86_expand_vector_logical_operator (enum rtx_code code, machine_mode mode,
19331 rtx op1 = NULL_RTX, op2 = NULL_RTX;
19332 if (SUBREG_P (operands[1]))
19337 else if (SUBREG_P (operands[2]))
19342 /* Optimize (__m128i) d | (__m128i) e and similar code
19343 when d and e are float vectors into float vector logical
19344 insn. In C/C++ without using intrinsics there is no other way
19345 to express vector logical operation on float vectors than
19346 to cast them temporarily to integer vectors. */
19348 && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
19349 && (SUBREG_P (op2) || GET_CODE (op2) == CONST_VECTOR)
19350 && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT
19351 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode)
19352 && SUBREG_BYTE (op1) == 0
19353 && (GET_CODE (op2) == CONST_VECTOR
19354 || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2))
19355 && SUBREG_BYTE (op2) == 0))
19356 && can_create_pseudo_p ())
/* Perform the operation in the float-vector mode of the SUBREG
   source, then copy the result back in the requested MODE.  */
19359 switch (GET_MODE (SUBREG_REG (op1)))
19367 dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
19368 if (GET_CODE (op2) == CONST_VECTOR)
19370 op2 = gen_lowpart (GET_MODE (dst), op2);
19371 op2 = force_reg (GET_MODE (dst), op2);
19376 op2 = SUBREG_REG (operands[2]);
19377 if (!vector_operand (op2, GET_MODE (dst)))
19378 op2 = force_reg (GET_MODE (dst), op2);
19380 op1 = SUBREG_REG (op1);
19381 if (!vector_operand (op1, GET_MODE (dst)))
19382 op1 = force_reg (GET_MODE (dst), op1);
19383 emit_insn (gen_rtx_SET (dst,
19384 gen_rtx_fmt_ee (code, GET_MODE (dst),
19386 emit_move_insn (operands[0], gen_lowpart (mode, dst));
/* Generic fallback: legitimize both sources and emit the op
   directly in MODE.  */
19392 if (!vector_operand (operands[1], mode))
19393 operands[1] = force_reg (mode, operands[1]);
19394 if (!vector_operand (operands[2], mode))
19395 operands[2] = force_reg (mode, operands[2]);
19396 ix86_fixup_binary_operands_no_copy (code, mode, operands);
19397 emit_insn (gen_rtx_SET (operands[0],
19398 gen_rtx_fmt_ee (code, mode, operands[1],
19402 /* Return TRUE or FALSE depending on whether the binary operator meets the
19403 appropriate constraints. */
/* Predicate twin of ix86_fixup_binary_operands: checks, without
   emitting anything, that {dst, src1, src2} are in legal shape.  */
19406 ix86_binary_operator_ok (enum rtx_code code, machine_mode mode,
19409 rtx dst = operands[0];
19410 rtx src1 = operands[1];
19411 rtx src2 = operands[2];
19413 /* Both source operands cannot be in memory. */
19414 if (MEM_P (src1) && MEM_P (src2))
19417 /* Canonicalize operand order for commutative operators. */
19418 if (ix86_swap_binary_operands_p (code, mode, operands))
19419 std::swap (src1, src2);
19421 /* If the destination is memory, we must have a matching source operand. */
19422 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
19425 /* Source 1 cannot be a constant. */
19426 if (CONSTANT_P (src1))
19429 /* Source 1 cannot be a non-matching memory. */
19430 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
19431 /* Support "andhi/andsi/anddi" as a zero-extending move. */
19432 return (code == AND
19435 || (TARGET_64BIT && mode == DImode))
19436 && satisfies_constraint_L (src2));
19441 /* Attempt to expand a unary operator. Make the expansion closer to the
19442 actual machine, then just general_operand, which will allow 2 separate
19443 memory references (one output, one input) in a single insn. */
19446 ix86_expand_unary_operator (enum rtx_code code, machine_mode mode,
19449 bool matching_memory = false;
19450 rtx src, dst, op, clob;
19455 /* If the destination is memory, and we do not have matching source
19456 operands, do things in registers. */
19459 if (rtx_equal_p (dst, src))
19460 matching_memory = true;
19462 dst = gen_reg_rtx (mode);
19465 /* When source operand is memory, destination must match. */
19466 if (MEM_P (src) && !matching_memory)
19467 src = force_reg (mode, src);
19469 /* Emit the instruction. */
19471 op = gen_rtx_SET (dst, gen_rtx_fmt_e (code, mode, src));
/* Attach a FLAGS_REG clobber for codes that set condition flags.  */
19477 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
19478 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
19481 /* Fix up the destination if needed. */
19482 if (dst != operands[0])
19483 emit_move_insn (operands[0], dst);
19486 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
19487 divisor are within the range [0-255]. */
19490 ix86_split_idivmod (machine_mode mode, rtx operands[],
19493 rtx_code_label *end_label, *qimode_label;
19494 rtx insn, div, mod;
19495 rtx scratch, tmp0, tmp1, tmp2;
19496 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
19497 rtx (*gen_zero_extend) (rtx, rtx);
19498 rtx (*gen_test_ccno_1) (rtx, rtx);
/* Pick SImode or DImode pattern generators based on MODE.  */
19503 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
19504 gen_test_ccno_1 = gen_testsi_ccno_1;
19505 gen_zero_extend = gen_zero_extendqisi2;
19508 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
19509 gen_test_ccno_1 = gen_testdi_ccno_1;
19510 gen_zero_extend = gen_zero_extendqidi2;
19513 gcc_unreachable ();
19516 end_label = gen_label_rtx ();
19517 qimode_label = gen_label_rtx ();
19519 scratch = gen_reg_rtx (mode);
19521 /* Use 8bit unsigned divimod if dividend and divisor are within
19522 the range [0-255]. */
19523 emit_move_insn (scratch, operands[2]);
19524 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
19525 scratch, 1, OPTAB_DIRECT);
/* OR of both operands tested against ~0xff: zero iff both fit in
   8 bits, in which case we branch to the cheap QImode path.  */
19526 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
19527 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
19528 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
19529 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
19530 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
19532 insn = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp0));
19533 predict_jump (REG_BR_PROB_BASE * 50 / 100);
19534 JUMP_LABEL (insn) = qimode_label;
19536 /* Generate original signed/unsigned divimod. */
19537 div = gen_divmod4_1 (operands[0], operands[1],
19538 operands[2], operands[3]);
19541 /* Branch to the end. */
19542 emit_jump_insn (gen_jump (end_label));
19545 /* Generate 8bit unsigned divide. */
19546 emit_label (qimode_label);
19547 /* Don't use operands[0] for result of 8bit divide since not all
19548 registers support QImode ZERO_EXTRACT. */
19549 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
19550 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
19551 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
19552 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
/* Build DIV/MOD (or UDIV/UMOD) rtxes purely for REG_EQUAL notes.  */
19556 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
19557 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
19561 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
19562 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
19565 /* Extract remainder from AH. */
19566 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
19567 if (REG_P (operands[1]))
19568 insn = emit_move_insn (operands[1], tmp1);
19571 /* Need a new scratch register since the old one has result
19573 scratch = gen_reg_rtx (mode);
19574 emit_move_insn (scratch, tmp1);
19575 insn = emit_move_insn (operands[1], scratch);
19577 set_unique_reg_note (insn, REG_EQUAL, mod);
19579 /* Zero extend quotient from AL. */
19580 tmp1 = gen_lowpart (QImode, tmp0);
19581 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
19582 set_unique_reg_note (insn, REG_EQUAL, div);
19584 emit_label (end_label);
/* Maximum AGU stall (in half-cycles) we tolerate, and how far the
   LEA-splitting heuristics search around an insn.  */
19587 #define LEA_MAX_STALL (3)
19588 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
19590 /* Increase given DISTANCE in half-cycles according to
19591 dependencies between PREV and NEXT instructions.
19592 Add 1 half-cycle if there is no dependency and
19593 go to next cycle if there is some dependecy. */
19595 static unsigned int
19596 increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance)
19600 if (!prev || !next)
/* (distance & 1) rounds up to the next whole cycle before adding
   a full cycle (2 half-cycles).  */
19601 return distance + (distance & 1) + 2;
19603 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
19604 return distance + 1;
19606 FOR_EACH_INSN_USE (use, next)
19607 FOR_EACH_INSN_DEF (def, prev)
19608 if (!DF_REF_IS_ARTIFICIAL (def)
19609 && DF_REF_REGNO (use) == DF_REF_REGNO (def))
19610 return distance + (distance & 1) + 2;
19612 return distance + 1;
19615 /* Function checks if instruction INSN defines register number
19616 REGNO1 or REGNO2. */
/* Artificial df defs are ignored; pass INVALID_REGNUM for an
   unused slot.  */
19619 insn_defines_reg (unsigned int regno1, unsigned int regno2,
19624 FOR_EACH_INSN_DEF (def, insn)
19625 if (DF_REF_REG_DEF_P (def)
19626 && !DF_REF_IS_ARTIFICIAL (def)
19627 && (regno1 == DF_REF_REGNO (def)
19628 || regno2 == DF_REF_REGNO (def)))
19634 /* Function checks if instruction INSN uses register number
19635 REGNO as a part of address expression. */
/* DF_REF_REG_MEM_P is true only for uses inside a memory address,
   which is exactly the AGU-relevant case.  */
19638 insn_uses_reg_mem (unsigned int regno, rtx insn)
19642 FOR_EACH_INSN_USE (use, insn)
19643 if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use))
19649 /* Search backward for non-agu definition of register number REGNO1
19650 or register number REGNO2 in basic block starting from instruction
19651 START up to head of basic block or instruction INSN.
19653 Function puts true value into *FOUND var if definition was found
19654 and false otherwise.
19656 Distance in half-cycles between START and found instruction or head
19657 of BB is added to DISTANCE and returned. */
19660 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
19661 rtx_insn *insn, int distance,
19662 rtx_insn *start, bool *found)
19664 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
19665 rtx_insn *prev = start;
19666 rtx_insn *next = NULL;
/* Walk backwards from START, bounded by the search threshold.  */
19672 && distance < LEA_SEARCH_THRESHOLD)
19674 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
19676 distance = increase_distance (prev, next, distance);
19677 if (insn_defines_reg (regno1, regno2, prev))
/* A defining insn counts as non-AGU unless it is itself a LEA
   (TYPE_LEA executes on the AGU).  */
19679 if (recog_memoized (prev) < 0
19680 || get_attr_type (prev) != TYPE_LEA)
19689 if (prev == BB_HEAD (bb))
19692 prev = PREV_INSN (prev);
19698 /* Search backward for non-agu definition of register number REGNO1
19699 or register number REGNO2 in INSN's basic block until
19700 1. Pass LEA_SEARCH_THRESHOLD instructions, or
19701 2. Reach neighbour BBs boundary, or
19702 3. Reach agu definition.
19703 Returns the distance between the non-agu definition point and INSN.
19704 If no definition point, returns -1. */
19707 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
19710 basic_block bb = BLOCK_FOR_INSN (insn);
19712 bool found = false;
19714 if (insn != BB_HEAD (bb))
19715 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
19716 distance, PREV_INSN (insn),
/* Not found locally: continue into predecessor blocks.  A self-loop
   ("simple loop") is searched from its own end; otherwise take the
   shortest positive distance over all predecessors.  */
19719 if (!found && distance < LEA_SEARCH_THRESHOLD)
19723 bool simple_loop = false;
19725 FOR_EACH_EDGE (e, ei, bb->preds)
19728 simple_loop = true;
19733 distance = distance_non_agu_define_in_bb (regno1, regno2,
19735 BB_END (bb), &found);
19738 int shortest_dist = -1;
19739 bool found_in_bb = false;
19741 FOR_EACH_EDGE (e, ei, bb->preds)
19744 = distance_non_agu_define_in_bb (regno1, regno2,
19750 if (shortest_dist < 0)
19751 shortest_dist = bb_dist;
19752 else if (bb_dist > 0)
19753 shortest_dist = MIN (bb_dist, shortest_dist);
19759 distance = shortest_dist;
19763 /* get_attr_type may modify recog data. We want to make sure
19764 that recog data is valid for instruction INSN, on which
19765 distance_non_agu_define is called. INSN is unchanged here. */
19766 extract_insn_cached (insn);
/* Distances are tracked in half-cycles; report whole cycles.  */
19771 return distance >> 1;
19774 /* Return the distance in half-cycles between INSN and the next
19775 insn that uses register number REGNO in memory address added
19776 to DISTANCE. Return -1 if REGNO0 is set.
19778 Put true value into *FOUND if register usage was found and
19780 Put true value into *REDEFINED if register redefinition was
19781 found and false otherwise. */
19784 distance_agu_use_in_bb (unsigned int regno,
19785 rtx_insn *insn, int distance, rtx_insn *start,
19786 bool *found, bool *redefined)
19788 basic_block bb = NULL;
19789 rtx_insn *next = start;
19790 rtx_insn *prev = NULL;
19793 *redefined = false;
19795 if (start != NULL_RTX)
19797 bb = BLOCK_FOR_INSN (start);
19798 if (start != BB_HEAD (bb))
19799 /* If insn and start belong to the same bb, set prev to insn,
19800 so the call to increase_distance will increase the distance
19801 between insns by 1. */
/* Walk forwards from START, bounded by the search threshold.  */
19807 && distance < LEA_SEARCH_THRESHOLD)
19809 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
19811 distance = increase_distance(prev, next, distance);
19812 if (insn_uses_reg_mem (regno, next))
19814 /* Return DISTANCE if OP0 is used in memory
19815 address in NEXT. */
19820 if (insn_defines_reg (regno, INVALID_REGNUM, next))
19822 /* Return -1 if OP0 is set in NEXT. */
19830 if (next == BB_END (bb))
19833 next = NEXT_INSN (next);
19839 /* Return the distance between INSN and the next insn that uses
19840 register number REGNO0 in memory address. Return -1 if no such
19841 a use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
19844 distance_agu_use (unsigned int regno0, rtx_insn *insn)
19846 basic_block bb = BLOCK_FOR_INSN (insn);
19848 bool found = false;
19849 bool redefined = false;
19851 if (insn != BB_END (bb))
19852 distance = distance_agu_use_in_bb (regno0, insn, distance,
19854 &found, &redefined);
/* Mirror of distance_non_agu_define, but scanning forwards over
   successor blocks for the first AGU use.  */
19856 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
19860 bool simple_loop = false;
19862 FOR_EACH_EDGE (e, ei, bb->succs)
19865 simple_loop = true;
19870 distance = distance_agu_use_in_bb (regno0, insn,
19871 distance, BB_HEAD (bb),
19872 &found, &redefined);
19875 int shortest_dist = -1;
19876 bool found_in_bb = false;
19877 bool redefined_in_bb = false;
19879 FOR_EACH_EDGE (e, ei, bb->succs)
19882 = distance_agu_use_in_bb (regno0, insn,
19883 distance, BB_HEAD (e->dest),
19884 &found_in_bb, &redefined_in_bb);
19887 if (shortest_dist < 0)
19888 shortest_dist = bb_dist;
19889 else if (bb_dist > 0)
19890 shortest_dist = MIN (bb_dist, shortest_dist);
19896 distance = shortest_dist;
19900 if (!found || redefined)
/* Convert half-cycles to whole cycles for the caller.  */
19903 return distance >> 1;
19906 /* Define this macro to tune LEA priority vs ADD, it take effect when
19907 there is a dilemma of choicing LEA or ADD
19908 Negative value: ADD is more preferred than LEA
19910 Positive value: LEA is more preferred than ADD*/
19911 #define IX86_LEA_PRIORITY 0
19913 /* Return true if usage of lea INSN has performance advantage
19914 over a sequence of instructions. Instructions sequence has
19915 SPLIT_COST cycles higher latency than lea latency. */
19918 ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
19919 unsigned int regno2, int split_cost, bool has_scale)
19921 int dist_define, dist_use;
19923 /* For Silvermont if using a 2-source or 3-source LEA for
19924 non-destructive destination purposes, or due to wanting
19925 ability to use SCALE, the use of LEA is justified. */
19926 if (TARGET_SILVERMONT || TARGET_INTEL)
19930 if (split_cost < 1)
19932 if (regno0 == regno1 || regno0 == regno2)
/* Generic path: compare the distance to the nearest non-AGU
   definition of the sources against the distance to the nearest
   AGU use of the destination.  */
19937 dist_define = distance_non_agu_define (regno1, regno2, insn);
19938 dist_use = distance_agu_use (regno0, insn);
19940 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
19942 /* If there is no non AGU operand definition, no AGU
19943 operand usage and split cost is 0 then both lea
19944 and non lea variants have same priority. Currently
19945 we prefer lea for 64 bit code and non lea on 32 bit
19947 if (dist_use < 0 && split_cost == 0)
19948 return TARGET_64BIT || IX86_LEA_PRIORITY;
19953 /* With longer definitions distance lea is more preferable.
19954 Here we change it to take into account splitting cost and
19956 dist_define += split_cost + IX86_LEA_PRIORITY;
19958 /* If there is no use in memory addess then we just check
19959 that split cost exceeds AGU stall. */
19961 return dist_define > LEA_MAX_STALL;
19963 /* If this insn has both backward non-agu dependence and forward
19964 agu dependence, the one with short distance takes effect. */
19965 return dist_define >= dist_use;
19968 /* Return true if it is legal to clobber flags by INSN and
19969 false otherwise. */
/* Scans forward to the end of INSN's block: a later use of
   FLAGS_REG forbids clobbering, a later definition permits it;
   otherwise consult the block's live-out set.  */
19972 ix86_ok_to_clobber_flags (rtx_insn *insn)
19974 basic_block bb = BLOCK_FOR_INSN (insn);
19980 if (NONDEBUG_INSN_P (insn))
19982 FOR_EACH_INSN_USE (use, insn)
19983 if (DF_REF_REG_USE_P (use) && DF_REF_REGNO (use) == FLAGS_REG)
19986 if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
19990 if (insn == BB_END (bb))
19993 insn = NEXT_INSN (insn);
19996 live = df_get_live_out(bb);
19997 return !REGNO_REG_SET_P (live, FLAGS_REG);
20000 /* Return true if we need to split op0 = op1 + op2 into a sequence of
20001 move and add to avoid AGU stalls. */
20004 ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[])
20006 unsigned int regno0, regno1, regno2;
20008 /* Check if we need to optimize. */
20009 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
20012 /* Check it is correct to split here. */
20013 if (!ix86_ok_to_clobber_flags(insn))
20016 regno0 = true_regnum (operands[0]);
20017 regno1 = true_regnum (operands[1]);
20018 regno2 = true_regnum (operands[2]);
20020 /* We need to split only adds with non destructive
20021 destination operand. */
20022 if (regno0 == regno1 || regno0 == regno2)
/* Splitting costs one extra mov, hence split_cost of 1.  */
20025 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
20028 /* Return true if we should emit lea instruction instead of mov
/* (continuation of comment lost in extraction -- applies to
   reg-to-reg moves only, see check below).  */
20032 ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[])
20034 unsigned int regno0, regno1;
20036 /* Check if we need to optimize. */
20037 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
20040 /* Use lea for reg to reg moves only. */
20041 if (!REG_P (operands[0]) || !REG_P (operands[1]))
20044 regno0 = true_regnum (operands[0]);
20045 regno1 = true_regnum (operands[1]);
20047 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
20050 /* Return true if we need to split lea into a sequence of
20051 instructions to avoid AGU stalls. */
20054 ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
20056 unsigned int regno0, regno1, regno2;
20058 struct ix86_address parts;
20061 /* Check we need to optimize. */
20062 if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
20065 /* The "at least two components" test below might not catch simple
20066 move or zero extension insns if parts.base is non-NULL and parts.disp
20067 is const0_rtx as the only components in the address, e.g. if the
20068 register is %rbp or %r13. As this test is much cheaper and moves or
20069 zero extensions are the common case, do this check first. */
20070 if (REG_P (operands[1])
20071 || (SImode_address_operand (operands[1], VOIDmode)
20072 && REG_P (XEXP (operands[1], 0))))
20075 /* Check if it is OK to split here. */
20076 if (!ix86_ok_to_clobber_flags (insn))
20079 ok = ix86_decompose_address (operands[1], &parts);
20082 /* There should be at least two components in the address. */
20083 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
20084 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
20087 /* We should not split into add if non legitimate pic
20088 operand is used as displacement. */
20089 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
20092 regno0 = true_regnum (operands[0]) ;
20093 regno1 = INVALID_REGNUM;
20094 regno2 = INVALID_REGNUM;
20097 regno1 = true_regnum (parts.base);
20099 regno2 = true_regnum (parts.index);
20103 /* Compute how many cycles we will add to execution time
20104 if split lea into a sequence of instructions. */
20105 if (parts.base || parts.index)
20107 /* Have to use mov instruction if non desctructive
20108 destination form is used. */
20109 if (regno1 != regno0 && regno2 != regno0)
20112 /* Have to add index to base if both exist. */
20113 if (parts.base && parts.index)
20116 /* Have to use shift and adds if scale is 2 or greater. */
20117 if (parts.scale > 1)
20119 if (regno0 != regno1)
20121 else if (regno2 == regno0)
20124 split_cost += parts.scale;
20127 /* Have to use add instruction with immediate if
20128 disp is non zero. */
20129 if (parts.disp && parts.disp != const0_rtx)
20132 /* Subtract the price of lea. */
/* Split only when the replacement sequence beats the LEA per the
   AGU-stall model in ix86_lea_outperforms.  */
20136 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
20140 /* Emit x86 binary operand CODE in mode MODE, where the first operand
20141 matches destination. RTX includes clobber of FLAGS_REG. */
/* NOTE(review): this excerpt is elided -- the remaining parameter
   declarations (dst, src) and local decls between these lines are not
   visible here.  The visible body emits a PARALLEL of
   (set dst (code dst src)) plus a CCmode FLAGS_REG clobber.  */
20144 ix86_emit_binop (enum rtx_code code, machine_mode mode,
20149 op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, mode, dst, src));
20150 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
20152 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
20155 /* Return true if regno1 def is nearest to the insn. */
/* NOTE(review): elided excerpt -- braces and the `return` statements
   following the insn_defines_reg tests are not visible here.  The scan
   walks backwards from INSN to the head of its basic block, skipping
   non-insns and debug insns, looking for the first definition of either
   register.  */
20158 find_nearest_reg_def (rtx_insn *insn, int regno1, int regno2)
20160 rtx_insn *prev = insn;
20161 rtx_insn *start = BB_HEAD (BLOCK_FOR_INSN (insn));
20165 while (prev && prev != start)
20167 if (!INSN_P (prev) || !NONDEBUG_INSN_P (prev))
20169 prev = PREV_INSN (prev);
20172 if (insn_defines_reg (regno1, INVALID_REGNUM, prev))
20174 else if (insn_defines_reg (regno2, INVALID_REGNUM, prev))
20176 prev = PREV_INSN (prev);
20179 /* None of the regs is defined in the bb. */
20183 /* Split lea instructions into a sequence of instructions
20184 which are executed on ALU to avoid AGU stalls.
20185 It is assumed that it is allowed to clobber flags register
20186 at lea position. */
/* NOTE(review): elided excerpt -- braces, some declarations (ok, target,
   adds, tmp, tmp1) and several conditionals are missing from this view.
   The visible flow: decompose operands[1] into base/index/disp/scale,
   narrow each part to MODE, then re-materialize the address with moves,
   ADDs and a SHIFT via ix86_emit_binop.  */
20189 ix86_split_lea_for_addr (rtx_insn *insn, rtx operands[], machine_mode mode)
20191 unsigned int regno0, regno1, regno2;
20192 struct ix86_address parts;
20196 ok = ix86_decompose_address (operands[1], &parts);
20199 target = gen_lowpart (mode, operands[0]);
20201 regno0 = true_regnum (target);
20202 regno1 = INVALID_REGNUM;
20203 regno2 = INVALID_REGNUM;
20207 parts.base = gen_lowpart (mode, parts.base);
20208 regno1 = true_regnum (parts.base);
20213 parts.index = gen_lowpart (mode, parts.index);
20214 regno2 = true_regnum (parts.index);
20218 parts.disp = gen_lowpart (mode, parts.disp);
20220 if (parts.scale > 1)
20222 /* Case r1 = r1 + ... */
20223 if (regno1 == regno0)
20225 /* If we have a case r1 = r1 + C * r2 then we
20226 should use multiplication which is very
20227 expensive. Assume cost model is wrong if we
20228 have such case here. */
20229 gcc_assert (regno2 != regno0);
/* Scale by repeated addition of the index register.  */
20231 for (adds = parts.scale; adds > 0; adds--)
20232 ix86_emit_binop (PLUS, mode, target, parts.index);
20236 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
20237 if (regno0 != regno2)
20238 emit_insn (gen_rtx_SET (target, parts.index));
20240 /* Use shift for scaling. */
20241 ix86_emit_binop (ASHIFT, mode, target,
20242 GEN_INT (exact_log2 (parts.scale)));
20245 ix86_emit_binop (PLUS, mode, target, parts.base);
20247 if (parts.disp && parts.disp != const0_rtx)
20248 ix86_emit_binop (PLUS, mode, target, parts.disp);
/* Address with neither base nor index: just load the displacement.  */
20251 else if (!parts.base && !parts.index)
20253 gcc_assert(parts.disp);
20254 emit_insn (gen_rtx_SET (target, parts.disp));
20260 if (regno0 != regno2)
20261 emit_insn (gen_rtx_SET (target, parts.index));
20263 else if (!parts.index)
20265 if (regno0 != regno1)
20266 emit_insn (gen_rtx_SET (target, parts.base));
20270 if (regno0 == regno1)
20272 else if (regno0 == regno2)
20278 /* Find better operand for SET instruction, depending
20279 on which definition is farther from the insn. */
20280 if (find_nearest_reg_def (insn, regno1, regno2))
20281 tmp = parts.index, tmp1 = parts.base;
20283 tmp = parts.base, tmp1 = parts.index;
20285 emit_insn (gen_rtx_SET (target, tmp));
20287 if (parts.disp && parts.disp != const0_rtx)
20288 ix86_emit_binop (PLUS, mode, target, parts.disp);
20290 ix86_emit_binop (PLUS, mode, target, tmp1);
20294 ix86_emit_binop (PLUS, mode, target, tmp);
20297 if (parts.disp && parts.disp != const0_rtx)
20298 ix86_emit_binop (PLUS, mode, target, parts.disp);
20302 /* Return true if it is ok to optimize an ADD operation to LEA
20303 operation to avoid flag register consumation. For most processors,
20304 ADD is faster than LEA. For the processors like BONNELL, if the
20305 destination register of LEA holds an actual address which will be
20306 used soon, LEA is better and otherwise ADD is better. */
/* NOTE(review): elided excerpt -- braces and the `return` values of the
   two early-exit conditionals are not visible here.  */
20309 ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[])
20311 unsigned int regno0 = true_regnum (operands[0]);
20312 unsigned int regno1 = true_regnum (operands[1]);
20313 unsigned int regno2 = true_regnum (operands[2]);
20315 /* If a = b + c, (a!=b && a!=c), must use lea form. */
20316 if (regno0 != regno1 && regno0 != regno2)
20319 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
20322 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
20325 /* Return true if destination reg of SET_BODY is shift count of
/* NOTE(review): elided excerpt -- the rest of this header comment, local
   declarations (i, set_dest, shift_rtx), case labels and `return`
   statements are missing from this view.  Both switches recurse through
   PARALLEL vectors; the final test checks whether the shift count of
   USE_BODY is the register SET_BODY defines.  */
20329 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
20335 /* Retrieve destination of SET_BODY. */
20336 switch (GET_CODE (set_body))
20339 set_dest = SET_DEST (set_body);
20340 if (!set_dest || !REG_P (set_dest))
20344 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
20345 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
20353 /* Retrieve shift count of USE_BODY. */
20354 switch (GET_CODE (use_body))
20357 shift_rtx = XEXP (use_body, 1);
20360 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
20361 if (ix86_dep_by_shift_count_body (set_body,
20362 XVECEXP (use_body, 0, i)))
20370 && (GET_CODE (shift_rtx) == ASHIFT
20371 || GET_CODE (shift_rtx) == LSHIFTRT
20372 || GET_CODE (shift_rtx) == ASHIFTRT
20373 || GET_CODE (shift_rtx) == ROTATE
20374 || GET_CODE (shift_rtx) == ROTATERT))
20376 rtx shift_count = XEXP (shift_rtx, 1);
20378 /* Return true if shift count is dest of SET_BODY. */
20379 if (REG_P (shift_count))
20381 /* Add check since it can be invoked before register
20382 allocation in pre-reload schedule. */
20383 if (reload_completed
20384 && true_regnum (set_dest) == true_regnum (shift_count))
20386 else if (REGNO(set_dest) == REGNO(shift_count))
20394 /* Return true if destination reg of SET_INSN is shift count of
/* NOTE(review): header comment is truncated by elision; thin wrapper that
   applies ix86_dep_by_shift_count_body to the two insn patterns.  */
20398 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
20400 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
20401 PATTERN (use_insn));
20404 /* Return TRUE or FALSE depending on whether the unary operator meets the
20405 appropriate constraints. */
/* NOTE(review): elided excerpt -- the operands[] parameter declaration and
   the `return` statements are not visible here.  */
20408 ix86_unary_operator_ok (enum rtx_code,
20412 /* If one of operands is memory, source and destination must match. */
20413 if ((MEM_P (operands[0])
20414 || MEM_P (operands[1]))
20415 && ! rtx_equal_p (operands[0], operands[1]))
20420 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
20421 are ok, keeping in mind the possible movddup alternative. */
/* NOTE(review): the final `return` (the all-register case) is elided from
   this view.  HIGH selects which source operand must match a memory
   destination.  */
20424 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
20426 if (MEM_P (operands[0]))
20427 return rtx_equal_p (operands[0], operands[1 + high]);
20428 if (MEM_P (operands[1]) && MEM_P (operands[2]))
20429 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
20433 /* Post-reload splitter for converting an SF or DFmode value in an
20434 SSE register into an unsigned SImode. */
/* NOTE(review): elided excerpt -- braces and some conditionals (e.g. the
   guard selecting the vec_set vs. movss/movsd load sequence) are missing
   from this view.  Strategy: compare VALUE against 2**31, conditionally
   subtract it, truncate, then xor the sign bit back in.  */
20437 ix86_split_convert_uns_si_sse (rtx operands[])
20439 machine_mode vecmode;
20440 rtx value, large, zero_or_two31, input, two31, x;
20442 large = operands[1];
20443 zero_or_two31 = operands[2];
20444 input = operands[3];
20445 two31 = operands[4];
20446 vecmode = GET_MODE (large);
20447 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
20449 /* Load up the value into the low element. We must ensure that the other
20450 elements are valid floats -- zero is the easiest such value. */
20453 if (vecmode == V4SFmode)
20454 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
20456 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
20460 input = gen_rtx_REG (vecmode, REGNO (input));
20461 emit_move_insn (value, CONST0_RTX (vecmode));
20462 if (vecmode == V4SFmode)
20463 emit_insn (gen_sse_movss (value, value, input));
20465 emit_insn (gen_sse2_movsd (value, value, input));
20468 emit_move_insn (large, two31);
20469 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
/* large = (2**31 <= value) mask.  */
20471 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
20472 emit_insn (gen_rtx_SET (large, x));
20474 x = gen_rtx_AND (vecmode, zero_or_two31, large);
20475 emit_insn (gen_rtx_SET (zero_or_two31, x));
20477 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
20478 emit_insn (gen_rtx_SET (value, x));
/* Shift the mask into the sign-bit position for the final xor.  */
20480 large = gen_rtx_REG (V4SImode, REGNO (large));
20481 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
20483 x = gen_rtx_REG (V4SImode, REGNO (value));
20484 if (vecmode == V4SFmode)
20485 emit_insn (gen_fix_truncv4sfv4si2 (x, value));
20487 emit_insn (gen_sse2_cvttpd2dq (x, value));
20490 emit_insn (gen_xorv4si3 (value, value, large));
20493 /* Convert an unsigned DImode value into a DFmode, using only SSE.
20494 Expects the 64-bit DImode to be supplied in a pair of integral
20495 registers. Requires SSE2; will use SSE3 if available. For x86_32,
20496 -mfpmath=sse, !optimize_size only. */
/* NOTE(review): elided excerpt -- braces, the declaration of `x`, and the
   TARGET_SSE3 guard around the hadd vs. interleave+add alternatives are
   not visible here.  */
20499 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
20501 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
20502 rtx int_xmm, fp_xmm;
20503 rtx biases, exponents;
20506 int_xmm = gen_reg_rtx (V4SImode);
20507 if (TARGET_INTER_UNIT_MOVES_TO_VEC)
20508 emit_insn (gen_movdi_to_sse (int_xmm, input));
20509 else if (TARGET_SSE_SPLIT_REGS)
20511 emit_clobber (int_xmm);
20512 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
20516 x = gen_reg_rtx (V2DImode);
20517 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
20518 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
20521 x = gen_rtx_CONST_VECTOR (V4SImode,
20522 gen_rtvec (4, GEN_INT (0x43300000UL),
20523 GEN_INT (0x45300000UL),
20524 const0_rtx, const0_rtx));
20525 exponents = validize_mem (force_const_mem (V4SImode, x));
20527 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
20528 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
20530 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
20531 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
20532 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
20533 (0x1.0p84 + double(fp_value_hi_xmm)).
20534 Note these exponents differ by 32. */
20536 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
20538 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
20539 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
20540 real_ldexp (&bias_lo_rvt, &dconst1, 52);
20541 real_ldexp (&bias_hi_rvt, &dconst1, 84);
20542 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
20543 x = const_double_from_real_value (bias_hi_rvt, DFmode);
20544 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
20545 biases = validize_mem (force_const_mem (V2DFmode, biases));
20546 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
20548 /* Add the upper and lower DFmode values together. */
20550 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
20553 x = copy_to_mode_reg (V2DFmode, fp_xmm);
20554 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
20555 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
20558 ix86_expand_vector_extract (false, target, fp_xmm, 0);
20561 /* Not used, but eases macroization of patterns. */
/* Deliberately unreachable stub; exists only so pattern macroization can
   reference a uniform function name.  */
20563 ix86_expand_convert_uns_sixf_sse (rtx, rtx)
20565 gcc_unreachable ();
20568 /* Convert an unsigned SImode value into a DFmode. Only currently used
20569 for SSE, but applicable anywhere. */
/* NOTE(review): elided excerpt -- declarations of `x` and `fp` and the
   final guard around the move are not visible.  Trick: bias the unsigned
   input by -2**31 so a *signed* int->double convert can be used, then add
   2**31 back in the DF domain.  */
20572 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
20574 REAL_VALUE_TYPE TWO31r;
20577 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
20578 NULL, 1, OPTAB_DIRECT);
20580 fp = gen_reg_rtx (DFmode);
20581 emit_insn (gen_floatsidf2 (fp, x));
20583 real_ldexp (&TWO31r, &dconst1, 31);
20584 x = const_double_from_real_value (TWO31r, DFmode);
20586 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
20588 emit_move_insn (target, x);
20591 /* Convert a signed DImode value into a DFmode. Only used for SSE in
20592 32-bit mode; otherwise we have a direct convert instruction. */
/* NOTE(review): elided excerpt -- braces and a final guard around the
   move are not visible.  Computes hi*2**32 + uns(lo) in DFmode.  */
20595 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
20597 REAL_VALUE_TYPE TWO32r;
20598 rtx fp_lo, fp_hi, x;
20600 fp_lo = gen_reg_rtx (DFmode);
20601 fp_hi = gen_reg_rtx (DFmode);
20603 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
20605 real_ldexp (&TWO32r, &dconst1, 32);
20606 x = const_double_from_real_value (TWO32r, DFmode);
20607 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
20609 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
20611 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
20614 emit_move_insn (target, x);
20617 /* Convert an unsigned SImode value into a SFmode, using only SSE.
20618 For x86_32, -mfpmath=sse, !optimize_size only. */
/* Split INPUT into 16-bit halves, convert each half signed->float
   (both halves fit in a signed int), then recombine as hi*2**16 + lo.
   NOTE(review): some lines (function head decoration, optab flags on the
   trailing binops) are elided in this view.  */
20620 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
20622 REAL_VALUE_TYPE ONE16r;
20623 rtx fp_hi, fp_lo, int_hi, int_lo, x;
20625 real_ldexp (&ONE16r, &dconst1, 16);
20626 x = const_double_from_real_value (ONE16r, SFmode);
20627 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
20628 NULL, 0, OPTAB_DIRECT);
20629 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
20630 NULL, 0, OPTAB_DIRECT);
20631 fp_hi = gen_reg_rtx (SFmode);
20632 fp_lo = gen_reg_rtx (SFmode);
20633 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
20634 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
20635 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
20637 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
20639 if (!rtx_equal_p (target, fp_hi))
20640 emit_move_insn (target, fp_hi);
20643 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
20644 a vector of unsigned ints VAL to vector of floats TARGET. */
/* Vector analogue of the scalar 16-bit-halves trick: convert lo and hi
   16-bit lanes separately, then combine as hi*2**16 + lo.
   NOTE(review): the tmp[] array declaration and optab flag arguments on
   some expand_simple_binop calls are elided in this view.  */
20647 ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
20650 REAL_VALUE_TYPE TWO16r;
20651 machine_mode intmode = GET_MODE (val);
20652 machine_mode fltmode = GET_MODE (target);
20653 rtx (*cvt) (rtx, rtx);
20655 if (intmode == V4SImode)
20656 cvt = gen_floatv4siv4sf2;
20658 cvt = gen_floatv8siv8sf2;
20659 tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
20660 tmp[0] = force_reg (intmode, tmp[0]);
20661 tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
20663 tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
20664 NULL_RTX, 1, OPTAB_DIRECT);
20665 tmp[3] = gen_reg_rtx (fltmode);
20666 emit_insn (cvt (tmp[3], tmp[1]));
20667 tmp[4] = gen_reg_rtx (fltmode);
20668 emit_insn (cvt (tmp[4], tmp[2]));
20669 real_ldexp (&TWO16r, &dconst1, 16);
20670 tmp[5] = const_double_from_real_value (TWO16r, SFmode);
20671 tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
20672 tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
20674 tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
20676 if (tmp[7] != target)
20677 emit_move_insn (target, tmp[7]);
20680 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
20681 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
20682 This is done by doing just signed conversion if < 0x1p31, and otherwise by
20683 subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
/* NOTE(review): elided excerpt -- the switch head, some braces, the loop
   index declaration and several optab flag arguments are missing from
   this view.  Returns the adjusted value; *XORP receives the sign-bit
   correction mask.  */
20686 ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
20688 REAL_VALUE_TYPE TWO31r;
20689 rtx two31r, tmp[4];
20690 machine_mode mode = GET_MODE (val);
20691 machine_mode scalarmode = GET_MODE_INNER (mode);
20692 machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
20693 rtx (*cmp) (rtx, rtx, rtx, rtx);
20696 for (i = 0; i < 3; i++)
20697 tmp[i] = gen_reg_rtx (mode);
20698 real_ldexp (&TWO31r, &dconst1, 31);
20699 two31r = const_double_from_real_value (TWO31r, scalarmode);
20700 two31r = ix86_build_const_vector (mode, 1, two31r);
20701 two31r = force_reg (mode, two31r);
20704 case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
20705 case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
20706 case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
20707 case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
20708 default: gcc_unreachable ();
/* tmp[0] = mask of lanes where 2**31 <= val.  */
20710 tmp[3] = gen_rtx_LE (mode, two31r, val);
20711 emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
20712 tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
20714 if (intmode == V4SImode || TARGET_AVX2)
20715 *xorp = expand_simple_binop (intmode, ASHIFT,
20716 gen_lowpart (intmode, tmp[0]),
20717 GEN_INT (31), NULL_RTX, 0,
20721 rtx two31 = GEN_INT (HOST_WIDE_INT_1U << 31);
20722 two31 = ix86_build_const_vector (intmode, 1, two31);
20723 *xorp = expand_simple_binop (intmode, AND,
20724 gen_lowpart (intmode, tmp[0]),
20725 two31, NULL_RTX, 0,
20728 return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
20732 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
20733 then replicate the value for all elements of the vector
/* NOTE(review): header comment tail, the mode switch and its case labels
   are elided here.  Builds a CONST_VECTOR of MODE with VALUE in element 0
   and either VALUE (VECT) or zero in the remaining elements.  */
20737 ix86_build_const_vector (machine_mode mode, bool vect, rtx value)
20741 machine_mode scalar_mode;
20764 n_elt = GET_MODE_NUNITS (mode);
20765 v = rtvec_alloc (n_elt);
20766 scalar_mode = GET_MODE_INNER (mode);
20768 RTVEC_ELT (v, 0) = value;
20770 for (i = 1; i < n_elt; ++i)
20771 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
20773 return gen_rtx_CONST_VECTOR (mode, v);
20776 gcc_unreachable ();
20780 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
20781 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
20782 for an SSE register. If VECT is true, then replicate the mask for
20783 all elements of the vector register. If INVERT is true, then create
20784 a mask excluding the sign bit. */
/* NOTE(review): elided excerpt -- the mode switch that sets vec_mode and
   imode, plus several declarations (w, mask, v), are missing from this
   view.  */
20787 ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert)
20789 machine_mode vec_mode, imode;
20817 vec_mode = VOIDmode;
20822 gcc_unreachable ();
20825 machine_mode inner_mode = GET_MODE_INNER (mode);
/* Set only the sign bit of the element width, then optionally invert.  */
20826 w = wi::set_bit_in_zero (GET_MODE_BITSIZE (inner_mode) - 1,
20827 GET_MODE_BITSIZE (inner_mode));
20829 w = wi::bit_not (w);
20831 /* Force this value into the low part of a fp vector constant. */
20832 mask = immed_wide_int_const (w, imode);
20833 mask = gen_lowpart (inner_mode, mask);
20835 if (vec_mode == VOIDmode)
20836 return force_reg (inner_mode, mask);
20838 v = ix86_build_const_vector (vec_mode, vect, mask);
20839 return force_reg (vec_mode, v);
20842 /* Generate code for floating point ABS or NEG. */
/* NOTE(review): elided excerpt -- parameter list tail (operands[]), the
   vmode selection branches and several guards are not visible here.  ABS
   and NEG are emitted as a SET with a USE of the sign-bit mask; the x87
   path instead carries a flags clobber.  */
20845 ix86_expand_fp_absneg_operator (enum rtx_code code, machine_mode mode,
20848 rtx mask, set, dst, src;
20849 bool use_sse = false;
20850 bool vector_mode = VECTOR_MODE_P (mode);
20851 machine_mode vmode = mode;
20855 else if (mode == TFmode)
20857 else if (TARGET_SSE_MATH)
20859 use_sse = SSE_FLOAT_MODE_P (mode);
20860 if (mode == SFmode)
20862 else if (mode == DFmode)
20866 /* NEG and ABS performed with SSE use bitwise mask operations.
20867 Create the appropriate mask now. */
20869 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
20876 set = gen_rtx_fmt_e (code, mode, src);
20877 set = gen_rtx_SET (dst, set);
20884 use = gen_rtx_USE (VOIDmode, mask);
20886 par = gen_rtvec (2, set, use);
20889 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
20890 par = gen_rtvec (3, set, use, clob);
20892 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
20898 /* Expand a copysign operation. Special case operand 0 being a constant. */
/* NOTE(review): elided excerpt -- op0/op1 assignments, the vmode branches
   and several braces are missing from this view.  Constant magnitudes use
   the *_const patterns with one mask; variable ones use *_var with both a
   sign mask and its complement.  */
20901 ix86_expand_copysign (rtx operands[])
20903 machine_mode mode, vmode;
20904 rtx dest, op0, op1, mask, nmask;
20906 dest = operands[0];
20910 mode = GET_MODE (dest);
20912 if (mode == SFmode)
20914 else if (mode == DFmode)
20919 if (CONST_DOUBLE_P (op0))
20921 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
/* The sign comes from op1, so only the magnitude of op0 matters.  */
20923 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
20924 op0 = simplify_unary_operation (ABS, mode, op0, mode);
20926 if (mode == SFmode || mode == DFmode)
20928 if (op0 == CONST0_RTX (mode))
20929 op0 = CONST0_RTX (vmode)
20932 rtx v = ix86_build_const_vector (vmode, false, op0);
20934 op0 = force_reg (vmode, v);
20937 else if (op0 != CONST0_RTX (mode))
20938 op0 = force_reg (mode, op0);
20940 mask = ix86_build_signbit_mask (vmode, 0, 0);
20942 if (mode == SFmode)
20943 copysign_insn = gen_copysignsf3_const;
20944 else if (mode == DFmode)
20945 copysign_insn = gen_copysigndf3_const;
20947 copysign_insn = gen_copysigntf3_const;
20949 emit_insn (copysign_insn (dest, op0, op1, mask));
20953 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
20955 nmask = ix86_build_signbit_mask (vmode, 0, 1);
20956 mask = ix86_build_signbit_mask (vmode, 0, 0);
20958 if (mode == SFmode)
20959 copysign_insn = gen_copysignsf3_var;
20960 else if (mode == DFmode)
20961 copysign_insn = gen_copysigndf3_var;
20963 copysign_insn = gen_copysigntf3_var;
20965 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
20969 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
20970 be a constant, and so has already been expanded into a vector constant. */
/* NOTE(review): the op0 assignment and some braces are elided from this
   view.  Sequence: dest = (dest & sign-mask) | constant-magnitude.  */
20973 ix86_split_copysign_const (rtx operands[])
20975 machine_mode mode, vmode;
20976 rtx dest, op0, mask, x;
20978 dest = operands[0];
20980 mask = operands[3];
20982 mode = GET_MODE (dest);
20983 vmode = GET_MODE (mask);
20985 dest = simplify_gen_subreg (vmode, dest, mode, 0);
20986 x = gen_rtx_AND (vmode, dest, mask);
20987 emit_insn (gen_rtx_SET (dest, x));
/* OR in the magnitude unless it is zero (nothing to set).  */
20989 if (op0 != CONST0_RTX (vmode))
20991 x = gen_rtx_IOR (vmode, dest, op0);
20992 emit_insn (gen_rtx_SET (dest, x));
20996 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
20997 so we have to do two masks. */
/* NOTE(review): elided excerpt -- op0/op1 assignments, some braces and a
   `return` after the trivial-copy case are not visible here.  The
   alternatives correspond to which hard registers the RA tied together;
   the asserts document those register-allocation constraints.  */
21000 ix86_split_copysign_var (rtx operands[])
21002 machine_mode mode, vmode;
21003 rtx dest, scratch, op0, op1, mask, nmask, x;
21005 dest = operands[0];
21006 scratch = operands[1];
21009 nmask = operands[4];
21010 mask = operands[5];
21012 mode = GET_MODE (dest);
21013 vmode = GET_MODE (mask);
21015 if (rtx_equal_p (op0, op1))
21017 /* Shouldn't happen often (it's useless, obviously), but when it does
21018 we'd generate incorrect code if we continue below. */
21019 emit_move_insn (dest, op0);
21023 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
21025 gcc_assert (REGNO (op1) == REGNO (scratch));
21027 x = gen_rtx_AND (vmode, scratch, mask);
21028 emit_insn (gen_rtx_SET (scratch, x));
21031 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
21032 x = gen_rtx_NOT (vmode, dest);
21033 x = gen_rtx_AND (vmode, x, op0);
21034 emit_insn (gen_rtx_SET (dest, x));
21038 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
21040 x = gen_rtx_AND (vmode, scratch, mask);
21042 else /* alternative 2,4 */
21044 gcc_assert (REGNO (mask) == REGNO (scratch));
21045 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
21046 x = gen_rtx_AND (vmode, scratch, op1);
21048 emit_insn (gen_rtx_SET (scratch, x));
21050 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
21052 dest = simplify_gen_subreg (vmode, op0, mode, 0);
21053 x = gen_rtx_AND (vmode, dest, nmask);
21055 else /* alternative 3,4 */
21057 gcc_assert (REGNO (nmask) == REGNO (dest));
21059 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
21060 x = gen_rtx_AND (vmode, dest, op0);
21062 emit_insn (gen_rtx_SET (dest, x));
21065 x = gen_rtx_IOR (vmode, dest, scratch);
21066 emit_insn (gen_rtx_SET (dest, x));
21069 /* Return TRUE or FALSE depending on whether the first SET in INSN
21070 has source and destination with matching CC modes, and that the
21071 CC mode is at least as constrained as REQ_MODE. */
/* NOTE(review): elided excerpt -- the case labels of the set_mode switch
   and several `return` statements are not visible here.  */
21074 ix86_match_ccmode (rtx insn, machine_mode req_mode)
21077 machine_mode set_mode;
21079 set = PATTERN (insn);
21080 if (GET_CODE (set) == PARALLEL)
21081 set = XVECEXP (set, 0, 0);
21082 gcc_assert (GET_CODE (set) == SET);
21083 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
21085 set_mode = GET_MODE (SET_DEST (set));
21089 if (req_mode != CCNOmode
21090 && (req_mode != CCmode
21091 || XEXP (SET_SRC (set), 1) != const0_rtx))
21095 if (req_mode == CCGCmode)
21099 if (req_mode == CCGOCmode || req_mode == CCNOmode)
21103 if (req_mode == CCZmode)
21114 if (set_mode != req_mode)
21119 gcc_unreachable ();
21122 return GET_MODE (SET_SRC (set)) == set_mode;
21125 /* Generate insn patterns to do an integer compare of OPERANDS. */
/* NOTE(review): the declarations of `flags` and `tmp` are elided here.
   Emits (set flags (compare op0 op1)) and returns the comparison RTX the
   flags consumer (bcc/scc/cmov) should use.  */
21128 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
21130 machine_mode cmpmode;
21133 cmpmode = SELECT_CC_MODE (code, op0, op1);
21134 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
21136 /* This is very simple, but making the interface the same as in the
21137 FP case makes the rest of the code easier. */
21138 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
21139 emit_insn (gen_rtx_SET (flags, tmp));
21141 /* Return the test that should be put into the flags user, i.e.
21142 the bcc, scc, or cmov instruction. */
21143 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
21146 /* Figure out whether to use ordered or unordered fp comparisons.
21147 Return the appropriate mode to use. */
21150 ix86_fp_compare_mode (enum rtx_code)
21152 /* ??? In order to make all comparisons reversible, we do all comparisons
21153 non-trapping when compiling for IEEE. Once gcc is able to distinguish
21154 all forms trapping and nontrapping comparisons, we can make inequality
21155 comparisons trapping again, since it results in better code when using
21156 FCOM based compares. */
/* The rtx_code parameter is intentionally unnamed/unused.  */
21157 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
/* Select the CC mode needed to represent comparison CODE of OP0/OP1.
   NOTE(review): elided excerpt -- the switch head, the `return`
   statements selecting each CC mode and some case labels are missing
   from this view.  */
21161 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
21163 machine_mode mode = GET_MODE (op0);
21165 if (SCALAR_FLOAT_MODE_P (mode))
21167 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
21168 return ix86_fp_compare_mode (code);
21173 /* Only zero flag is needed. */
21174 case EQ: /* ZF=0 */
21175 case NE: /* ZF!=0 */
21177 /* Codes needing carry flag. */
21178 case GEU: /* CF=0 */
21179 case LTU: /* CF=1 */
21180 /* Detect overflow checks. They need just the carry flag. */
21181 if (GET_CODE (op0) == PLUS
21182 && (rtx_equal_p (op1, XEXP (op0, 0))
21183 || rtx_equal_p (op1, XEXP (op0, 1))))
21187 case GTU: /* CF=0 & ZF=0 */
21188 case LEU: /* CF=1 | ZF=1 */
21190 /* Codes possibly doable only with sign flag when
21191 comparing against zero. */
21192 case GE: /* SF=OF or SF=0 */
21193 case LT: /* SF<>OF or SF=1 */
21194 if (op1 == const0_rtx)
21197 /* For other cases Carry flag is not required. */
21199 /* Codes doable only with sign flag when comparing
21200 against zero, but we miss jump instruction for it
21201 so we need to use relational tests against overflow
21202 that thus needs to be zero. */
21203 case GT: /* ZF=0 & SF=OF */
21204 case LE: /* ZF=1 | SF<>OF */
21205 if (op1 == const0_rtx)
21209 /* strcmp pattern do (use flags) and combine may ask us for proper
21214 gcc_unreachable ();
21218 /* Return the fixed registers used for condition codes. */
/* NOTE(review): the body (assignments to *p1/*p2 and the return) is
   entirely elided from this view.  */
21221 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
21228 /* If two condition code modes are compatible, return a condition code
21229 mode which is compatible with both. Otherwise, return
/* NOTE(review): header tail, several `return` statements and the switch
   structure are elided here.  CCGC/CCGOC merge to CCGC-compatible modes;
   CCZ widens to the stricter of the pair.  */
21232 static machine_mode
21233 ix86_cc_modes_compatible (machine_mode m1, machine_mode m2)
21238 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
21241 if ((m1 == CCGCmode && m2 == CCGOCmode)
21242 || (m1 == CCGOCmode && m2 == CCGCmode))
21245 if (m1 == CCZmode && (m2 == CCGCmode || m2 == CCGOCmode))
21247 else if (m2 == CCZmode && (m1 == CCGCmode || m1 == CCGOCmode))
21253 gcc_unreachable ();
21285 /* These are only compatible with themselves, which we already
21292 /* Return a comparison we can do and that it is equivalent to
21293 swap_condition (code) apart possibly from orderedness.
21294 But, never change orderedness if TARGET_IEEE_FP, returning
21295 UNKNOWN in that case if necessary. */
/* NOTE(review): the switch head and closing braces are elided in this
   view.  Each mapped case trades ordered for unordered (or vice versa)
   so the swapped compare uses the cheap CF-based forms.  */
21297 static enum rtx_code
21298 ix86_fp_swap_condition (enum rtx_code code)
21302 case GT: /* GTU - CF=0 & ZF=0 */
21303 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
21304 case GE: /* GEU - CF=0 */
21305 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
21306 case UNLT: /* LTU - CF=1 */
21307 return TARGET_IEEE_FP ? UNKNOWN : GT;
21308 case UNLE: /* LEU - CF=1 | ZF=1 */
21309 return TARGET_IEEE_FP ? UNKNOWN : GE;
21311 return swap_condition (code);
21315 /* Return cost of comparison CODE using the best strategy for performance.
21316 All following functions do use number of instructions as a cost metrics.
21317 In future this should be tweaked to compute bytes for optimize_size and
21318 take into account performance of various instructions on various CPUs. */
/* NOTE(review): elided excerpt -- the arith_cost declaration, the code
   switch head, most case labels and the IX86_FPCMP_ARITH return are not
   visible here.  */
21321 ix86_fp_comparison_cost (enum rtx_code code)
21325 /* The cost of code using bit-twiddling on %ah. */
21342 arith_cost = TARGET_IEEE_FP ? 5 : 4;
21346 arith_cost = TARGET_IEEE_FP ? 6 : 4;
21349 gcc_unreachable ();
21352 switch (ix86_fp_comparison_strategy (code))
21354 case IX86_FPCMP_COMI:
21355 return arith_cost > 4 ? 3 : 2;
21356 case IX86_FPCMP_SAHF:
21357 return arith_cost > 4 ? 4 : 3;
21363 /* Return strategy to use for floating-point. We assume that fcomi is always
21364 preferrable where available, since that is also true when looking at size
21365 (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
/* NOTE(review): the fcomi guard condition preceding the COMI return is
   elided from this view.  The rtx_code parameter is intentionally
   unnamed/unused.  */
21367 enum ix86_fpcmp_strategy
21368 ix86_fp_comparison_strategy (enum rtx_code)
21370 /* Do fcomi/sahf based test when profitable. */
21373 return IX86_FPCMP_COMI;
21375 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
21376 return IX86_FPCMP_SAHF;
21378 return IX86_FPCMP_ARITH;
21381 /* Swap, force into registers, or otherwise massage the two operands
21382 to a fp comparison. The operands are updated in place; the new
21383 comparison code is returned. */
/* NOTE(review): elided excerpt -- braces, some guard conditions and the
   final stores back through pop0/pop1 plus the `return code` are not
   visible here.  */
21385 static enum rtx_code
21386 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
21388 machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
21389 rtx op0 = *pop0, op1 = *pop1;
21390 machine_mode op_mode = GET_MODE (op0);
21391 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
21393 /* All of the unordered compare instructions only work on registers.
21394 The same is true of the fcomi compare instructions. The XFmode
21395 compare instructions require registers except when comparing
21396 against zero or when converting operand 1 from fixed point to
21400 && (fpcmp_mode == CCFPUmode
21401 || (op_mode == XFmode
21402 && ! (standard_80387_constant_p (op0) == 1
21403 || standard_80387_constant_p (op1) == 1)
21404 && GET_CODE (op1) != FLOAT)
21405 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
21407 op0 = force_reg (op_mode, op0);
21408 op1 = force_reg (op_mode, op1);
21412 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
21413 things around if they appear profitable, otherwise force op0
21414 into a register. */
21416 if (standard_80387_constant_p (op0) == 0
21418 && ! (standard_80387_constant_p (op1) == 0
21421 enum rtx_code new_code = ix86_fp_swap_condition (code);
21422 if (new_code != UNKNOWN)
21424 std::swap (op0, op1);
21430 op0 = force_reg (op_mode, op0);
21432 if (CONSTANT_P (op1))
21434 int tmp = standard_80387_constant_p (op1);
21436 op1 = validize_mem (force_const_mem (op_mode, op1));
21440 op1 = force_reg (op_mode, op1);
21443 op1 = force_reg (op_mode, op1);
21447 /* Try to rearrange the comparison to make it cheaper. */
21448 if (ix86_fp_comparison_cost (code)
21449 > ix86_fp_comparison_cost (swap_condition (code))
21450 && (REG_P (op1) || can_create_pseudo_p ()))
21452 std::swap (op0, op1);
21453 code = swap_condition (code);
21455 op0 = force_reg (op_mode, op0);
21463 /* Convert comparison codes we use to represent FP comparison to integer
21464 code that will result in proper branch. Return UNKNOWN if no such code
/* NOTE(review): the header comment tail and the entire switch body of
   this function are elided from this view.  */
21468 ix86_fp_compare_code_to_integer (enum rtx_code code)
21497 /* Generate insn patterns to do a floating point compare of OPERANDS. */
/* NOTE(review): this listing is elided -- interior source lines are
   missing between the numbered lines below; read with care.  */
/* Emits the FP compare (fcomi, fnstsw+sahf, or arithmetic on the x87
   status word) and returns the comparison RTX against FLAGS_REG that
   the branch/setcc/cmov consumer should use.  */
21500 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
21502   machine_mode fpcmp_mode, intcmp_mode;
21505   fpcmp_mode = ix86_fp_compare_mode (code);
21506   code = ix86_prepare_fp_compare_args (code, &op0, &op1);
21508   /* Do fcomi/sahf based test when profitable. */
21509   switch (ix86_fp_comparison_strategy (code))
21511     case IX86_FPCMP_COMI:
      /* fcomi/fucomi set EFLAGS directly; no scratch register needed.  */
21512       intcmp_mode = fpcmp_mode;
21513       tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
21514       tmp = gen_rtx_SET (gen_rtx_REG (fpcmp_mode, FLAGS_REG), tmp);
21518     case IX86_FPCMP_SAHF:
21519       intcmp_mode = fpcmp_mode;
21520       tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
21521       tmp = gen_rtx_SET (gen_rtx_REG (fpcmp_mode, FLAGS_REG), tmp);
      /* fnstsw needs a fresh HImode scratch for the status word.  */
21524       scratch = gen_reg_rtx (HImode);
21525       tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
21526       emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
21529     case IX86_FPCMP_ARITH:
21530       /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
21531       tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
21532       tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
21534       scratch = gen_reg_rtx (HImode);
21535       emit_insn (gen_rtx_SET (scratch, tmp2));
21537       /* In the unordered case, we have to check C2 for NaN's, which
21538 	 doesn't happen to work out to anything nice combination-wise.
21539 	 So do some bit twiddling on the value we've got in AH to come
21540 	 up with an appropriate set of condition codes. */
21542       intcmp_mode = CCNOmode;
      /* The masks 0x45/0x44/0x40/0x05/0x04/0x01 below presumably select
	 the x87 condition-code bits C0/C2/C3 copied into AH by fnstsw --
	 TODO confirm against the x87 status-word layout.  */
21547 	  if (code == GT || !TARGET_IEEE_FP)
21549 	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
21554 	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
21555 	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
21556 	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
21557 	      intcmp_mode = CCmode;
21563 	  if (code == LT && TARGET_IEEE_FP)
21565 	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
21566 	      emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
21567 	      intcmp_mode = CCmode;
21572 	      emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
21578 	  if (code == GE || !TARGET_IEEE_FP)
21580 	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
21585 	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
21586 	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
21592 	  if (code == LE && TARGET_IEEE_FP)
21594 	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
21595 	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
21596 	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
21597 	      intcmp_mode = CCmode;
21602 	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
21608 	  if (code == EQ && TARGET_IEEE_FP)
21610 	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
21611 	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
21612 	      intcmp_mode = CCmode;
21617 	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
21623 	  if (code == NE && TARGET_IEEE_FP)
21625 	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
21626 	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
21632 	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
21638 	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
21642 	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
21647 	  gcc_unreachable ();
21655   /* Return the test that should be put into the flags user, i.e.
21656      the bcc, scc, or cmov instruction. */
21657   return gen_rtx_fmt_ee (code, VOIDmode,
21658 			 gen_rtx_REG (intcmp_mode, FLAGS_REG),
/* Dispatch a comparison of OP0/OP1 with CODE to the right expander:
   pass through an already-computed CC-mode compare, use the x87/SSE FP
   path for scalar floats, otherwise the integer path.  Returns the RTX
   the flags consumer should use.  (Listing elided; lines missing.)  */
21663 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
21667   if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
21668     ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
21670   else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
      /* Decimal float never reaches here.  */
21672       gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
21673       ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
21676     ret = ix86_expand_int_compare (code, op0, op1);
/* Emit a conditional branch to LABEL on comparing OP0 against OP1 with
   CODE.  Double-word modes are split into word-sized compare+branch
   sequences; EQ/NE may instead go through an XOR so the STV pass can
   keep the compare in SSE registers.  (Listing elided; lines missing.)  */
21682 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
21684   machine_mode mode = GET_MODE (op0);
21696       tmp = ix86_expand_compare (code, op0, op1);
21697       tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
21698 				  gen_rtx_LABEL_REF (VOIDmode, label),
21700       emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
21706       /* For 32-bit target DI comparison may be performed on
21707 	 SSE registers.  To allow this we should avoid split
21708 	 to SI mode which is achieved by doing xor in DI mode
21709 	 and then comparing with zero (which is recognized by
21710 	 STV pass).  We don't compare using xor when optimizing
21712       if (!optimize_insn_for_size_p ()
21714 	  && (code == EQ || code == NE))
	  /* a == b  <=>  (a ^ b) == 0; fall through to the zero compare.  */
21716 	  op0 = force_reg (mode, gen_rtx_XOR (mode, op0, op1));
21720       /* Expand DImode branch into multiple compare+branch. */
21723 	rtx_code_label *label2;
21724 	enum rtx_code code1, code2, code3;
21725 	machine_mode submode;
21727 	if (CONSTANT_P (op0) && !CONSTANT_P (op1))
	    /* Canonicalize: constant goes second.  */
21729 	    std::swap (op0, op1);
21730 	    code = swap_condition (code);
	/* lo[]/hi[] receive the word halves of each operand.  */
21733 	split_double_mode (mode, &op0, 1, lo+0, hi+0);
21734 	split_double_mode (mode, &op1, 1, lo+1, hi+1);
21736 	submode = mode == DImode ? SImode : DImode;
21738 	/* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
21739 	   avoid two branches.  This costs one extra insn, so disable when
21740 	   optimizing for size.  */
21742 	if ((code == EQ || code == NE)
21743 	    && (!optimize_insn_for_size_p ()
21744 	        || hi[1] == const0_rtx || lo[1] == const0_rtx))
21749 	    if (hi[1] != const0_rtx)
21750 	      xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
21751 				   NULL_RTX, 0, OPTAB_WIDEN);
21754 	    if (lo[1] != const0_rtx)
21755 	      xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
21756 				   NULL_RTX, 0, OPTAB_WIDEN);
21758 	    tmp = expand_binop (submode, ior_optab, xor1, xor0,
21759 				NULL_RTX, 0, OPTAB_WIDEN);
	    /* Recurse on the single-word OR-of-XORs against zero.  */
21761 	    ix86_expand_branch (code, tmp, const0_rtx, label);
21765 	/* Otherwise, if we are doing less-than or greater-or-equal-than,
21766 	   op1 is a constant and the low word is zero, then we can just
21767 	   examine the high word.  Similarly for low word -1 and
21768 	   less-or-equal-than or greater-than.  */
21770 	if (CONST_INT_P (hi[1]))
21773 	    case LT: case LTU: case GE: case GEU:
21774 	      if (lo[1] == const0_rtx)
21776 		  ix86_expand_branch (code, hi[0], hi[1], label);
21780 	    case LE: case LEU: case GT: case GTU:
21781 	      if (lo[1] == constm1_rtx)
21783 		  ix86_expand_branch (code, hi[0], hi[1], label);
21791 	/* Otherwise, we need two or three jumps.  */
21793 	label2 = gen_label_rtx ();
21796 	code2 = swap_condition (code);
21797 	code3 = unsigned_condition (code);
21801 	  case LT: case GT: case LTU: case GTU:
21804 	  case LE:   code1 = LT;  code2 = GT;  break;
21805 	  case GE:   code1 = GT;  code2 = LT;  break;
21806 	  case LEU:  code1 = LTU; code2 = GTU; break;
21807 	  case GEU:  code1 = GTU; code2 = LTU; break;
21809 	  case EQ:   code1 = UNKNOWN; code2 = NE;  break;
21810 	  case NE:   code2 = UNKNOWN; break;
21813 	    gcc_unreachable ();
21818 	 * if (hi(a) < hi(b)) goto true;
21819 	 * if (hi(a) > hi(b)) goto false;
21820 	 * if (lo(a) < lo(b)) goto true;
21824 	if (code1 != UNKNOWN)
21825 	  ix86_expand_branch (code1, hi[0], hi[1], label);
21826 	if (code2 != UNKNOWN)
21827 	  ix86_expand_branch (code2, hi[0], hi[1], label2);
21829 	ix86_expand_branch (code3, lo[0], lo[1], label);
21831 	if (code2 != UNKNOWN)
21832 	  emit_label (label2);
21837       gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
21842 /* Split branch based on floating point condition. */
/* TARGET1/TARGET2 are the taken/fallthrough destinations; one of them
   is pc_rtx.  If TARGET2 is the label, reverse the (maybe-unordered)
   condition so the label ends up in TARGET1.  (Listing elided.)  */
21844 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
21845 		      rtx target1, rtx target2, rtx tmp)
21850   if (target2 != pc_rtx)
21852       std::swap (target1, target2);
21853       code = reverse_condition_maybe_unordered (code);
21856   condition = ix86_expand_fp_compare (code, op1, op2,
21859   i = emit_jump_insn (gen_rtx_SET
21861 		       gen_rtx_IF_THEN_ELSE (VOIDmode,
21862 					     condition, target1, target2)));
  /* Propagate the branch probability note if the splitter recorded one.  */
21863   if (split_branch_probability >= 0)
21864     add_int_reg_note (i, REG_BR_PROB, split_branch_probability);
/* Emit a setcc: DEST (must be QImode) = (OP0 CODE OP1) as 0/1 via the
   flags register.  (Listing elided; lines missing.)  */
21868 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
21872   gcc_assert (GET_MODE (dest) == QImode);
21874   ret = ix86_expand_compare (code, op0, op1);
  /* Retarget the comparison RTX to QImode so it can be stored.  */
21875   PUT_MODE (ret, QImode);
21876   emit_insn (gen_rtx_SET (dest, ret));
21879 /* Expand comparison setting or clearing carry flag.  Return true when
21880    successful and set pop for the operation. */
/* On success *POP is an LTU/GEU comparison against FLAGS_REG, i.e. the
   result lives purely in the carry flag.  (Listing elided.)  */
21882 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
21884   machine_mode mode =
21885     GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
21887   /* Do not handle double-mode compares that go through special path.  */
21888   if (mode == (TARGET_64BIT ? TImode : DImode))
21891   if (SCALAR_FLOAT_MODE_P (mode))
21894       rtx_insn *compare_seq;
21896       gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
21898       /* Shortcut:  following common codes never translate
21899 	 into carry flag compares.  */
21900       if (code == EQ || code == NE || code == UNEQ || code == LTGT
21901 	  || code == ORDERED || code == UNORDERED)
21904       /* These comparisons require zero flag; swap operands so they won't.  */
21905       if ((code == GT || code == UNLE || code == LE || code == UNGT)
21906 	  && !TARGET_IEEE_FP)
21908 	  std::swap (op0, op1);
21909 	  code = swap_condition (code);
21912       /* Try to expand the comparison and verify that we end up with
21913 	 carry flag based comparison.  This fails to be true only when
21914 	 we decide to expand comparison using arithmetic that is not
21915 	 too common scenario.  */
21917       compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
21918       compare_seq = get_insns ();
21921       if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
21922 	  || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
21923 	code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
21925 	code = GET_CODE (compare_op);
21927       if (code != LTU && code != GEU)
      /* Only now emit the (possibly expensive) compare sequence.  */
21930       emit_insn (compare_seq);
21935   if (!INTEGRAL_MODE_P (mode))
21944       /* Convert a==0 into (unsigned)a<1.  */
21947       if (op1 != const0_rtx)
21950       code = (code == EQ ? LTU : GEU);
21953       /* Convert a>b into b<a or a>=b-1.  */
21956       if (CONST_INT_P (op1))
21958 	  op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
21959 	  /* Bail out on overflow.  We still can swap operands but that
21960 	     would force loading of the constant into register.  */
21961 	  if (op1 == const0_rtx
21962 	      || !x86_64_immediate_operand (op1, GET_MODE (op1)))
21964 	  code = (code == GTU ? GEU : LTU);
21968 	  std::swap (op0, op1);
21969 	  code = (code == GTU ? LTU : GEU);
21973       /* Convert a>=0 into (unsigned)a<0x80000000.  */
21976       if (mode == DImode || op1 != const0_rtx)
21978       op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
21979       code = (code == LT ? GEU : LTU);
21983       if (mode == DImode || op1 != constm1_rtx)
21985       op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
21986       code = (code == LE ? GEU : LTU);
21992   /* Swapping operands may cause constant to appear as first operand.  */
21993   if (!nonimmediate_operand (op0, VOIDmode))
21995       if (!can_create_pseudo_p ())
21997       op0 = force_reg (mode, op0);
21999   *pop = ix86_expand_compare (code, op0, op1);
22000   gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
/* Expand an integer conditional move: operands[0] = operands[1] (a
   comparison) ? operands[2] : operands[3].  Tries branchless sequences
   (sbb/setcc + arithmetic, lea scaling) before falling back to a real
   cmov.  Returns nonzero on success.  NOTE(review): this listing is
   heavily elided -- many interior lines (braces, returns, whole
   alternatives) are missing between the numbered lines.  */
22005 ix86_expand_int_movcc (rtx operands[])
22007   enum rtx_code code = GET_CODE (operands[1]), compare_code;
22008   rtx_insn *compare_seq;
22010   machine_mode mode = GET_MODE (operands[0]);
22011   bool sign_bit_compare_p = false;
22012   rtx op0 = XEXP (operands[1], 0);
22013   rtx op1 = XEXP (operands[1], 1);
22015   if (GET_MODE (op0) == TImode
22016       || (GET_MODE (op0) == DImode
22021   compare_op = ix86_expand_compare (code, op0, op1);
22022   compare_seq = get_insns ();
22025   compare_code = GET_CODE (compare_op);
  /* x >= 0 / x < 0 (and the -1 variants) test only the sign bit.  */
22027   if ((op1 == const0_rtx && (code == GE || code == LT))
22028       || (op1 == constm1_rtx && (code == GT || code == LE)))
22029     sign_bit_compare_p = true;
22031   /* Don't attempt mode expansion here -- if we had to expand 5 or 6
22032      HImode insns, we'd be swallowed in word prefix ops.  */
22034   if ((mode != HImode || TARGET_FAST_PREFIX)
22035       && (mode != (TARGET_64BIT ? TImode : DImode))
22036       && CONST_INT_P (operands[2])
22037       && CONST_INT_P (operands[3]))
      /* Both arms are constants: ct/cf, with diff presumably ct - cf.  */
22039       rtx out = operands[0];
22040       HOST_WIDE_INT ct = INTVAL (operands[2]);
22041       HOST_WIDE_INT cf = INTVAL (operands[3]);
22042       HOST_WIDE_INT diff;
22045       /* Sign bit compares are better done using shifts than we do by using
22047       if (sign_bit_compare_p
22048 	  || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
22050 	  /* Detect overlap between destination and compare sources.  */
22053 	  if (!sign_bit_compare_p)
22056 	      bool fpcmp = false;
22058 	      compare_code = GET_CODE (compare_op);
22060 	      flags = XEXP (compare_op, 0);
22062 	      if (GET_MODE (flags) == CCFPmode
22063 		  || GET_MODE (flags) == CCFPUmode)
22067 		    = ix86_fp_compare_code_to_integer (compare_code);
22070 	      /* To simplify rest of code, restrict to the GEU case.  */
22071 	      if (compare_code == LTU)
22073 		  std::swap (ct, cf);
22074 		  compare_code = reverse_condition (compare_code);
22075 		  code = reverse_condition (code);
22080 		    PUT_CODE (compare_op,
22081 			      reverse_condition_maybe_unordered
22082 			        (GET_CODE (compare_op)));
22084 		    PUT_CODE (compare_op,
22085 			      reverse_condition (GET_CODE (compare_op)));
22089 	      if (reg_overlap_mentioned_p (out, op0)
22090 		  || reg_overlap_mentioned_p (out, op1))
22091 		tmp = gen_reg_rtx (mode);
	      /* sbb-style: materialize 0 / -1 from the carry flag.  */
22093 	      if (mode == DImode)
22094 		emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
22096 		emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
22097 						 flags, compare_op));
22101 	      if (code == GT || code == GE)
22102 		code = reverse_condition (code);
22105 		  std::swap (ct, cf);
22108 	      tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
22121 		  tmp = expand_simple_binop (mode, PLUS,
22123 					     copy_rtx (tmp), 1, OPTAB_DIRECT);
22134 		  tmp = expand_simple_binop (mode, IOR,
22136 					     copy_rtx (tmp), 1, OPTAB_DIRECT);
22138 	      else if (diff == -1 && ct)
22148 		  tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
22150 		    tmp = expand_simple_binop (mode, PLUS,
22151 					       copy_rtx (tmp), GEN_INT (cf),
22152 					       copy_rtx (tmp), 1, OPTAB_DIRECT);
22160 		   *	andl cf - ct, dest
22170 		      tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
22173 		  tmp = expand_simple_binop (mode, AND,
22175 					     gen_int_mode (cf - ct, mode),
22176 					     copy_rtx (tmp), 1, OPTAB_DIRECT);
22178 		    tmp = expand_simple_binop (mode, PLUS,
22179 					       copy_rtx (tmp), GEN_INT (ct),
22180 					       copy_rtx (tmp), 1, OPTAB_DIRECT);
22183 	  if (!rtx_equal_p (tmp, out))
22184 	    emit_move_insn (copy_rtx (out), copy_rtx (tmp));
22191 	  machine_mode cmp_mode = GET_MODE (op0);
22192 	  enum rtx_code new_code;
22194 	  if (SCALAR_FLOAT_MODE_P (cmp_mode))
22196 	      gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
22198 	      /* We may be reversing unordered compare to normal compare, that
22199 		 is not valid in general (we may convert non-trapping condition
22200 		 to trapping one), however on i386 we currently emit all
22201 		 comparisons unordered.  */
22202 	      new_code = reverse_condition_maybe_unordered (code);
22205 	    new_code = ix86_reverse_condition (code, cmp_mode);
22206 	  if (new_code != UNKNOWN)
22208 	      std::swap (ct, cf);
22214 	  compare_code = UNKNOWN;
22215 	  if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
22216 	      && CONST_INT_P (op1))
22218 	      if (op1 == const0_rtx
22219 		  && (code == LT || code == GE))
22220 		compare_code = code;
22221 	      else if (op1 == constm1_rtx)
22225 		  else if (code == GT)
22230 	  /* Optimize dest = (op0 < 0) ? -1 : cf.  */
22231 	  if (compare_code != UNKNOWN
22232 	      && GET_MODE (op0) == GET_MODE (out)
22233 	      && (cf == -1 || ct == -1))
22235 	      /* If lea code below could be used, only optimize
22236 		 if it results in a 2 insn sequence.  */
22238 	      if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
22239 		     || diff == 3 || diff == 5 || diff == 9)
22240 		  || (compare_code == LT && ct == -1)
22241 		  || (compare_code == GE && cf == -1))
22244 		   * notl op1	(if necessary)
22252 		      code = reverse_condition (code);
22255 		  out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
22257 		    out = expand_simple_binop (mode, IOR,
22259 					       out, 1, OPTAB_DIRECT);
22260 		  if (out != operands[0])
22261 		    emit_move_insn (operands[0], out);
	  /* lea path: diff in {1,2,3,4,5,8,9} can be formed by scale+add.  */
22268 	  if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
22269 	       || diff == 3 || diff == 5 || diff == 9)
22270 	      && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
22272 		  || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
22278 	       *			lea cf(dest*(ct-cf)),dest
22282 	       * This also catches the degenerate setcc-only case.
22288 	      out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
22291 	      /* On x86_64 the lea instruction operates on Pmode, so we need
22292 		 to get arithmetics done in proper mode to match.  */
22294 		tmp = copy_rtx (out);
22298 		  out1 = copy_rtx (out);
22299 		  tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
22303 		    tmp = gen_rtx_PLUS (mode, tmp, out1);
22309 		tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
22312 	      if (!rtx_equal_p (tmp, out))
22315 		    out = force_operand (tmp, copy_rtx (out));
22317 		    emit_insn (gen_rtx_SET (copy_rtx (out), copy_rtx (tmp)));
22319 	      if (!rtx_equal_p (out, operands[0]))
22320 		emit_move_insn (operands[0], copy_rtx (out));
22326 	   * General case:			Jumpful:
22327 	   *   xorl dest,dest		cmpl op1, op2
22328 	   *   cmpl op1, op2		movl ct, dest
22329 	   *   setcc dest		jcc 1f
22330 	   *   decl dest		movl cf, dest
22331 	   *   andl (cf-ct),dest	1:
22334 	   * Size 20.			Size 14.
22336 	   * This is reasonably steep, but branch mispredict costs are
22337 	   * high on modern cpus, so consider failing only if optimizing
22341 	  if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
22342 	      && BRANCH_COST (optimize_insn_for_speed_p (),
22347 	      machine_mode cmp_mode = GET_MODE (op0);
22348 	      enum rtx_code new_code;
22350 	      if (SCALAR_FLOAT_MODE_P (cmp_mode))
22352 		  gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
22354 		  /* We may be reversing unordered compare to normal compare,
22355 		     that is not valid in general (we may convert non-trapping
22356 		     condition to trapping one), however on i386 we currently
22357 		     emit all comparisons unordered.  */
22358 		  new_code = reverse_condition_maybe_unordered (code);
22362 		  new_code = ix86_reverse_condition (code, cmp_mode);
22363 		  if (compare_code != UNKNOWN && new_code != UNKNOWN)
22364 		    compare_code = reverse_condition (compare_code);
22367 	      if (new_code != UNKNOWN)
22375 	      if (compare_code != UNKNOWN)
22377 		  /* notl op1	(if needed)
22382 		     For x < 0 (resp. x <= -1) there will be no notl,
22383 		     so if possible swap the constants to get rid of the
22385 		     True/false will be -1/0 while code below (store flag
22386 		     followed by decrement) is 0/-1, so the constants need
22387 		     to be exchanged once more.  */
22389 		  if (compare_code == GE || !cf)
22391 		      code = reverse_condition (code);
22395 		    std::swap (ct, cf);
22397 		  out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
22401 		  out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
22403 		  out = expand_simple_binop (mode, PLUS, copy_rtx (out),
22405 					     copy_rtx (out), 1, OPTAB_DIRECT);
22408 		out = expand_simple_binop (mode, AND, copy_rtx (out),
22409 					   gen_int_mode (cf - ct, mode),
22410 					   copy_rtx (out), 1, OPTAB_DIRECT);
22412 		out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
22413 					   copy_rtx (out), 1, OPTAB_DIRECT);
22414 	      if (!rtx_equal_p (out, operands[0]))
22415 		emit_move_insn (operands[0], copy_rtx (out));
22421   if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
22423       /* Try a few things more with specific constants and a variable.  */
22426       rtx var, orig_out, out, tmp;
22428       if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
22431       /* If one of the two operands is an interesting constant, load a
22432 	 constant with the above and mask it in with a logical operation.  */
22434       if (CONST_INT_P (operands[2]))
22437 	  if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
22438 	    operands[3] = constm1_rtx, op = and_optab;
22439 	  else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
22440 	    operands[3] = const0_rtx, op = ior_optab;
22444       else if (CONST_INT_P (operands[3]))
22447 	  if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
22448 	    operands[2] = constm1_rtx, op = and_optab;
	  /* NOTE(review): the second operand of this comparison tests
	     operands[3] twice; looks asymmetric with the branch above --
	     cannot confirm from this elided listing.  */
22449 	  else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
22450 	    operands[2] = const0_rtx, op = ior_optab;
22457       orig_out = operands[0];
22458       tmp = gen_reg_rtx (mode);
22461       /* Recurse to get the constant loaded.  */
22462       if (!ix86_expand_int_movcc (operands))
22465       /* Mask in the interesting variable.  */
22466       out = expand_binop (mode, op, var, tmp, orig_out, 0,
22468       if (!rtx_equal_p (out, orig_out))
22469 	emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
22475    * For comparison with above,
22485   if (! nonimmediate_operand (operands[2], mode))
22486     operands[2] = force_reg (mode, operands[2]);
22487   if (! nonimmediate_operand (operands[3], mode))
22488     operands[3] = force_reg (mode, operands[3]);
22490   if (! register_operand (operands[2], VOIDmode)
22492 	  || ! register_operand (operands[3], VOIDmode)))
22493     operands[2] = force_reg (mode, operands[2]);
22496       && ! register_operand (operands[3], VOIDmode))
22497     operands[3] = force_reg (mode, operands[3]);
  /* Fall back to a real conditional move instruction.  */
22499   emit_insn (compare_seq);
22500   emit_insn (gen_rtx_SET (operands[0],
22501 			  gen_rtx_IF_THEN_ELSE (mode,
22502 						compare_op, operands[2],
22507 /* Swap, force into registers, or otherwise massage the two operands
22508    to an sse comparison with a mask result.  Thus we differ a bit from
22509    ix86_prepare_fp_compare_args which expects to produce a flags result.
22511    The DEST operand exists to help determine whether to commute commutative
22512    operators.  The POP0/POP1 operands are updated in place.  The new
22513    comparison code is returned, or UNKNOWN if not implementable.  */
/* (Listing elided: the switch arms and several cases are missing.)  */
22515 static enum rtx_code
22516 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
22517 				  rtx *pop0, rtx *pop1)
22523       /* AVX supports all the needed comparisons.  */
22526       /* We have no LTGT as an operator.  We could implement it with
22527 	 NE & ORDERED, but this requires an extra temporary.  It's
22528 	 not clear that it's worth it.  */
22535       /* These are supported directly.  */
22542       /* AVX has 3 operand comparisons, no need to swap anything.  */
22545       /* For commutative operators, try to canonicalize the destination
22546 	 operand to be first in the comparison - this helps reload to
22547 	 avoid extra moves.  */
22548       if (!dest || !rtx_equal_p (dest, *pop1))
22556       /* These are not supported directly before AVX, and furthermore
22557 	 ix86_expand_sse_fp_minmax only optimizes LT/UNGE.  Swap the
22558 	 comparison operands to transform into something that is
22560       std::swap (*pop0, *pop1);
22561       code = swap_condition (code);
22565       gcc_unreachable ();
22571 /* Detect conditional moves that exactly match min/max operational
22572    semantics.  Note that this is IEEE safe, as long as we don't
22573    interchange the operands.
22575    Returns FALSE if this conditional move doesn't match a MIN/MAX,
22576    and TRUE if the operation is successful and instructions are emitted.  */
/* (Listing elided; lines missing.)  */
22579 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
22580 			   rtx cmp_op1, rtx if_true, rtx if_false)
22588   else if (code == UNGE)
      /* a UNGE b ? t : f  ==  a LT b ? f : t.  */
22589       std::swap (if_true, if_false);
22593   if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
22595   else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
22600   mode = GET_MODE (dest);
22602   /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
22603      but MODE may be a vector mode and thus not appropriate.  */
22604   if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
      /* Strict mode: use the IEEE-semantics min/max unspecs.  */
22606       int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
22609       if_true = force_reg (mode, if_true);
22610       v = gen_rtvec (2, if_true, if_false);
22611       tmp = gen_rtx_UNSPEC (mode, v, u);
      /* Relaxed mode: plain SMIN/SMAX is fine.  */
22615       code = is_min ? SMIN : SMAX;
22616       tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
22619   emit_insn (gen_rtx_SET (dest, tmp));
22623 /* Expand an sse vector comparison.  Return the register with the result.  */
/* (Listing elided; lines missing.)  */
22626 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
22627 		     rtx op_true, rtx op_false)
22629   machine_mode mode = GET_MODE (dest);
22630   machine_mode cmp_ops_mode = GET_MODE (cmp_op0);
22632   /* In general case result of comparison can differ from operands' type.  */
22633   machine_mode cmp_mode;
22635   /* In AVX512F the result of comparison is an integer mask.  */
22636   bool maskcmp = false;
22639   if (GET_MODE_SIZE (cmp_ops_mode) == 64)
      /* 512-bit operands: result is a k-mask with one bit per element.  */
22641       cmp_mode = mode_for_size (GET_MODE_NUNITS (cmp_ops_mode), MODE_INT, 0);
22642       gcc_assert (cmp_mode != BLKmode);
22647     cmp_mode = cmp_ops_mode;
22650   cmp_op0 = force_reg (cmp_ops_mode, cmp_op0);
22651   if (!nonimmediate_operand (cmp_op1, cmp_ops_mode))
22652     cmp_op1 = force_reg (cmp_ops_mode, cmp_op1);
  /* Avoid clobbering DEST if it overlaps a later-used value.  */
22655       || (op_true && reg_overlap_mentioned_p (dest, op_true))
22656       || (op_false && reg_overlap_mentioned_p (dest, op_false)))
22657     dest = gen_reg_rtx (maskcmp ? cmp_mode : mode);
22659   /* Compare patterns for int modes are unspec in AVX512F only.  */
22660   if (maskcmp && (code == GT || code == EQ))
22662       rtx (*gen)(rtx, rtx, rtx);
22664       switch (cmp_ops_mode)
22667 	  gcc_assert (TARGET_AVX512BW);
22668 	  gen = code == GT ? gen_avx512bw_gtv64qi3 : gen_avx512bw_eqv64qi3_1;
22671 	  gcc_assert (TARGET_AVX512BW);
22672 	  gen = code == GT ? gen_avx512bw_gtv32hi3 : gen_avx512bw_eqv32hi3_1;
22675 	  gen = code == GT ? gen_avx512f_gtv16si3 : gen_avx512f_eqv16si3_1;
22678 	  gen = code == GT ? gen_avx512f_gtv8di3 : gen_avx512f_eqv8di3_1;
22686 	  emit_insn (gen (dest, cmp_op0, cmp_op1));
22690   x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
22692   if (cmp_mode != mode && !maskcmp)
22694       x = force_reg (cmp_ops_mode, x);
22695       convert_move (dest, x, false);
22698     emit_insn (gen_rtx_SET (dest, x));
22703 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
22704    operations.  This is used for both scalar and vector conditional moves.  */
/* Picks, in order: the mask itself, single AND/ANDN/IOR forms, XOP/SSE4.1
   blend instructions, AVX-512 blendm, and finally the generic
   AND/ANDN/IOR triple.  (Listing elided; lines missing.)  */
22707 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
22709   machine_mode mode = GET_MODE (dest);
22710   machine_mode cmpmode = GET_MODE (cmp);
22712   /* In AVX512F the result of comparison is an integer mask.  */
22713   bool maskcmp = (mode != cmpmode && TARGET_AVX512F);
22717   /* If we have an integer mask and FP value then we need
22718      to cast mask to FP mode.  */
22719   if (mode != cmpmode && VECTOR_MODE_P (cmpmode))
22721       cmp = force_reg (cmpmode, cmp);
22722       cmp = gen_rtx_SUBREG (mode, cmp, 0);
22725   if (vector_all_ones_operand (op_true, mode)
22726       && rtx_equal_p (op_false, CONST0_RTX (mode))
      /* cmp ? -1 : 0 is just the mask itself.  */
22729       emit_insn (gen_rtx_SET (dest, cmp));
22731   else if (op_false == CONST0_RTX (mode)
      /* cmp ? t : 0  ==>  cmp & t.  */
22734       op_true = force_reg (mode, op_true);
22735       x = gen_rtx_AND (mode, cmp, op_true);
22736       emit_insn (gen_rtx_SET (dest, x));
22738   else if (op_true == CONST0_RTX (mode)
      /* cmp ? 0 : f  ==>  ~cmp & f.  */
22741       op_false = force_reg (mode, op_false);
22742       x = gen_rtx_NOT (mode, cmp);
22743       x = gen_rtx_AND (mode, x, op_false);
22744       emit_insn (gen_rtx_SET (dest, x));
22746   else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode)
      /* cmp ? -1 : f  ==>  cmp | f (integer modes).  */
22749       op_false = force_reg (mode, op_false);
22750       x = gen_rtx_IOR (mode, cmp, op_false);
22751       emit_insn (gen_rtx_SET (dest, x));
22753   else if (TARGET_XOP
      /* XOP vpcmov handles the general case in one insn.  */
22756       op_true = force_reg (mode, op_true);
22758       if (!nonimmediate_operand (op_false, mode))
22759 	op_false = force_reg (mode, op_false);
22761       emit_insn (gen_rtx_SET (dest, gen_rtx_IF_THEN_ELSE (mode, cmp,
22767       rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
22770       if (!nonimmediate_operand (op_true, mode))
22771 	op_true = force_reg (mode, op_true);
22773       op_false = force_reg (mode, op_false);
22779 	    gen = gen_sse4_1_blendvps;
22783 	    gen = gen_sse4_1_blendvpd;
22791 	      gen = gen_sse4_1_pblendvb;
	  /* pblendvb only exists in V16QI; reinterpret other 128-bit
	     integer modes through it.  */
22792 	  if (mode != V16QImode)
22793 	    d = gen_reg_rtx (V16QImode);
22794 	  op_false = gen_lowpart (V16QImode, op_false);
22795 	  op_true = gen_lowpart (V16QImode, op_true);
22796 	  cmp = gen_lowpart (V16QImode, cmp);
22801 	    gen = gen_avx_blendvps256;
22805 	    gen = gen_avx_blendvpd256;
22813 	      gen = gen_avx2_pblendvb;
22814 	  if (mode != V32QImode)
22815 	    d = gen_reg_rtx (V32QImode);
22816 	  op_false = gen_lowpart (V32QImode, op_false);
22817 	  op_true = gen_lowpart (V32QImode, op_true);
22818 	  cmp = gen_lowpart (V32QImode, cmp);
22823 	  gen = gen_avx512bw_blendmv64qi;
22826 	  gen = gen_avx512bw_blendmv32hi;
22829 	  gen = gen_avx512f_blendmv16si;
22832 	  gen = gen_avx512f_blendmv8di;
22835 	  gen = gen_avx512f_blendmv8df;
22838 	  gen = gen_avx512f_blendmv16sf;
22847 	  emit_insn (gen (d, op_false, op_true, cmp));
22849 	    emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
      /* Generic fallback: dest = (t & cmp) | (f & ~cmp).  */
22853 	  op_true = force_reg (mode, op_true);
22855 	  t2 = gen_reg_rtx (mode);
22857 	    t3 = gen_reg_rtx (mode);
22861 	  x = gen_rtx_AND (mode, op_true, cmp);
22862 	  emit_insn (gen_rtx_SET (t2, x));
22864 	  x = gen_rtx_NOT (mode, cmp);
22865 	  x = gen_rtx_AND (mode, x, op_false);
22866 	  emit_insn (gen_rtx_SET (t3, x));
22868 	  x = gen_rtx_IOR (mode, t3, t2);
22869 	  emit_insn (gen_rtx_SET (dest, x));
22874 /* Expand a floating-point conditional move.  Return true if successful.  */
/* SSE math: build a mask compare and blend; x87 math: use fcmov, routing
   signed-integer conditions through a setcc.  (Listing elided.)  */
22877 ix86_expand_fp_movcc (rtx operands[])
22879   machine_mode mode = GET_MODE (operands[0]);
22880   enum rtx_code code = GET_CODE (operands[1]);
22881   rtx tmp, compare_op;
22882   rtx op0 = XEXP (operands[1], 0);
22883   rtx op1 = XEXP (operands[1], 1);
22885   if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
22887       machine_mode cmode;
22889       /* Since we've no cmove for sse registers, don't force bad register
22890 	 allocation just to gain access to it.  Deny movcc when the
22891 	 comparison mode doesn't match the move mode.  */
22892       cmode = GET_MODE (op0);
22893       if (cmode == VOIDmode)
22894 	cmode = GET_MODE (op1);
22898       code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
22899       if (code == UNKNOWN)
      /* Prefer a single min/max instruction when the cmov matches one.  */
22902       if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
22903 				     operands[2], operands[3]))
22906       tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
22907 				 operands[2], operands[3]);
22908       ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
22912   if (GET_MODE (op0) == TImode
22913       || (GET_MODE (op0) == DImode
22917   /* The floating point conditional move instructions don't directly
22918      support conditions resulting from a signed integer comparison.  */
22920   compare_op = ix86_expand_compare (code, op0, op1);
22921   if (!fcmov_comparison_operator (compare_op, VOIDmode))
      /* Materialize the condition as 0/1 and fcmov on (tmp != 0).  */
22923       tmp = gen_reg_rtx (QImode);
22924       ix86_expand_setcc (tmp, code, op0, op1);
22926       compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
22929   emit_insn (gen_rtx_SET (operands[0],
22930 			  gen_rtx_IF_THEN_ELSE (mode, compare_op,
22931 						operands[2], operands[3])));
22936 /* Helper for ix86_cmp_code_to_pcmp_immediate for int modes.  */
/* NOTE(review): the switch mapping rtx codes to pcmp immediates is
   elided from this listing; only the unreachable default survives.  */
22939 ix86_int_cmp_code_to_pcmp_immediate (enum rtx_code code)
22960       gcc_unreachable ();
22964 /* Helper for ix86_cmp_code_to_pcmp_immediate for fp modes.  */
/* NOTE(review): the switch mapping rtx codes to pcmp immediates is
   elided from this listing; only the unreachable default survives.  */
22967 ix86_fp_cmp_code_to_pcmp_immediate (enum rtx_code code)
22984       gcc_unreachable ();
22988 /* Return immediate value to be used in UNSPEC_PCMP
22989    for comparison CODE in MODE.  */
/* Simply dispatches to the FP or integer helper above by mode class.  */
22992 ix86_cmp_code_to_pcmp_immediate (enum rtx_code code, machine_mode mode)
22994   if (FLOAT_MODE_P (mode))
22995     return ix86_fp_cmp_code_to_pcmp_immediate (code);
22996   return ix86_int_cmp_code_to_pcmp_immediate (code);
22999 /* Expand AVX-512 vector comparison.  */
/* operands[0] (a k-mask) = operands[2] <code> operands[3], emitted as an
   UNSPEC_PCMP / UNSPEC_UNSIGNED_PCMP with the immediate computed from
   CODE and the operand mode.  (Listing elided; lines missing.)  */
23002 ix86_expand_mask_vec_cmp (rtx operands[])
23004   machine_mode mask_mode = GET_MODE (operands[0]);
23005   machine_mode cmp_mode = GET_MODE (operands[2]);
23006   enum rtx_code code = GET_CODE (operands[1]);
23007   rtx imm = GEN_INT (ix86_cmp_code_to_pcmp_immediate (code, cmp_mode));
      /* Unsigned codes use the unsigned pcmp variant.  */
23017     unspec_code = UNSPEC_UNSIGNED_PCMP;
23021     unspec_code = UNSPEC_PCMP;
23024   unspec = gen_rtx_UNSPEC (mask_mode, gen_rtvec (3, operands[2],
23027   emit_insn (gen_rtx_SET (operands[0], unspec));
23032 /* Expand fp vector comparison.  */
/* LTGT/UNEQ have no single SSE compare; they are built from two
   compares (ORDERED&NE resp. UNORDERED|EQ) combined with a logical op.
   (Listing elided; lines missing.)  */
23035 ix86_expand_fp_vec_cmp (rtx operands[])
23037   enum rtx_code code = GET_CODE (operands[1]);
23040   code = ix86_prepare_sse_fp_compare_args (operands[0], code,
23041 					   &operands[2], &operands[3]);
23042   if (code == UNKNOWN)
23045   switch (GET_CODE (operands[1]))
      /* LTGT == ORDERED && NE.  */
23048       temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[2],
23049 				  operands[3], NULL, NULL);
23050       cmp = ix86_expand_sse_cmp (operands[0], NE, operands[2],
23051 				 operands[3], NULL, NULL);
      /* UNEQ == UNORDERED || EQ.  */
23055       temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[2],
23056 				  operands[3], NULL, NULL);
23057       cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[2],
23058 				 operands[3], NULL, NULL);
23062       gcc_unreachable ();
23064   cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
23068     cmp = ix86_expand_sse_cmp (operands[0], code, operands[2], operands[3],
23069 			       operands[1], operands[2]);
23071   if (operands[0] != cmp)
23072     emit_move_insn (operands[0], cmp);
23078 ix86_expand_int_sse_cmp (rtx dest, enum rtx_code code, rtx cop0, rtx cop1,
23079 rtx op_true, rtx op_false, bool *negate)
23081 machine_mode data_mode = GET_MODE (dest);
23082 machine_mode mode = GET_MODE (cop0);
23087 /* XOP supports all of the comparisons on all 128-bit vector int types. */
23089 && (mode == V16QImode || mode == V8HImode
23090 || mode == V4SImode || mode == V2DImode))
23094 /* Canonicalize the comparison to EQ, GT, GTU. */
23105 code = reverse_condition (code);
23111 code = reverse_condition (code);
23117 std::swap (cop0, cop1);
23118 code = swap_condition (code);
23122 gcc_unreachable ();
23125 /* Only SSE4.1/SSE4.2 supports V2DImode. */
23126 if (mode == V2DImode)
23131 /* SSE4.1 supports EQ. */
23132 if (!TARGET_SSE4_1)
23138 /* SSE4.2 supports GT/GTU. */
23139 if (!TARGET_SSE4_2)
23144 gcc_unreachable ();
23148 /* Unsigned parallel compare is not supported by the hardware.
23149 Play some tricks to turn this into a signed comparison
23153 cop0 = force_reg (mode, cop0);
23165 rtx (*gen_sub3) (rtx, rtx, rtx);
23169 case V16SImode: gen_sub3 = gen_subv16si3; break;
23170 case V8DImode: gen_sub3 = gen_subv8di3; break;
23171 case V8SImode: gen_sub3 = gen_subv8si3; break;
23172 case V4DImode: gen_sub3 = gen_subv4di3; break;
23173 case V4SImode: gen_sub3 = gen_subv4si3; break;
23174 case V2DImode: gen_sub3 = gen_subv2di3; break;
23176 gcc_unreachable ();
23178 /* Subtract (-(INT MAX) - 1) from both operands to make
23180 mask = ix86_build_signbit_mask (mode, true, false);
23181 t1 = gen_reg_rtx (mode);
23182 emit_insn (gen_sub3 (t1, cop0, mask));
23184 t2 = gen_reg_rtx (mode);
23185 emit_insn (gen_sub3 (t2, cop1, mask));
23199 /* Perform a parallel unsigned saturating subtraction. */
23200 x = gen_reg_rtx (mode);
23201 emit_insn (gen_rtx_SET (x, gen_rtx_US_MINUS (mode, cop0,
23205 cop1 = CONST0_RTX (mode);
23207 *negate = !*negate;
23211 gcc_unreachable ();
23217 std::swap (op_true, op_false);
23219 /* Allow the comparison to be done in one mode, but the movcc to
23220 happen in another mode. */
23221 if (data_mode == mode)
23223 x = ix86_expand_sse_cmp (dest, code, cop0, cop1,
23224 op_true, op_false);
23228 gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
23229 x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1,
23230 op_true, op_false);
23231 if (GET_MODE (x) == mode)
23232 x = gen_lowpart (data_mode, x);
23238 /* Expand integer vector comparison. */
23241 ix86_expand_int_vec_cmp (rtx operands[])
23243 rtx_code code = GET_CODE (operands[1]);
23244 bool negate = false;
23245 rtx cmp = ix86_expand_int_sse_cmp (operands[0], code, operands[2],
23246 operands[3], NULL, NULL, &negate);
23252 cmp = ix86_expand_int_sse_cmp (operands[0], EQ, cmp,
23253 CONST0_RTX (GET_MODE (cmp)),
23254 NULL, NULL, &negate);
23256 gcc_assert (!negate);
23258 if (operands[0] != cmp)
23259 emit_move_insn (operands[0], cmp);
23264 /* Expand a floating-point vector conditional move; a vcond operation
23265 rather than a movcc operation. */
23268 ix86_expand_fp_vcond (rtx operands[])
23270 enum rtx_code code = GET_CODE (operands[3]);
23273 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
23274 &operands[4], &operands[5]);
23275 if (code == UNKNOWN)
23278 switch (GET_CODE (operands[3]))
23281 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
23282 operands[5], operands[0], operands[0]);
23283 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
23284 operands[5], operands[1], operands[2]);
23288 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
23289 operands[5], operands[0], operands[0]);
23290 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
23291 operands[5], operands[1], operands[2]);
23295 gcc_unreachable ();
23297 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
23299 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
23303 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
23304 operands[5], operands[1], operands[2]))
23307 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
23308 operands[1], operands[2]);
23309 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
23313 /* Expand a signed/unsigned integral vector conditional move. */
23316 ix86_expand_int_vcond (rtx operands[])
23318 machine_mode data_mode = GET_MODE (operands[0]);
23319 machine_mode mode = GET_MODE (operands[4]);
23320 enum rtx_code code = GET_CODE (operands[3]);
23321 bool negate = false;
23324 cop0 = operands[4];
23325 cop1 = operands[5];
23327 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
23328 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
23329 if ((code == LT || code == GE)
23330 && data_mode == mode
23331 && cop1 == CONST0_RTX (mode)
23332 && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
23333 && GET_MODE_UNIT_SIZE (data_mode) > 1
23334 && GET_MODE_UNIT_SIZE (data_mode) <= 8
23335 && (GET_MODE_SIZE (data_mode) == 16
23336 || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
23338 rtx negop = operands[2 - (code == LT)];
23339 int shift = GET_MODE_UNIT_BITSIZE (data_mode) - 1;
23340 if (negop == CONST1_RTX (data_mode))
23342 rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
23343 operands[0], 1, OPTAB_DIRECT);
23344 if (res != operands[0])
23345 emit_move_insn (operands[0], res);
23348 else if (GET_MODE_INNER (data_mode) != DImode
23349 && vector_all_ones_operand (negop, data_mode))
23351 rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
23352 operands[0], 0, OPTAB_DIRECT);
23353 if (res != operands[0])
23354 emit_move_insn (operands[0], res);
23359 if (!nonimmediate_operand (cop1, mode))
23360 cop1 = force_reg (mode, cop1);
23361 if (!general_operand (operands[1], data_mode))
23362 operands[1] = force_reg (data_mode, operands[1]);
23363 if (!general_operand (operands[2], data_mode))
23364 operands[2] = force_reg (data_mode, operands[2]);
23366 x = ix86_expand_int_sse_cmp (operands[0], code, cop0, cop1,
23367 operands[1], operands[2], &negate);
23372 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
23373 operands[2-negate]);
23377 /* AVX512F does support 64-byte integer vector operations,
23378 thus the longest vector we are faced with is V64QImode. */
23379 #define MAX_VECT_LEN 64
23381 struct expand_vec_perm_d
23383 rtx target, op0, op1;
23384 unsigned char perm[MAX_VECT_LEN];
23385 machine_mode vmode;
23386 unsigned char nelt;
23387 bool one_operand_p;
23392 ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1,
23393 struct expand_vec_perm_d *d)
23395 /* ix86_expand_vec_perm_vpermi2 is called from both const and non-const
23396 expander, so args are either in d, or in op0, op1 etc. */
23397 machine_mode mode = GET_MODE (d ? d->op0 : op0);
23398 machine_mode maskmode = mode;
23399 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
23404 if (TARGET_AVX512VL && TARGET_AVX512BW)
23405 gen = gen_avx512vl_vpermi2varv8hi3;
23408 if (TARGET_AVX512VL && TARGET_AVX512BW)
23409 gen = gen_avx512vl_vpermi2varv16hi3;
23412 if (TARGET_AVX512VBMI)
23413 gen = gen_avx512bw_vpermi2varv64qi3;
23416 if (TARGET_AVX512BW)
23417 gen = gen_avx512bw_vpermi2varv32hi3;
23420 if (TARGET_AVX512VL)
23421 gen = gen_avx512vl_vpermi2varv4si3;
23424 if (TARGET_AVX512VL)
23425 gen = gen_avx512vl_vpermi2varv8si3;
23428 if (TARGET_AVX512F)
23429 gen = gen_avx512f_vpermi2varv16si3;
23432 if (TARGET_AVX512VL)
23434 gen = gen_avx512vl_vpermi2varv4sf3;
23435 maskmode = V4SImode;
23439 if (TARGET_AVX512VL)
23441 gen = gen_avx512vl_vpermi2varv8sf3;
23442 maskmode = V8SImode;
23446 if (TARGET_AVX512F)
23448 gen = gen_avx512f_vpermi2varv16sf3;
23449 maskmode = V16SImode;
23453 if (TARGET_AVX512VL)
23454 gen = gen_avx512vl_vpermi2varv2di3;
23457 if (TARGET_AVX512VL)
23458 gen = gen_avx512vl_vpermi2varv4di3;
23461 if (TARGET_AVX512F)
23462 gen = gen_avx512f_vpermi2varv8di3;
23465 if (TARGET_AVX512VL)
23467 gen = gen_avx512vl_vpermi2varv2df3;
23468 maskmode = V2DImode;
23472 if (TARGET_AVX512VL)
23474 gen = gen_avx512vl_vpermi2varv4df3;
23475 maskmode = V4DImode;
23479 if (TARGET_AVX512F)
23481 gen = gen_avx512f_vpermi2varv8df3;
23482 maskmode = V8DImode;
23492 /* ix86_expand_vec_perm_vpermi2 is called from both const and non-const
23493 expander, so args are either in d, or in op0, op1 etc. */
23497 target = d->target;
23500 for (int i = 0; i < d->nelt; ++i)
23501 vec[i] = GEN_INT (d->perm[i]);
23502 mask = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (d->nelt, vec));
23505 emit_insn (gen (target, op0, force_reg (maskmode, mask), op1));
23509 /* Expand a variable vector permutation. */
23512 ix86_expand_vec_perm (rtx operands[])
23514 rtx target = operands[0];
23515 rtx op0 = operands[1];
23516 rtx op1 = operands[2];
23517 rtx mask = operands[3];
23518 rtx t1, t2, t3, t4, t5, t6, t7, t8, vt, vt2, vec[32];
23519 machine_mode mode = GET_MODE (op0);
23520 machine_mode maskmode = GET_MODE (mask);
23522 bool one_operand_shuffle = rtx_equal_p (op0, op1);
23524 /* Number of elements in the vector. */
23525 w = GET_MODE_NUNITS (mode);
23526 e = GET_MODE_UNIT_SIZE (mode);
23527 gcc_assert (w <= 64);
23529 if (ix86_expand_vec_perm_vpermi2 (target, op0, mask, op1, NULL))
23534 if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
23536 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
23537 a constant shuffle operand. With a tiny bit of effort we can
23538 use VPERMD instead. A re-interpretation stall for V4DFmode is
23539 unfortunate but there's no avoiding it.
23540 Similarly for V16HImode we don't have instructions for variable
23541 shuffling, while for V32QImode we can use after preparing suitable
23542 masks vpshufb; vpshufb; vpermq; vpor. */
23544 if (mode == V16HImode)
23546 maskmode = mode = V32QImode;
23552 maskmode = mode = V8SImode;
23556 t1 = gen_reg_rtx (maskmode);
23558 /* Replicate the low bits of the V4DImode mask into V8SImode:
23560 t1 = { A A B B C C D D }. */
23561 for (i = 0; i < w / 2; ++i)
23562 vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
23563 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
23564 vt = force_reg (maskmode, vt);
23565 mask = gen_lowpart (maskmode, mask);
23566 if (maskmode == V8SImode)
23567 emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
23569 emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));
23571 /* Multiply the shuffle indices by two. */
23572 t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
23575 /* Add one to the odd shuffle indices:
23576 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
23577 for (i = 0; i < w / 2; ++i)
23579 vec[i * 2] = const0_rtx;
23580 vec[i * 2 + 1] = const1_rtx;
23582 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
23583 vt = validize_mem (force_const_mem (maskmode, vt));
23584 t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
23587 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
23588 operands[3] = mask = t1;
23589 target = gen_reg_rtx (mode);
23590 op0 = gen_lowpart (mode, op0);
23591 op1 = gen_lowpart (mode, op1);
23597 /* The VPERMD and VPERMPS instructions already properly ignore
23598 the high bits of the shuffle elements. No need for us to
23599 perform an AND ourselves. */
23600 if (one_operand_shuffle)
23602 emit_insn (gen_avx2_permvarv8si (target, op0, mask));
23603 if (target != operands[0])
23604 emit_move_insn (operands[0],
23605 gen_lowpart (GET_MODE (operands[0]), target));
23609 t1 = gen_reg_rtx (V8SImode);
23610 t2 = gen_reg_rtx (V8SImode);
23611 emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
23612 emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
23618 mask = gen_lowpart (V8SImode, mask);
23619 if (one_operand_shuffle)
23620 emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
23623 t1 = gen_reg_rtx (V8SFmode);
23624 t2 = gen_reg_rtx (V8SFmode);
23625 emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
23626 emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
23632 /* By combining the two 128-bit input vectors into one 256-bit
23633 input vector, we can use VPERMD and VPERMPS for the full
23634 two-operand shuffle. */
23635 t1 = gen_reg_rtx (V8SImode);
23636 t2 = gen_reg_rtx (V8SImode);
23637 emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
23638 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
23639 emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
23640 emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
23644 t1 = gen_reg_rtx (V8SFmode);
23645 t2 = gen_reg_rtx (V8SImode);
23646 mask = gen_lowpart (V4SImode, mask);
23647 emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
23648 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
23649 emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
23650 emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
23654 t1 = gen_reg_rtx (V32QImode);
23655 t2 = gen_reg_rtx (V32QImode);
23656 t3 = gen_reg_rtx (V32QImode);
23657 vt2 = GEN_INT (-128);
23658 for (i = 0; i < 32; i++)
23660 vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
23661 vt = force_reg (V32QImode, vt);
23662 for (i = 0; i < 32; i++)
23663 vec[i] = i < 16 ? vt2 : const0_rtx;
23664 vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
23665 vt2 = force_reg (V32QImode, vt2);
23666 /* From mask create two adjusted masks, which contain the same
23667 bits as mask in the low 7 bits of each vector element.
23668 The first mask will have the most significant bit clear
23669 if it requests element from the same 128-bit lane
23670 and MSB set if it requests element from the other 128-bit lane.
23671 The second mask will have the opposite values of the MSB,
23672 and additionally will have its 128-bit lanes swapped.
23673 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
23674 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
23675 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
23676 stands for other 12 bytes. */
23677 /* The bit whether element is from the same lane or the other
23678 lane is bit 4, so shift it up by 3 to the MSB position. */
23679 t5 = gen_reg_rtx (V4DImode);
23680 emit_insn (gen_ashlv4di3 (t5, gen_lowpart (V4DImode, mask),
23682 /* Clear MSB bits from the mask just in case it had them set. */
23683 emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
23684 /* After this t1 will have MSB set for elements from other lane. */
23685 emit_insn (gen_xorv32qi3 (t1, gen_lowpart (V32QImode, t5), vt2));
23686 /* Clear bits other than MSB. */
23687 emit_insn (gen_andv32qi3 (t1, t1, vt));
23688 /* Or in the lower bits from mask into t3. */
23689 emit_insn (gen_iorv32qi3 (t3, t1, t2));
23690 /* And invert MSB bits in t1, so MSB is set for elements from the same
23692 emit_insn (gen_xorv32qi3 (t1, t1, vt));
23693 /* Swap 128-bit lanes in t3. */
23694 t6 = gen_reg_rtx (V4DImode);
23695 emit_insn (gen_avx2_permv4di_1 (t6, gen_lowpart (V4DImode, t3),
23696 const2_rtx, GEN_INT (3),
23697 const0_rtx, const1_rtx));
23698 /* And or in the lower bits from mask into t1. */
23699 emit_insn (gen_iorv32qi3 (t1, t1, t2));
23700 if (one_operand_shuffle)
23702 /* Each of these shuffles will put 0s in places where
23703 element from the other 128-bit lane is needed, otherwise
23704 will shuffle in the requested value. */
23705 emit_insn (gen_avx2_pshufbv32qi3 (t3, op0,
23706 gen_lowpart (V32QImode, t6)));
23707 emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
23708 /* For t3 the 128-bit lanes are swapped again. */
23709 t7 = gen_reg_rtx (V4DImode);
23710 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t3),
23711 const2_rtx, GEN_INT (3),
23712 const0_rtx, const1_rtx));
23713 /* And oring both together leads to the result. */
23714 emit_insn (gen_iorv32qi3 (target, t1,
23715 gen_lowpart (V32QImode, t7)));
23716 if (target != operands[0])
23717 emit_move_insn (operands[0],
23718 gen_lowpart (GET_MODE (operands[0]), target));
23722 t4 = gen_reg_rtx (V32QImode);
23723 /* Similarly to the above one_operand_shuffle code,
23724 just repeated twice, once for each operand. The merge_two:
23725 code will merge the two results together. */
23726 emit_insn (gen_avx2_pshufbv32qi3 (t4, op0,
23727 gen_lowpart (V32QImode, t6)));
23728 emit_insn (gen_avx2_pshufbv32qi3 (t3, op1,
23729 gen_lowpart (V32QImode, t6)));
23730 emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
23731 emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
23732 t7 = gen_reg_rtx (V4DImode);
23733 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t4),
23734 const2_rtx, GEN_INT (3),
23735 const0_rtx, const1_rtx));
23736 t8 = gen_reg_rtx (V4DImode);
23737 emit_insn (gen_avx2_permv4di_1 (t8, gen_lowpart (V4DImode, t3),
23738 const2_rtx, GEN_INT (3),
23739 const0_rtx, const1_rtx));
23740 emit_insn (gen_iorv32qi3 (t4, t2, gen_lowpart (V32QImode, t7)));
23741 emit_insn (gen_iorv32qi3 (t3, t1, gen_lowpart (V32QImode, t8)));
23747 gcc_assert (GET_MODE_SIZE (mode) <= 16);
23754 /* The XOP VPPERM insn supports three inputs. By ignoring the
23755 one_operand_shuffle special case, we avoid creating another
23756 set of constant vectors in memory. */
23757 one_operand_shuffle = false;
23759 /* mask = mask & {2*w-1, ...} */
23760 vt = GEN_INT (2*w - 1);
23764 /* mask = mask & {w-1, ...} */
23765 vt = GEN_INT (w - 1);
23768 for (i = 0; i < w; i++)
23770 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
23771 mask = expand_simple_binop (maskmode, AND, mask, vt,
23772 NULL_RTX, 0, OPTAB_DIRECT);
23774 /* For non-QImode operations, convert the word permutation control
23775 into a byte permutation control. */
23776 if (mode != V16QImode)
23778 mask = expand_simple_binop (maskmode, ASHIFT, mask,
23779 GEN_INT (exact_log2 (e)),
23780 NULL_RTX, 0, OPTAB_DIRECT);
23782 /* Convert mask to vector of chars. */
23783 mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));
23785 /* Replicate each of the input bytes into byte positions:
23786 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
23787 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
23788 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
23789 for (i = 0; i < 16; ++i)
23790 vec[i] = GEN_INT (i/e * e);
23791 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
23792 vt = validize_mem (force_const_mem (V16QImode, vt));
23794 emit_insn (gen_xop_pperm (mask, mask, mask, vt));
23796 emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));
23798 /* Convert it into the byte positions by doing
23799 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */
23800 for (i = 0; i < 16; ++i)
23801 vec[i] = GEN_INT (i % e);
23802 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
23803 vt = validize_mem (force_const_mem (V16QImode, vt));
23804 emit_insn (gen_addv16qi3 (mask, mask, vt));
23807 /* The actual shuffle operations all operate on V16QImode. */
23808 op0 = gen_lowpart (V16QImode, op0);
23809 op1 = gen_lowpart (V16QImode, op1);
23813 if (GET_MODE (target) != V16QImode)
23814 target = gen_reg_rtx (V16QImode);
23815 emit_insn (gen_xop_pperm (target, op0, op1, mask));
23816 if (target != operands[0])
23817 emit_move_insn (operands[0],
23818 gen_lowpart (GET_MODE (operands[0]), target));
23820 else if (one_operand_shuffle)
23822 if (GET_MODE (target) != V16QImode)
23823 target = gen_reg_rtx (V16QImode);
23824 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
23825 if (target != operands[0])
23826 emit_move_insn (operands[0],
23827 gen_lowpart (GET_MODE (operands[0]), target));
23834 /* Shuffle the two input vectors independently. */
23835 t1 = gen_reg_rtx (V16QImode);
23836 t2 = gen_reg_rtx (V16QImode);
23837 emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
23838 emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));
23841 /* Then merge them together. The key is whether any given control
23842 element contained a bit set that indicates the second word. */
23843 mask = operands[3];
23845 if (maskmode == V2DImode && !TARGET_SSE4_1)
23847 /* Without SSE4.1, we don't have V2DImode EQ. Perform one
23848 more shuffle to convert the V2DI input mask into a V4SI
23849 input mask. At which point the masking that expand_int_vcond
23850 will work as desired. */
23851 rtx t3 = gen_reg_rtx (V4SImode);
23852 emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
23853 const0_rtx, const0_rtx,
23854 const2_rtx, const2_rtx));
23856 maskmode = V4SImode;
23860 for (i = 0; i < w; i++)
23862 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
23863 vt = force_reg (maskmode, vt);
23864 mask = expand_simple_binop (maskmode, AND, mask, vt,
23865 NULL_RTX, 0, OPTAB_DIRECT);
23867 if (GET_MODE (target) != mode)
23868 target = gen_reg_rtx (mode);
23870 xops[1] = gen_lowpart (mode, t2);
23871 xops[2] = gen_lowpart (mode, t1);
23872 xops[3] = gen_rtx_EQ (maskmode, mask, vt);
23875 ok = ix86_expand_int_vcond (xops);
23877 if (target != operands[0])
23878 emit_move_insn (operands[0],
23879 gen_lowpart (GET_MODE (operands[0]), target));
23883 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
23884 true if we should do zero extension, else sign extension. HIGH_P is
23885 true if we want the N/2 high elements, else the low elements. */
23888 ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
23890 machine_mode imode = GET_MODE (src);
23895 rtx (*unpack)(rtx, rtx);
23896 rtx (*extract)(rtx, rtx) = NULL;
23897 machine_mode halfmode = BLKmode;
23903 unpack = gen_avx512bw_zero_extendv32qiv32hi2;
23905 unpack = gen_avx512bw_sign_extendv32qiv32hi2;
23906 halfmode = V32QImode;
23908 = high_p ? gen_vec_extract_hi_v64qi : gen_vec_extract_lo_v64qi;
23912 unpack = gen_avx2_zero_extendv16qiv16hi2;
23914 unpack = gen_avx2_sign_extendv16qiv16hi2;
23915 halfmode = V16QImode;
23917 = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
23921 unpack = gen_avx512f_zero_extendv16hiv16si2;
23923 unpack = gen_avx512f_sign_extendv16hiv16si2;
23924 halfmode = V16HImode;
23926 = high_p ? gen_vec_extract_hi_v32hi : gen_vec_extract_lo_v32hi;
23930 unpack = gen_avx2_zero_extendv8hiv8si2;
23932 unpack = gen_avx2_sign_extendv8hiv8si2;
23933 halfmode = V8HImode;
23935 = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
23939 unpack = gen_avx512f_zero_extendv8siv8di2;
23941 unpack = gen_avx512f_sign_extendv8siv8di2;
23942 halfmode = V8SImode;
23944 = high_p ? gen_vec_extract_hi_v16si : gen_vec_extract_lo_v16si;
23948 unpack = gen_avx2_zero_extendv4siv4di2;
23950 unpack = gen_avx2_sign_extendv4siv4di2;
23951 halfmode = V4SImode;
23953 = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
23957 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
23959 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
23963 unpack = gen_sse4_1_zero_extendv4hiv4si2;
23965 unpack = gen_sse4_1_sign_extendv4hiv4si2;
23969 unpack = gen_sse4_1_zero_extendv2siv2di2;
23971 unpack = gen_sse4_1_sign_extendv2siv2di2;
23974 gcc_unreachable ();
23977 if (GET_MODE_SIZE (imode) >= 32)
23979 tmp = gen_reg_rtx (halfmode);
23980 emit_insn (extract (tmp, src));
23984 /* Shift higher 8 bytes to lower 8 bytes. */
23985 tmp = gen_reg_rtx (V1TImode);
23986 emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, src),
23988 tmp = gen_lowpart (imode, tmp);
23993 emit_insn (unpack (dest, tmp));
23997 rtx (*unpack)(rtx, rtx, rtx);
24003 unpack = gen_vec_interleave_highv16qi;
24005 unpack = gen_vec_interleave_lowv16qi;
24009 unpack = gen_vec_interleave_highv8hi;
24011 unpack = gen_vec_interleave_lowv8hi;
24015 unpack = gen_vec_interleave_highv4si;
24017 unpack = gen_vec_interleave_lowv4si;
24020 gcc_unreachable ();
24024 tmp = force_reg (imode, CONST0_RTX (imode));
24026 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
24027 src, pc_rtx, pc_rtx);
24029 rtx tmp2 = gen_reg_rtx (imode);
24030 emit_insn (unpack (tmp2, src, tmp));
24031 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), tmp2));
24035 /* Expand conditional increment or decrement using adc/sbb instructions.
24036 The default case using setcc followed by the conditional move can be
24037 done by generic code. */
24039 ix86_expand_int_addcc (rtx operands[])
24041 enum rtx_code code = GET_CODE (operands[1]);
24043 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
24045 rtx val = const0_rtx;
24046 bool fpcmp = false;
24048 rtx op0 = XEXP (operands[1], 0);
24049 rtx op1 = XEXP (operands[1], 1);
24051 if (operands[3] != const1_rtx
24052 && operands[3] != constm1_rtx)
24054 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
24056 code = GET_CODE (compare_op);
24058 flags = XEXP (compare_op, 0);
24060 if (GET_MODE (flags) == CCFPmode
24061 || GET_MODE (flags) == CCFPUmode)
24064 code = ix86_fp_compare_code_to_integer (code);
24071 PUT_CODE (compare_op,
24072 reverse_condition_maybe_unordered
24073 (GET_CODE (compare_op)));
24075 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
24078 mode = GET_MODE (operands[0]);
24080 /* Construct either adc or sbb insn. */
24081 if ((code == LTU) == (operands[3] == constm1_rtx))
24086 insn = gen_subqi3_carry;
24089 insn = gen_subhi3_carry;
24092 insn = gen_subsi3_carry;
24095 insn = gen_subdi3_carry;
24098 gcc_unreachable ();
24106 insn = gen_addqi3_carry;
24109 insn = gen_addhi3_carry;
24112 insn = gen_addsi3_carry;
24115 insn = gen_adddi3_carry;
24118 gcc_unreachable ();
24121 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
24127 /* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
24128 but works for floating-point parameters and non-offsettable memories.
24129 For pushes, it returns just stack offsets; the values will be saved
24130 in the right order. Maximally three parts are generated. */
24133 ix86_split_to_parts (rtx operand, rtx *parts, machine_mode mode)
24138 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
24140 size = (GET_MODE_SIZE (mode) + 4) / 8;
24142 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
24143 gcc_assert (size >= 2 && size <= 4);
24145 /* Optimize constant pool reference to immediates. This is used by fp
24146 moves, that force all constants to memory to allow combining. */
24147 if (MEM_P (operand) && MEM_READONLY_P (operand))
24149 rtx tmp = maybe_get_pool_constant (operand);
24154 if (MEM_P (operand) && !offsettable_memref_p (operand))
24156 /* The only non-offsetable memories we handle are pushes. */
24157 int ok = push_operand (operand, VOIDmode);
24161 operand = copy_rtx (operand);
24162 PUT_MODE (operand, word_mode);
24163 parts[0] = parts[1] = parts[2] = parts[3] = operand;
24167 if (GET_CODE (operand) == CONST_VECTOR)
24169 machine_mode imode = int_mode_for_mode (mode);
24170 /* Caution: if we looked through a constant pool memory above,
24171 the operand may actually have a different mode now. That's
24172 ok, since we want to pun this all the way back to an integer. */
24173 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
24174 gcc_assert (operand != NULL);
24180 if (mode == DImode)
24181 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
24186 if (REG_P (operand))
24188 gcc_assert (reload_completed);
24189 for (i = 0; i < size; i++)
24190 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
24192 else if (offsettable_memref_p (operand))
24194 operand = adjust_address (operand, SImode, 0);
24195 parts[0] = operand;
24196 for (i = 1; i < size; i++)
24197 parts[i] = adjust_address (operand, SImode, 4 * i);
24199 else if (CONST_DOUBLE_P (operand))
24201 const REAL_VALUE_TYPE *r;
24204 r = CONST_DOUBLE_REAL_VALUE (operand);
24208 real_to_target (l, r, mode);
24209 parts[3] = gen_int_mode (l[3], SImode);
24210 parts[2] = gen_int_mode (l[2], SImode);
24213 /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
24214 long double may not be 80-bit. */
24215 real_to_target (l, r, mode);
24216 parts[2] = gen_int_mode (l[2], SImode);
24219 REAL_VALUE_TO_TARGET_DOUBLE (*r, l);
24222 gcc_unreachable ();
24224 parts[1] = gen_int_mode (l[1], SImode);
24225 parts[0] = gen_int_mode (l[0], SImode);
24228 gcc_unreachable ();
24233 if (mode == TImode)
24234 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
24235 if (mode == XFmode || mode == TFmode)
24237 machine_mode upper_mode = mode==XFmode ? SImode : DImode;
24238 if (REG_P (operand))
24240 gcc_assert (reload_completed);
24241 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
24242 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
24244 else if (offsettable_memref_p (operand))
24246 operand = adjust_address (operand, DImode, 0);
24247 parts[0] = operand;
24248 parts[1] = adjust_address (operand, upper_mode, 8);
24250 else if (CONST_DOUBLE_P (operand))
24254 real_to_target (l, CONST_DOUBLE_REAL_VALUE (operand), mode);
24256 /* real_to_target puts 32-bit pieces in each long. */
24259 ((l[0] & (HOST_WIDE_INT) 0xffffffff)
24260 | ((l[1] & (HOST_WIDE_INT) 0xffffffff) << 32),
24263 if (upper_mode == SImode)
24264 parts[1] = gen_int_mode (l[2], SImode);
24268 ((l[2] & (HOST_WIDE_INT) 0xffffffff)
24269 | ((l[3] & (HOST_WIDE_INT) 0xffffffff) << 32),
24273 gcc_unreachable ();
24280 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
24281 Return false when normal moves are needed; true when all required
24282 insns have been emitted. Operands 2-4 contain the input values
24283 in the correct order; operands 5-7 contain the output values.
24286 ix86_split_long_move (rtx operands[])
24291 int collisions = 0;
24292 machine_mode mode = GET_MODE (operands[0]);
24293 bool collisionparts[4];
24295 /* The DFmode expanders may ask us to move double.
24296 For 64bit target this is single move. By hiding the fact
24297 here we simplify i386.md splitters. */
24298 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
24300 /* Optimize constant pool reference to immediates. This is used by
24301 fp moves, that force all constants to memory to allow combining. */
24303 if (MEM_P (operands[1])
24304 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
24305 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
24306 operands[1] = get_pool_constant (XEXP (operands[1], 0));
24307 if (push_operand (operands[0], VOIDmode))
24309 operands[0] = copy_rtx (operands[0]);
24310 PUT_MODE (operands[0], word_mode);
24313 operands[0] = gen_lowpart (DImode, operands[0]);
24314 operands[1] = gen_lowpart (DImode, operands[1]);
24315 emit_move_insn (operands[0], operands[1]);
24319 /* The only non-offsettable memory we handle is push. */
24320 if (push_operand (operands[0], VOIDmode))
24323 gcc_assert (!MEM_P (operands[0])
24324 || offsettable_memref_p (operands[0]));
24326 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
24327 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
24329 /* When emitting push, take care for source operands on the stack. */
24330 if (push && MEM_P (operands[1])
24331 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
24333 rtx src_base = XEXP (part[1][nparts - 1], 0);
24335 /* Compensate for the stack decrement by 4. */
24336 if (!TARGET_64BIT && nparts == 3
24337 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
24338 src_base = plus_constant (Pmode, src_base, 4);
24340 /* src_base refers to the stack pointer and is
24341 automatically decreased by emitted push. */
24342 for (i = 0; i < nparts; i++)
24343 part[1][i] = change_address (part[1][i],
24344 GET_MODE (part[1][i]), src_base);
24347 /* We need to do copy in the right order in case an address register
24348 of the source overlaps the destination. */
24349 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
24353 for (i = 0; i < nparts; i++)
24356 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
24357 if (collisionparts[i])
24361 /* Collision in the middle part can be handled by reordering. */
24362 if (collisions == 1 && nparts == 3 && collisionparts [1])
24364 std::swap (part[0][1], part[0][2]);
24365 std::swap (part[1][1], part[1][2]);
24367 else if (collisions == 1
24369 && (collisionparts [1] || collisionparts [2]))
24371 if (collisionparts [1])
24373 std::swap (part[0][1], part[0][2]);
24374 std::swap (part[1][1], part[1][2]);
24378 std::swap (part[0][2], part[0][3]);
24379 std::swap (part[1][2], part[1][3]);
24383 /* If there are more collisions, we can't handle it by reordering.
24384 Do an lea to the last part and use only one colliding move. */
24385 else if (collisions > 1)
24387 rtx base, addr, tls_base = NULL_RTX;
24391 base = part[0][nparts - 1];
24393 /* Handle the case when the last part isn't valid for lea.
24394 Happens in 64-bit mode storing the 12-byte XFmode. */
24395 if (GET_MODE (base) != Pmode)
24396 base = gen_rtx_REG (Pmode, REGNO (base));
24398 addr = XEXP (part[1][0], 0);
24399 if (TARGET_TLS_DIRECT_SEG_REFS)
24401 struct ix86_address parts;
24402 int ok = ix86_decompose_address (addr, &parts);
24404 if (parts.seg == DEFAULT_TLS_SEG_REG)
24406 /* It is not valid to use %gs: or %fs: in
24407 lea though, so we need to remove it from the
24408 address used for lea and add it to each individual
24409 memory loads instead. */
24410 addr = copy_rtx (addr);
24412 while (GET_CODE (*x) == PLUS)
24414 for (i = 0; i < 2; i++)
24416 rtx u = XEXP (*x, i);
24417 if (GET_CODE (u) == ZERO_EXTEND)
24419 if (GET_CODE (u) == UNSPEC
24420 && XINT (u, 1) == UNSPEC_TP)
24422 tls_base = XEXP (*x, i);
24423 *x = XEXP (*x, 1 - i);
24431 gcc_assert (tls_base);
24434 emit_insn (gen_rtx_SET (base, addr));
24436 base = gen_rtx_PLUS (GET_MODE (base), base, tls_base);
24437 part[1][0] = replace_equiv_address (part[1][0], base);
24438 for (i = 1; i < nparts; i++)
24441 base = copy_rtx (base);
24442 tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i);
24443 part[1][i] = replace_equiv_address (part[1][i], tmp);
24454 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
24455 emit_insn (ix86_gen_add3 (stack_pointer_rtx,
24456 stack_pointer_rtx, GEN_INT (-4)));
24457 emit_move_insn (part[0][2], part[1][2]);
24459 else if (nparts == 4)
24461 emit_move_insn (part[0][3], part[1][3]);
24462 emit_move_insn (part[0][2], part[1][2]);
24467 /* In 64bit mode we don't have 32bit push available. In case this is
24468 register, it is OK - we will just use larger counterpart. We also
24469 retype memory - these comes from attempt to avoid REX prefix on
24470 moving of second half of TFmode value. */
24471 if (GET_MODE (part[1][1]) == SImode)
24473 switch (GET_CODE (part[1][1]))
24476 part[1][1] = adjust_address (part[1][1], DImode, 0);
24480 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
24484 gcc_unreachable ();
24487 if (GET_MODE (part[1][0]) == SImode)
24488 part[1][0] = part[1][1];
24491 emit_move_insn (part[0][1], part[1][1]);
24492 emit_move_insn (part[0][0], part[1][0]);
24496 /* Choose correct order to not overwrite the source before it is copied. */
24497 if ((REG_P (part[0][0])
24498 && REG_P (part[1][1])
24499 && (REGNO (part[0][0]) == REGNO (part[1][1])
24501 && REGNO (part[0][0]) == REGNO (part[1][2]))
24503 && REGNO (part[0][0]) == REGNO (part[1][3]))))
24505 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
24507 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
24509 operands[2 + i] = part[0][j];
24510 operands[6 + i] = part[1][j];
24515 for (i = 0; i < nparts; i++)
24517 operands[2 + i] = part[0][i];
24518 operands[6 + i] = part[1][i];
24522 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
24523 if (optimize_insn_for_size_p ())
24525 for (j = 0; j < nparts - 1; j++)
24526 if (CONST_INT_P (operands[6 + j])
24527 && operands[6 + j] != const0_rtx
24528 && REG_P (operands[2 + j]))
24529 for (i = j; i < nparts - 1; i++)
24530 if (CONST_INT_P (operands[7 + i])
24531 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
24532 operands[7 + i] = operands[2 + j];
24535 for (i = 0; i < nparts; i++)
24536 emit_move_insn (operands[2 + i], operands[6 + i]);
24541 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
24542 left shift by a constant, either using a single shift or
24543 a sequence of add instructions. */
/* Emit a left shift of OPERAND by the constant COUNT for one half of a
   double-word value.  MODE is the *double-word* mode being split, so
   when MODE == DImode the halves are SImode and the SImode generators
   are selected (and DImode generators for the wider split).  When a run
   of COUNT self-adds is no more costly than one constant shift (and we
   are not optimizing for size), the add sequence is emitted instead.
   NOTE(review): part of the guarding condition is elided in this view;
   the visible test compares COUNT additions against one shift.  */
24546 ix86_expand_ashl_const (rtx operand, int count, machine_mode mode)
24548 rtx (*insn)(rtx, rtx, rtx);
24551 || (count * ix86_cost->add <= ix86_cost->shift_const
24552 && !optimize_insn_for_size_p ()))
/* OPERAND + OPERAND == OPERAND << 1; repeated COUNT times.  */
24554 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
24555 while (count-- > 0)
24556 emit_insn (insn (operand, operand, operand));
/* Otherwise a single shift-left-by-COUNT in the half mode.  */
24560 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
24561 emit_insn (insn (operand, operand, GEN_INT (count)));
/* Split a double-word left shift OPERANDS[0] = OPERANDS[1] << OPERANDS[2].
   MODE is the double-word mode being split (DImode -> SImode halves;
   otherwise the halves are DImode, as the "mode == DImode" selections
   below show).  SCRATCH, when non-NULL and cmove is available, enables
   a branchless fix-up for variable counts >= half the width.  */
24566 ix86_split_ashl (rtx *operands, rtx scratch, machine_mode mode)
24568 rtx (*gen_ashl3)(rtx, rtx, rtx);
24569 rtx (*gen_shld)(rtx, rtx, rtx);
24570 int half_width = GET_MODE_BITSIZE (mode) >> 1;
24572 rtx low[2], high[2];
/* Constant shift count: fully resolved at expand time.  */
24575 if (CONST_INT_P (operands[2]))
24577 split_double_mode (mode, operands, 2, low, high);
/* Reduce the count modulo the operand width, as the hardware does.  */
24578 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
/* Count of a half-word or more: low input becomes the high half
   (shifted further if needed) and the low half becomes zero.  */
24580 if (count >= half_width)
24582 emit_move_insn (high[0], low[1]);
24583 emit_move_insn (low[0], const0_rtx);
24585 if (count > half_width)
24586 ix86_expand_ashl_const (high[0], count - half_width, mode);
/* Small constant count: shld feeds bits from the low half into the
   high half, then the low half is shifted on its own.  */
24590 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
24592 if (!rtx_equal_p (operands[0], operands[1]))
24593 emit_move_insn (operands[0], operands[1]);
24595 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
24596 ix86_expand_ashl_const (low[0], count, mode);
/* Variable shift count from here on.  */
24601 split_double_mode (mode, operands, 1, low, high);
24603 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
24605 if (operands[1] == const1_rtx)
24607 /* Assuming we've chosen a QImode capable registers, then 1 << N
24608 can be done with two 32/64-bit shifts, no branches, no cmoves. */
24609 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
24611 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
/* Test the count against mask HALF_WIDTH: ZF decides which half
   receives the single set bit; each half is setcc'd from flags.  */
24613 ix86_expand_clear (low[0]);
24614 ix86_expand_clear (high[0]);
24615 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
24617 d = gen_lowpart (QImode, low[0]);
24618 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
24619 s = gen_rtx_EQ (QImode, flags, const0_rtx);
24620 emit_insn (gen_rtx_SET (d, s));
24622 d = gen_lowpart (QImode, high[0]);
24623 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
24624 s = gen_rtx_NE (QImode, flags, const0_rtx);
24625 emit_insn (gen_rtx_SET (d, s));
24628 /* Otherwise, we can get the same results by manually performing
24629 a bit extract operation on bit 5/6, and then performing the two
24630 shifts. The two methods of getting 0/1 into low/high are exactly
24631 the same size. Avoiding the shift in the bit extract case helps
24632 pentium4 a bit; no one else seems to care much either way. */
24635 machine_mode half_mode;
24636 rtx (*gen_lshr3)(rtx, rtx, rtx);
24637 rtx (*gen_and3)(rtx, rtx, rtx);
24638 rtx (*gen_xor3)(rtx, rtx, rtx);
24639 HOST_WIDE_INT bits;
/* Select the half-mode generators matching MODE.  */
24642 if (mode == DImode)
24644 half_mode = SImode;
24645 gen_lshr3 = gen_lshrsi3;
24646 gen_and3 = gen_andsi3;
24647 gen_xor3 = gen_xorsi3;
24652 half_mode = DImode;
24653 gen_lshr3 = gen_lshrdi3;
24654 gen_and3 = gen_anddi3;
24655 gen_xor3 = gen_xordi3;
/* Narrow the count into HALF_MODE; use an explicit zero-extend when
   partial-register stalls matter for this tuning.  */
24659 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
24660 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
24662 x = gen_lowpart (half_mode, operands[2]);
24663 emit_insn (gen_rtx_SET (high[0], x));
/* Extract the half-selector bit into HIGH, its complement into LOW,
   then shift both halves by the (hardware-masked) count.  */
24665 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
24666 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
24667 emit_move_insn (low[0], high[0]);
24668 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
24671 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
24672 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
24676 if (operands[1] == constm1_rtx)
24678 /* For -1 << N, we can avoid the shld instruction, because we
24679 know that we're shifting 0...31/63 ones into a -1. */
24680 emit_move_insn (low[0], constm1_rtx);
24681 if (optimize_insn_for_size_p ())
24682 emit_move_insn (high[0], low[0]);
24684 emit_move_insn (high[0], constm1_rtx);
/* General variable-count case: shld into the high half, shift the
   low half, then fix up for counts >= HALF_WIDTH below.  */
24688 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
24690 if (!rtx_equal_p (operands[0], operands[1]))
24691 emit_move_insn (operands[0], operands[1]);
24693 split_double_mode (mode, operands, 1, low, high);
24694 emit_insn (gen_shld (high[0], low[0], operands[2]));
24697 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
/* With cmove and a scratch register the adjustment is branchless
   (x86_shift*_adj_1); otherwise x86_shift*_adj_2 is used.  */
24699 if (TARGET_CMOVE && scratch)
24701 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
24702 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
24704 ix86_expand_clear (scratch);
24705 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
24709 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
24710 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
24712 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
/* Split a double-word arithmetic (sign-filling) right shift
   OPERANDS[0] = OPERANDS[1] >> OPERANDS[2].  MODE and SCRATCH have the
   same meaning as in ix86_split_ashl.  */
24717 ix86_split_ashr (rtx *operands, rtx scratch, machine_mode mode)
24719 rtx (*gen_ashr3)(rtx, rtx, rtx)
24720 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
24721 rtx (*gen_shrd)(rtx, rtx, rtx);
24722 int half_width = GET_MODE_BITSIZE (mode) >> 1;
24724 rtx low[2], high[2];
/* Constant shift count: resolve the split at expand time.  */
24727 if (CONST_INT_P (operands[2]))
24729 split_double_mode (mode, operands, 2, low, high);
24730 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
/* Shift by width-1: both halves collapse to the sign mask.  */
24732 if (count == GET_MODE_BITSIZE (mode) - 1)
24734 emit_move_insn (high[0], high[1]);
24735 emit_insn (gen_ashr3 (high[0], high[0],
24736 GEN_INT (half_width - 1)));
24737 emit_move_insn (low[0], high[0]);
/* Count of at least a half-word: high input becomes the low half
   (shifted further if needed); high half is filled with sign bits.  */
24740 else if (count >= half_width)
24742 emit_move_insn (low[0], high[1]);
24743 emit_move_insn (high[0], low[0]);
24744 emit_insn (gen_ashr3 (high[0], high[0],
24745 GEN_INT (half_width - 1)));
24747 if (count > half_width)
24748 emit_insn (gen_ashr3 (low[0], low[0],
24749 GEN_INT (count - half_width)));
/* Small constant count: shrd merges high bits into the low half,
   then the high half is shifted arithmetically on its own.  */
24753 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
24755 if (!rtx_equal_p (operands[0], operands[1]))
24756 emit_move_insn (operands[0], operands[1]);
24758 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
24759 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
/* Variable count: shrd/sar by the masked count, then adjust for
   counts >= HALF_WIDTH, branchlessly when cmove and SCRATCH allow.  */
24764 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
24766 if (!rtx_equal_p (operands[0], operands[1]))
24767 emit_move_insn (operands[0], operands[1]);
24769 split_double_mode (mode, operands, 1, low, high);
24771 emit_insn (gen_shrd (low[0], high[0], operands[2]));
24772 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
24774 if (TARGET_CMOVE && scratch)
24776 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
24777 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
/* SCRATCH is set to the sign extension of the high half, used by
   the adjustment pattern as the fill value.  */
24779 emit_move_insn (scratch, high[0]);
24780 emit_insn (gen_ashr3 (scratch, scratch,
24781 GEN_INT (half_width - 1)));
24782 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
24787 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
24788 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
24790 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
/* Split a double-word logical (zero-filling) right shift
   OPERANDS[0] = OPERANDS[1] >> OPERANDS[2].  MODE and SCRATCH have the
   same meaning as in ix86_split_ashl.  */
24796 ix86_split_lshr (rtx *operands, rtx scratch, machine_mode mode)
24798 rtx (*gen_lshr3)(rtx, rtx, rtx)
24799 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
24800 rtx (*gen_shrd)(rtx, rtx, rtx);
24801 int half_width = GET_MODE_BITSIZE (mode) >> 1;
24803 rtx low[2], high[2];
/* Constant shift count: resolve the split at expand time.  */
24806 if (CONST_INT_P (operands[2]))
24808 split_double_mode (mode, operands, 2, low, high);
24809 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
/* Count of a half-word or more: high input becomes the low half
   (shifted further if needed); high half becomes zero.  */
24811 if (count >= half_width)
24813 emit_move_insn (low[0], high[1]);
24814 ix86_expand_clear (high[0]);
24816 if (count > half_width)
24817 emit_insn (gen_lshr3 (low[0], low[0],
24818 GEN_INT (count - half_width)));
/* Small constant count: shrd merges high bits into the low half,
   then the high half is shifted logically on its own.  */
24822 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
24824 if (!rtx_equal_p (operands[0], operands[1]))
24825 emit_move_insn (operands[0], operands[1]);
24827 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
24828 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
/* Variable count: shrd/shr by the masked count, then adjust for
   counts >= HALF_WIDTH, branchlessly when cmove and SCRATCH allow.  */
24833 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
24835 if (!rtx_equal_p (operands[0], operands[1]))
24836 emit_move_insn (operands[0], operands[1]);
24838 split_double_mode (mode, operands, 1, low, high);
24840 emit_insn (gen_shrd (low[0], high[0], operands[2]));
24841 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
24843 if (TARGET_CMOVE && scratch)
24845 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
24846 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
/* Zero is the fill value for a logical shift.  */
24848 ix86_expand_clear (scratch);
24849 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
24854 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
24855 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
24857 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
24862 /* Predict just emitted jump instruction to be taken with probability PROB. */
/* Attach a REG_BR_PROB note carrying probability PROB (scaled by
   REG_BR_PROB_BASE) to the jump instruction that was just emitted.
   Asserts that the last emitted insn really is a jump.  */
24864 predict_jump (int prob)
24866 rtx insn = get_last_insn ();
24867 gcc_assert (JUMP_P (insn));
24868 add_int_reg_note (insn, REG_BR_PROB, prob);
24871 /* Helper function for the string operations below. Test VARIABLE whether
24872 it is aligned to VALUE bytes. If true, jump to the label. */
/* Emit a test of VARIABLE & VALUE and a conditional jump (taken when the
   masked bits are zero) to a fresh label, which is returned so the caller
   can place it after the conditional code.  EPILOGUE selects the branch
   prediction: the visible predictions are 50% and 90%; presumably 50%
   for the epilogue use and 90% otherwise -- the selecting conditional is
   elided in this view, so confirm against the full source.  */
24873 static rtx_code_label *
24874 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
24876 rtx_code_label *label = gen_label_rtx ();
/* AND into a temporary so VARIABLE itself is left untouched.  */
24877 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
24878 if (GET_MODE (variable) == DImode)
24879 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
24881 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
24882 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
24885 predict_jump (REG_BR_PROB_BASE * 50 / 100);
24887 predict_jump (REG_BR_PROB_BASE * 90 / 100);
24891 /* Adjust COUNTER by the VALUE. */
/* Decrement COUNTREG in place by VALUE (emitted as an add of -VALUE),
   using the add pattern matching COUNTREG's mode.  */
24893 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
24895 rtx (*gen_add)(rtx, rtx, rtx)
24896 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
24898 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
24901 /* Zero extend possibly SImode EXP to Pmode register. */
/* Return EXP zero-extended (unsignedp == 1) to Pmode and forced into a
   fresh Pmode register.  */
24903 ix86_zero_extend_to_Pmode (rtx exp)
24905 return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
24908 /* Divide COUNTREG by SCALE. */
/* Return COUNTREG divided by SCALE: folded at expand time for a constant
   count, otherwise emitted as a logical right shift by log2(SCALE)
   (SCALE is assumed to be a power of two for the shift to be exact).
   COUNTREG must be a constant or a register.  */
24910 scale_counter (rtx countreg, int scale)
24916 if (CONST_INT_P (countreg))
24917 return GEN_INT (INTVAL (countreg) / scale);
24918 gcc_assert (REG_P (countreg));
24920 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
24921 GEN_INT (exact_log2 (scale)),
24922 NULL, 1, OPTAB_DIRECT);
24926 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
24927 DImode for constant loop counts. */
/* Return the mode to use for a memcpy/memset loop counter COUNT_EXP:
   the expression's own mode when it has one; for a constant, SImode
   unless the value needs more than 32 bits on a 64-bit target (the
   non-visible returns are elided in this sampled view).  */
24929 static machine_mode
24930 counter_mode (rtx count_exp)
24932 if (GET_MODE (count_exp) != VOIDmode)
24933 return GET_MODE (count_exp);
24934 if (!CONST_INT_P (count_exp))
/* Constant wider than 32 bits forces the DImode counter.  */
24936 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
24941 /* Copy the address to a Pmode register. This is used for x32 to
24942 truncate DImode TLS address to a SImode register. */
/* Copy ADDR into a fresh pointer register.  On x32 (Pmode == SImode) a
   DImode address (e.g. a TLS address) is copied in DImode and the
   SImode lowpart SUBREG is returned instead of truncating the rtx.  */
24945 ix86_copy_addr_to_reg (rtx addr)
24948 if (GET_MODE (addr) == Pmode || GET_MODE (addr) == VOIDmode)
24950 reg = copy_addr_to_reg (addr);
/* Mark the copy as a pointer for alias/CSE machinery.  */
24951 REG_POINTER (reg) = 1;
/* Only the x32 DImode-address case remains here.  */
24956 gcc_assert (GET_MODE (addr) == DImode && Pmode == SImode);
24957 reg = copy_to_mode_reg (DImode, addr);
24958 REG_POINTER (reg) = 1;
24959 return gen_rtx_SUBREG (SImode, reg, 0);
24963 /* When ISSETMEM is FALSE, output simple loop to move memory pointed to by SRCPTR
24964 to DESTPTR via chunks of MODE unrolled UNROLL times, overall size is COUNT
24965 specified in bytes. When ISSETMEM is TRUE, output the equivalent loop to set
24966 memory by VALUE (supposed to be in MODE).
24968 The size is rounded down to whole number of chunk size moved at once.
24969 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
/* See the block comment above: emit a copy (or, when ISSETMEM, a fill
   with VALUE) loop over chunks of MODE unrolled UNROLL times.  The
   processed size is COUNT rounded down to a whole number of
   MODE*UNROLL-byte pieces; the remainder is left for an epilogue.  */
24973 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
24974 rtx destptr, rtx srcptr, rtx value,
24975 rtx count, machine_mode mode, int unroll,
24976 int expected_size, bool issetmem)
24978 rtx_code_label *out_label, *top_label;
24980 machine_mode iter_mode = counter_mode (count);
24981 int piece_size_n = GET_MODE_SIZE (mode) * unroll;
24982 rtx piece_size = GEN_INT (piece_size_n);
24983 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
24987 top_label = gen_label_rtx ();
24988 out_label = gen_label_rtx ();
24989 iter = gen_reg_rtx (iter_mode);
/* SIZE = COUNT rounded down to a multiple of the piece size.  */
24991 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
24992 NULL, 1, OPTAB_DIRECT);
24993 /* Those two should combine. */
24994 if (piece_size == const1_rtx)
/* Skip the whole loop when no full piece fits; predicted rare.  */
24996 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
24998 predict_jump (REG_BR_PROB_BASE * 10 / 100);
25000 emit_move_insn (iter, const0_rtx);
25002 emit_label (top_label);
25004 tmp = convert_modes (Pmode, iter_mode, iter, true);
25006 /* This assert could be relaxed - in this case we'll need to compute
25007 smallest power of two, containing in PIECE_SIZE_N and pass it to
25009 gcc_assert ((piece_size_n & (piece_size_n - 1)) == 0);
/* Address the current piece as base + ITER inside the loop body.  */
25010 destmem = offset_address (destmem, tmp, piece_size_n);
25011 destmem = adjust_address (destmem, mode, 0);
25015 srcmem = offset_address (srcmem, copy_rtx (tmp), piece_size_n);
25016 srcmem = adjust_address (srcmem, mode, 0);
25018 /* When unrolling for chips that reorder memory reads and writes,
25019 we can save registers by using single temporary.
25020 Also using 4 temporaries is overkill in 32bit mode. */
/* NOTE(review): the "&& 0" deliberately disables this single-temporary
   variant; the load-all-then-store-all path below is always used.  */
25021 if (!TARGET_64BIT && 0)
25023 for (i = 0; i < unroll; i++)
25028 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
25030 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
25032 emit_move_insn (destmem, srcmem);
/* Copy path: load all UNROLL pieces into temporaries first, then
   store them, which tolerates dest/src overlap within a piece run.  */
25038 gcc_assert (unroll <= 4);
25039 for (i = 0; i < unroll; i++)
25041 tmpreg[i] = gen_reg_rtx (mode);
25045 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
25047 emit_move_insn (tmpreg[i], srcmem);
25049 for (i = 0; i < unroll; i++)
25054 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
25056 emit_move_insn (destmem, tmpreg[i]);
/* Set path: store VALUE into each of the UNROLL pieces.  */
25061 for (i = 0; i < unroll; i++)
25065 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
25066 emit_move_insn (destmem, value);
/* Advance the iterator and loop while ITER < SIZE.  */
25069 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
25070 true, OPTAB_LIB_WIDEN);
25072 emit_move_insn (iter, tmp);
25074 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
/* Derive the back-edge probability from the expected trip count.  */
25076 if (expected_size != -1)
25078 expected_size /= GET_MODE_SIZE (mode) * unroll;
25079 if (expected_size == 0)
25081 else if (expected_size > REG_BR_PROB_BASE)
25082 predict_jump (REG_BR_PROB_BASE - 1);
25084 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
25087 predict_jump (REG_BR_PROB_BASE * 80 / 100);
/* After the loop, bump the pointers past the processed bytes.  */
25088 iter = ix86_zero_extend_to_Pmode (iter);
25089 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
25090 true, OPTAB_LIB_WIDEN);
25091 if (tmp != destptr)
25092 emit_move_insn (destptr, tmp);
25095 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
25096 true, OPTAB_LIB_WIDEN);
25098 emit_move_insn (srcptr, tmp);
25100 emit_label (out_label);
25103 /* Output "rep; mov" or "rep; stos" instruction depending on ISSETMEM argument.
25104 When ISSETMEM is true, arguments SRCMEM and SRCPTR are ignored.
25105 When ISSETMEM is false, arguments VALUE and ORIG_VALUE are ignored.
25106 For setmem case, VALUE is a promoted to a wider size ORIG_VALUE.
25107 ORIG_VALUE is the original value passed to memset to fill the memory with.
25108 Other arguments have same meaning as for previous function. */
/* See the block comment above: emit a "rep stos" (ISSETMEM) or
   "rep mov" of COUNT bytes in chunks of MODE.  DESTEXP/SRCEXP encode
   the final pointer values (ptr + scaled count) for the patterns, and
   known mem sizes are propagated or cleared for alias info.  */
25111 expand_set_or_movmem_via_rep (rtx destmem, rtx srcmem,
25112 rtx destptr, rtx srcptr, rtx value, rtx orig_value,
25114 machine_mode mode, bool issetmem)
25119 HOST_WIDE_INT rounded_count;
25121 /* If possible, it is shorter to use rep movs.
25122 TODO: Maybe it is better to move this logic to decide_alg. */
/* Widen QImode to a 4-byte chunk when COUNT is a known multiple of 4
   (and, for setmem, the fill value is zero so widening is trivial).  */
25123 if (mode == QImode && CONST_INT_P (count) && !(INTVAL (count) & 3)
25124 && (!issetmem || orig_value == const0_rtx)
25127 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
25128 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
/* Counter counts chunks, not bytes.  */
25130 countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
25131 GET_MODE_SIZE (mode)));
/* DESTEXP = DESTPTR + COUNTREG * chunk-size (the end pointer).  */
25132 if (mode != QImode)
25134 destexp = gen_rtx_ASHIFT (Pmode, countreg,
25135 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
25136 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
25139 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
/* Record the known store size (rounded down to whole chunks), or drop
   a stale size when the count is not a compile-time constant.  */
25140 if ((!issetmem || orig_value == const0_rtx) && CONST_INT_P (count))
25143 = ROUND_DOWN (INTVAL (count), (HOST_WIDE_INT) GET_MODE_SIZE (mode));
25144 destmem = shallow_copy_rtx (destmem);
25145 set_mem_size (destmem, rounded_count);
25147 else if (MEM_SIZE_KNOWN_P (destmem))
25148 clear_mem_size (destmem);
/* Setmem: rep stos with the promoted fill VALUE.  */
25152 value = force_reg (mode, gen_lowpart (mode, value));
25153 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
/* Movmem: mirror the same bookkeeping for the source operand.  */
25157 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
25158 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
25159 if (mode != QImode)
25161 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
25162 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
25163 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
25166 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
25167 if (CONST_INT_P (count))
25170 = ROUND_DOWN (INTVAL (count), (HOST_WIDE_INT) GET_MODE_SIZE (mode));
25171 srcmem = shallow_copy_rtx (srcmem);
25172 set_mem_size (srcmem, rounded_count);
25176 if (MEM_SIZE_KNOWN_P (srcmem))
25177 clear_mem_size (srcmem);
25179 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
25184 /* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to
25186 SRC is passed by pointer to be updated on return.
25187 Return value is updated DST. */
/* See the block comment above: emit straight-line moves copying
   SIZE_TO_MOVE bytes from *SRCMEM to DESTMEM, choosing the widest
   integer (or word-sized vector) mode for which a move pattern exists.
   DESTPTR/SRCPTR are advanced; *SRCMEM is updated and the updated
   DESTMEM is returned.  */
25189 emit_memmov (rtx destmem, rtx *srcmem, rtx destptr, rtx srcptr,
25190 HOST_WIDE_INT size_to_move)
25192 rtx dst = destmem, src = *srcmem, adjust, tempreg;
25193 enum insn_code code;
25194 machine_mode move_mode;
25197 /* Find the widest mode in which we could perform moves.
25198 Start with the biggest power of 2 less than SIZE_TO_MOVE and half
25199 it until move of such size is supported. */
25200 piece_size = 1 << floor_log2 (size_to_move);
25201 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
25202 code = optab_handler (mov_optab, move_mode);
25203 while (code == CODE_FOR_nothing && piece_size > 1)
25206 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
25207 code = optab_handler (mov_optab, move_mode);
25210 /* Find the corresponding vector mode with the same size as MOVE_MODE.
25211 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
25212 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
25214 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
25215 move_mode = mode_for_vector (word_mode, nunits);
25216 code = optab_handler (mov_optab, move_mode);
/* No movable vector mode of that size: fall back to word moves.  */
25217 if (code == CODE_FOR_nothing)
25219 move_mode = word_mode;
25220 piece_size = GET_MODE_SIZE (move_mode);
25221 code = optab_handler (mov_optab, move_mode);
25224 gcc_assert (code != CODE_FOR_nothing);
25226 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
25227 src = adjust_automodify_address_nv (src, move_mode, srcptr, 0);
25229 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZES moves. */
25230 gcc_assert (size_to_move % piece_size == 0);
25231 adjust = GEN_INT (piece_size);
25232 for (i = 0; i < size_to_move; i += piece_size)
25234 /* We move from memory to memory, so we'll need to do it via
25235 a temporary register. */
25236 tempreg = gen_reg_rtx (move_mode);
25237 emit_insn (GEN_FCN (code) (tempreg, src));
25238 emit_insn (GEN_FCN (code) (dst, tempreg));
/* Advance both pointers by one piece and re-derive the MEMs.  */
25240 emit_move_insn (destptr,
25241 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
25242 emit_move_insn (srcptr,
25243 gen_rtx_PLUS (Pmode, copy_rtx (srcptr), adjust));
25245 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
25247 src = adjust_automodify_address_nv (src, move_mode, srcptr,
25251 /* Update DST and SRC rtx. */
25256 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
/* See the comment above: copy the trailing COUNT & (MAX_SIZE - 1)
   bytes.  A constant count is handled with straight-line emit_memmov
   calls over the set bits of the remainder; a variable count uses a
   byte loop for large MAX_SIZE, or alignment-tested single moves of
   4/2/1 bytes for small MAX_SIZE.  */
25258 expand_movmem_epilogue (rtx destmem, rtx srcmem,
25259 rtx destptr, rtx srcptr, rtx count, int max_size)
25262 if (CONST_INT_P (count))
25264 HOST_WIDE_INT countval = INTVAL (count);
25265 HOST_WIDE_INT epilogue_size = countval % max_size;
25268 /* For now MAX_SIZE should be a power of 2. This assert could be
25269 relaxed, but it'll require a bit more complicated epilogue
25271 gcc_assert ((max_size & (max_size - 1)) == 0);
/* One emit_memmov per set bit of the remainder, widest first.  */
25272 for (i = max_size; i >= 1; i >>= 1)
25274 if (epilogue_size & i)
25275 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
/* Variable count, large MAX_SIZE: mask the count and run a QImode
   copy loop over the remainder.  */
25281 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
25282 count, 1, OPTAB_DIRECT);
25283 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
25284 count, QImode, 1, 4, false);
25288 /* When there are stringops, we can cheaply increase dest and src pointers.
25289 Otherwise we save code size by maintaining offset (zero is readily
25290 available from preceding rep operation) and using x86 addressing modes.
25292 if (TARGET_SINGLE_STRINGOP)
/* Stringop variant: movs updates both pointers implicitly; test each
   size bit of COUNT and emit one conditional move per size.  */
25296 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
25297 src = change_address (srcmem, SImode, srcptr);
25298 dest = change_address (destmem, SImode, destptr);
25299 emit_insn (gen_strmov (destptr, dest, srcptr, src));
25300 emit_label (label);
25301 LABEL_NUSES (label) = 1;
25305 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
25306 src = change_address (srcmem, HImode, srcptr);
25307 dest = change_address (destmem, HImode, destptr);
25308 emit_insn (gen_strmov (destptr, dest, srcptr, src));
25309 emit_label (label);
25310 LABEL_NUSES (label) = 1;
25314 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
25315 src = change_address (srcmem, QImode, srcptr);
25316 dest = change_address (destmem, QImode, destptr);
25317 emit_insn (gen_strmov (destptr, dest, srcptr, src));
25318 emit_label (label);
25319 LABEL_NUSES (label) = 1;
/* Non-stringop variant: keep a running OFFSET register and use
   base+offset addressing instead of bumping the pointers.  */
25324 rtx offset = force_reg (Pmode, const0_rtx);
25329 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
25330 src = change_address (srcmem, SImode, srcptr);
25331 dest = change_address (destmem, SImode, destptr);
25332 emit_move_insn (dest, src);
25333 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
25334 true, OPTAB_LIB_WIDEN);
25336 emit_move_insn (offset, tmp);
25337 emit_label (label);
25338 LABEL_NUSES (label) = 1;
25342 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
25343 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
25344 src = change_address (srcmem, HImode, tmp);
25345 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
25346 dest = change_address (destmem, HImode, tmp);
25347 emit_move_insn (dest, src);
25348 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
25349 true, OPTAB_LIB_WIDEN);
25351 emit_move_insn (offset, tmp);
25352 emit_label (label);
25353 LABEL_NUSES (label) = 1;
25357 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
25358 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
25359 src = change_address (srcmem, QImode, tmp);
25360 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
25361 dest = change_address (destmem, QImode, tmp);
25362 emit_move_insn (dest, src);
25363 emit_label (label);
25364 LABEL_NUSES (label) = 1;
25369 /* This function emits moves to fill SIZE_TO_MOVE bytes starting from DESTMEM
25370 with value PROMOTED_VAL.
25371 SRC is passed by pointer to be updated on return.
25372 Return value is updated DST. */
/* See the comment above: emit straight-line stores filling
   SIZE_TO_MOVE bytes at DESTMEM with PROMOTED_VAL, in pieces of
   PROMOTED_VAL's mode (narrowed when SIZE_TO_MOVE is smaller).
   DESTPTR is advanced; the updated DESTMEM is returned.  */
25374 emit_memset (rtx destmem, rtx destptr, rtx promoted_val,
25375 HOST_WIDE_INT size_to_move)
25377 rtx dst = destmem, adjust;
25378 enum insn_code code;
25379 machine_mode move_mode;
25382 /* Find the widest mode in which we could perform moves.
25383 Start with the biggest power of 2 less than SIZE_TO_MOVE and half
25384 it until move of such size is supported. */
25385 move_mode = GET_MODE (promoted_val);
/* VOIDmode happens for a CONST_INT value; treat it as bytes.  */
25386 if (move_mode == VOIDmode)
25387 move_mode = QImode;
/* Narrow the promoted value when it is wider than the area to set.  */
25388 if (size_to_move < GET_MODE_SIZE (move_mode))
25390 move_mode = mode_for_size (size_to_move * BITS_PER_UNIT, MODE_INT, 0);
25391 promoted_val = gen_lowpart (move_mode, promoted_val);
25393 piece_size = GET_MODE_SIZE (move_mode);
25394 code = optab_handler (mov_optab, move_mode);
25395 gcc_assert (code != CODE_FOR_nothing && promoted_val != NULL_RTX);
25397 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
25399 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZES moves. */
25400 gcc_assert (size_to_move % piece_size == 0);
25401 adjust = GEN_INT (piece_size);
25402 for (i = 0; i < size_to_move; i += piece_size)
/* Word-sized or smaller pieces can use the strset pattern, which
   also advances DESTPTR; wider (vector) pieces store directly and
   bump DESTPTR with an explicit add.  */
25404 if (piece_size <= GET_MODE_SIZE (word_mode))
25406 emit_insn (gen_strset (destptr, dst, promoted_val));
25407 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
25412 emit_insn (GEN_FCN (code) (dst, promoted_val));
25414 emit_move_insn (destptr,
25415 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
25417 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
25421 /* Update DST rtx. */
25424 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
/* See the comment above: mask COUNT down to the at-most MAX_SIZE - 1
   remaining bytes and fill them with a byte-granular set loop.  */
25426 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
25427 rtx count, int max_size)
25430 expand_simple_binop (counter_mode (count), AND, count,
25431 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
25432 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
25433 gen_lowpart (QImode, value), count, QImode,
25434 1, max_size / 2, true);
25437 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
/* See the comment above: store the trailing COUNT & (MAX_SIZE - 1)
   bytes of VALUE (or the wider VEC_VALUE when it covers the piece).
   A constant count uses straight-line emit_memset calls; a variable
   count falls back to a loop, or to alignment-tested strset stores of
   16/8/4/2/1 bytes.  */
25439 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value,
25440 rtx count, int max_size)
25444 if (CONST_INT_P (count))
25446 HOST_WIDE_INT countval = INTVAL (count);
25447 HOST_WIDE_INT epilogue_size = countval % max_size;
25450 /* For now MAX_SIZE should be a power of 2. This assert could be
25451 relaxed, but it'll require a bit more complicated epilogue
25453 gcc_assert ((max_size & (max_size - 1)) == 0);
/* One emit_memset per set bit of the remainder, widest first;
   prefer the vector value when it spans the whole piece.  */
25454 for (i = max_size; i >= 1; i >>= 1)
25456 if (epilogue_size & i)
25458 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
25459 destmem = emit_memset (destmem, destptr, vec_value, i);
25461 destmem = emit_memset (destmem, destptr, value, i);
/* Large variable remainders go through the byte loop.  */
25468 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
/* Alignment-tested stores: each size bit of COUNT guards a group of
   strset stores (split into DImode or SImode pieces as available).  */
25473 rtx_code_label *label = ix86_expand_aligntest (count, 16, true);
25476 dest = change_address (destmem, DImode, destptr);
25477 emit_insn (gen_strset (destptr, dest, value));
25478 dest = adjust_automodify_address_nv (dest, DImode, destptr, 8);
25479 emit_insn (gen_strset (destptr, dest, value));
25483 dest = change_address (destmem, SImode, destptr);
25484 emit_insn (gen_strset (destptr, dest, value));
25485 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
25486 emit_insn (gen_strset (destptr, dest, value));
25487 dest = adjust_automodify_address_nv (dest, SImode, destptr, 8);
25488 emit_insn (gen_strset (destptr, dest, value));
25489 dest = adjust_automodify_address_nv (dest, SImode, destptr, 12);
25490 emit_insn (gen_strset (destptr, dest, value));
25492 emit_label (label);
25493 LABEL_NUSES (label) = 1;
25497 rtx_code_label *label = ix86_expand_aligntest (count, 8, true);
25500 dest = change_address (destmem, DImode, destptr);
25501 emit_insn (gen_strset (destptr, dest, value));
25505 dest = change_address (destmem, SImode, destptr);
25506 emit_insn (gen_strset (destptr, dest, value));
25507 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
25508 emit_insn (gen_strset (destptr, dest, value));
25510 emit_label (label);
25511 LABEL_NUSES (label) = 1;
25515 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
25516 dest = change_address (destmem, SImode, destptr);
25517 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
25518 emit_label (label);
25519 LABEL_NUSES (label) = 1;
25523 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
25524 dest = change_address (destmem, HImode, destptr);
25525 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
25526 emit_label (label);
25527 LABEL_NUSES (label) = 1;
25531 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
25532 dest = change_address (destmem, QImode, destptr);
25533 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
25534 emit_label (label);
25535 LABEL_NUSES (label) = 1;
25539 /* Depending on ISSETMEM, copy enough from SRCMEM to DESTMEM or set enough to
25540 DESTMEM to align it to DESIRED_ALIGNMENT. Original alignment is ALIGN.
25541 Depending on ISSETMEM, either arguments SRCMEM/SRCPTR or VALUE/VEC_VALUE are
25543 Return value is updated DESTMEM. */
25545 expand_set_or_movmem_prologue (rtx destmem, rtx srcmem,
25546 rtx destptr, rtx srcptr, rtx value,
25547 rtx vec_value, rtx count, int align,
25548 int desired_alignment, bool issetmem)
/* Walk the power-of-two steps below DESIRED_ALIGNMENT; for each step I,
   ix86_expand_aligntest emits a runtime test on the low bits of DESTPTR
   and a label skipped when DESTPTR is already aligned to 2*I.  */
25551 for (i = 1; i < desired_alignment; i <<= 1)
25555 rtx_code_label *label = ix86_expand_aligntest (destptr, i, false);
/* For memset, prefer the wide vector value when the chunk I exceeds the
   size of the scalar promoted VALUE.  */
25558 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
25559 destmem = emit_memset (destmem, destptr, vec_value, i);
25561 destmem = emit_memset (destmem, destptr, value, i);
/* Memcpy case: move I bytes and advance both pointers.  */
25564 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
/* Account for the bytes just emitted so the main loop copies less.  */
25565 ix86_adjust_counter (count, i);
25566 emit_label (label);
25567 LABEL_NUSES (label) = 1;
/* After this iteration DESTMEM is known aligned to the next step.  */
25568 set_mem_align (destmem, i * 2 * BITS_PER_UNIT);
25574 /* Test if COUNT&SIZE is nonzero and if so, expand movmem
25575 or setmem sequence that is valid for SIZE..2*SIZE-1 bytes
25576 and jump to DONE_LABEL. */
25578 expand_small_movmem_or_setmem (rtx destmem, rtx srcmem,
25579 rtx destptr, rtx srcptr,
25580 rtx value, rtx vec_value,
25581 rtx count, int size,
25582 rtx done_label, bool issetmem)
/* Emit a runtime test of the SIZE bit of COUNT; LABEL is taken when the
   bit is clear (nothing in the SIZE..2*SIZE-1 range to handle here).  */
25584 rtx_code_label *label = ix86_expand_aligntest (count, size, false);
/* Widest integer mode covering SIZE bytes; may be narrowed or replaced
   by a vector mode below.  */
25585 machine_mode mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 1);
25589 /* If we do not have vector value to copy, we must reduce size. */
25594 if (GET_MODE (value) == VOIDmode && size > 8)
25596 else if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (GET_MODE (value)))
25597 mode = GET_MODE (value);
/* Use the vector-promoted value (and its mode) when available.  */
25600 mode = GET_MODE (vec_value), value = vec_value;
25604 /* Choose appropriate vector mode. */
25606 mode = TARGET_AVX ? V32QImode : TARGET_SSE ? V16QImode : DImode;
25607 else if (size >= 16)
25608 mode = TARGET_SSE ? V16QImode : DImode;
25609 srcmem = change_address (srcmem, mode, srcptr);
25611 destmem = change_address (destmem, mode, destptr);
25612 modesize = GEN_INT (GET_MODE_SIZE (mode));
25613 gcc_assert (GET_MODE_SIZE (mode) <= size);
/* First pass: store/copy the leading SIZE bytes in MODE-sized chunks.  */
25614 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
25617 emit_move_insn (destmem, gen_lowpart (mode, value));
25620 emit_move_insn (destmem, srcmem);
25621 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
25623 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
/* Rebase addresses to COUNT - 2*SIZE so the second pass covers the tail;
   it may overlap the first pass, which is harmless for copies/sets.  */
25626 destmem = offset_address (destmem, count, 1);
25627 destmem = offset_address (destmem, GEN_INT (-2 * size),
25628 GET_MODE_SIZE (mode));
25631 srcmem = offset_address (srcmem, count, 1);
25632 srcmem = offset_address (srcmem, GEN_INT (-2 * size),
25633 GET_MODE_SIZE (mode));
/* Second pass: store/copy the trailing SIZE bytes.  */
25635 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
25638 emit_move_insn (destmem, gen_lowpart (mode, value));
25641 emit_move_insn (destmem, srcmem);
25642 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
25644 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
/* Whole block handled; skip the remaining expansion.  */
25646 emit_jump_insn (gen_jump (done_label));
25649 emit_label (label);
25650 LABEL_NUSES (label) = 1;
25653 /* Handle small memcpy (up to SIZE that is supposed to be small power of 2)
25654 and get ready for the main memcpy loop by copying initial DESIRED_ALIGN-ALIGN
25655 bytes and last SIZE bytes adjusting DESTPTR/SRCPTR/COUNT in a way we can
25656 proceed with a loop copying SIZE bytes at once. Do moves in MODE.
25657 DONE_LABEL is a label after the whole copying sequence. The label is created
25658 on demand if *DONE_LABEL is NULL.
25659 MIN_SIZE is minimal size of block copied. This value gets adjusted for new
25660 bounds after the initial copies.
25662 DESTMEM/SRCMEM are memory expressions pointing to the copies block,
25663 DESTPTR/SRCPTR are pointers to the block. DYNAMIC_CHECK indicate whether
25664 we will dispatch to a library call for large blocks.
25666 In pseudocode we do:
25670 Assume that SIZE is 4. Bigger sizes are handled analogously
25673 copy 4 bytes from SRCPTR to DESTPTR
25674 copy 4 bytes from SRCPTR + COUNT - 4 to DESTPTR + COUNT - 4
25679 copy 1 byte from SRCPTR to DESTPTR
25682 copy 2 bytes from SRCPTR to DESTPTR
25683 copy 2 bytes from SRCPTR + COUNT - 2 to DESTPTR + COUNT - 2
25688 copy at least DESIRED_ALIGN-ALIGN bytes from SRCPTR to DESTPTR
25689 copy SIZE bytes from SRCPTR + COUNT - SIZE to DESTPTR + COUNT -SIZE
25691 OLD_DESTPTR = DESTPTR;
25692 Align DESTPTR up to DESIRED_ALIGN
25693 SRCPTR += DESTPTR - OLD_DESTPTR
25694 COUNT -= DEST_PTR - OLD_DESTPTR
25696 Round COUNT down to multiple of SIZE
25697 << optional caller supplied zero size guard is here >>
25698 << optional caller supplied dynamic check is here >>
25699 << caller supplied main copy loop is here >>
25704 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves (rtx destmem, rtx srcmem,
25705 rtx *destptr, rtx *srcptr,
25707 rtx value, rtx vec_value,
25709 rtx_code_label **done_label,
25713 unsigned HOST_WIDE_INT *min_size,
25714 bool dynamic_check,
/* Implements the scheme described in the big pseudocode comment above:
   handle blocks smaller than SIZE via expand_small_movmem_or_setmem,
   copy the misaligned head and tail, then align DESTPTR and round COUNT
   for the caller-supplied main loop.  */
25717 rtx_code_label *loop_label = NULL, *label;
25720 int prolog_size = 0;
25723 /* Chose proper value to copy. */
25724 if (issetmem && VECTOR_MODE_P (mode))
25725 mode_value = vec_value;
25727 mode_value = value;
25728 gcc_assert (GET_MODE_SIZE (mode) <= size);
25730 /* See if block is big or small, handle small blocks. */
25731 if (!CONST_INT_P (*count) && *min_size < (unsigned HOST_WIDE_INT)size)
25734 loop_label = gen_label_rtx ();
25737 *done_label = gen_label_rtx ();
/* Dispatch to the big-block path when COUNT >= SIZE2.  */
25739 emit_cmp_and_jump_insns (*count, GEN_INT (size2), GE, 0, GET_MODE (*count),
25743 /* Handle sizes > 3. */
25744 for (;size2 > 2; size2 >>= 1)
25745 expand_small_movmem_or_setmem (destmem, srcmem,
25749 size2, *done_label, issetmem);
25750 /* Nothing to copy? Jump to DONE_LABEL if so */
25751 emit_cmp_and_jump_insns (*count, const0_rtx, EQ, 0, GET_MODE (*count),
25754 /* Do a byte copy. */
25755 destmem = change_address (destmem, QImode, *destptr);
25757 emit_move_insn (destmem, gen_lowpart (QImode, value));
25760 srcmem = change_address (srcmem, QImode, *srcptr);
25761 emit_move_insn (destmem, srcmem);
25764 /* Handle sizes 2 and 3. */
/* A 2-byte store at offset 0 plus one at COUNT-2 covers both cases.  */
25765 label = ix86_expand_aligntest (*count, 2, false);
25766 destmem = change_address (destmem, HImode, *destptr);
25767 destmem = offset_address (destmem, *count, 1);
25768 destmem = offset_address (destmem, GEN_INT (-2), 2);
25770 emit_move_insn (destmem, gen_lowpart (HImode, value));
25773 srcmem = change_address (srcmem, HImode, *srcptr);
25774 srcmem = offset_address (srcmem, *count, 1);
25775 srcmem = offset_address (srcmem, GEN_INT (-2), 2);
25776 emit_move_insn (destmem, srcmem);
25779 emit_label (label);
25780 LABEL_NUSES (label) = 1;
25781 emit_jump_insn (gen_jump (*done_label));
/* From here on the block is known to hold at least SIZE bytes.  */
25785 gcc_assert (*min_size >= (unsigned HOST_WIDE_INT)size
25786 || UINTVAL (*count) >= (unsigned HOST_WIDE_INT)size);
25788 /* Start memcpy for COUNT >= SIZE. */
25791 emit_label (loop_label);
25792 LABEL_NUSES (loop_label) = 1;
25795 /* Copy first desired_align bytes. */
25797 srcmem = change_address (srcmem, mode, *srcptr);
25798 destmem = change_address (destmem, mode, *destptr);
25799 modesize = GEN_INT (GET_MODE_SIZE (mode));
/* Emit possibly-misaligned MODE-sized moves until at least
   DESIRED_ALIGN - ALIGN head bytes are covered.  */
25800 for (n = 0; prolog_size < desired_align - align; n++)
25803 emit_move_insn (destmem, mode_value);
25806 emit_move_insn (destmem, srcmem);
25807 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
25809 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
25810 prolog_size += GET_MODE_SIZE (mode);
25814 /* Copy last SIZE bytes. */
/* Address the tail at COUNT - SIZE - PROLOG_SIZE and move forward.  */
25815 destmem = offset_address (destmem, *count, 1);
25816 destmem = offset_address (destmem,
25817 GEN_INT (-size - prolog_size),
25820 emit_move_insn (destmem, mode_value);
25823 srcmem = offset_address (srcmem, *count, 1);
25824 srcmem = offset_address (srcmem,
25825 GEN_INT (-size - prolog_size),
25827 emit_move_insn (destmem, srcmem);
25829 for (n = 1; n * GET_MODE_SIZE (mode) < size; n++)
25831 destmem = offset_address (destmem, modesize, 1);
25833 emit_move_insn (destmem, mode_value);
25836 srcmem = offset_address (srcmem, modesize, 1);
25837 emit_move_insn (destmem, srcmem);
25841 /* Align destination. */
25842 if (desired_align > 1 && desired_align > align)
25844 rtx saveddest = *destptr;
25846 gcc_assert (desired_align <= size);
25847 /* Align destptr up, place it to new register. */
25848 *destptr = expand_simple_binop (GET_MODE (*destptr), PLUS, *destptr,
25849 GEN_INT (prolog_size),
25850 NULL_RTX, 1, OPTAB_DIRECT);
/* Preserve REG_POINTER on the new pseudo so aliasing info survives.  */
25851 if (REG_P (*destptr) && REG_P (saveddest) && REG_POINTER (saveddest))
25852 REG_POINTER (*destptr) = 1;
25853 *destptr = expand_simple_binop (GET_MODE (*destptr), AND, *destptr,
25854 GEN_INT (-desired_align),
25855 *destptr, 1, OPTAB_DIRECT);
25856 /* See how many bytes we skipped. */
25857 saveddest = expand_simple_binop (GET_MODE (*destptr), MINUS, saveddest,
25859 saveddest, 1, OPTAB_DIRECT);
25860 /* Adjust srcptr and count. */
/* SAVEDDEST here is (old - new) destptr, i.e. minus the skip amount,
   so subtracting it from SRCPTR and adding it to COUNT advances SRCPTR
   and shrinks COUNT by the skipped bytes.  */
25862 *srcptr = expand_simple_binop (GET_MODE (*srcptr), MINUS, *srcptr,
25863 saveddest, *srcptr, 1, OPTAB_DIRECT);
25864 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
25865 saveddest, *count, 1, OPTAB_DIRECT);
25866 /* We copied at most size + prolog_size. */
25867 if (*min_size > (unsigned HOST_WIDE_INT)(size + prolog_size))
25869 = ROUND_DOWN (*min_size - size, (unsigned HOST_WIDE_INT)size);
25873 /* Our loops always round down the block size, but for dispatch to
25874 library we need precise value. */
25876 *count = expand_simple_binop (GET_MODE (*count), AND, *count,
25877 GEN_INT (-size), *count, 1, OPTAB_DIRECT);
25881 gcc_assert (prolog_size == 0);
25882 /* Decrease count, so we won't end up copying last word twice. */
25883 if (!CONST_INT_P (*count))
25884 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
25885 constm1_rtx, *count, 1, OPTAB_DIRECT);
25887 *count = GEN_INT (ROUND_DOWN (UINTVAL (*count) - 1,
25888 (unsigned HOST_WIDE_INT)size));
25890 *min_size = ROUND_DOWN (*min_size - 1, (unsigned HOST_WIDE_INT)size);
25895 /* This function is like the previous one, except here we know how many bytes
25896 need to be copied. That allows us to update alignment not only of DST, which
25897 is returned, but also of SRC, which is passed as a pointer for that
25900 expand_set_or_movmem_constant_prologue (rtx dst, rtx *srcp, rtx destreg,
25901 rtx srcreg, rtx value, rtx vec_value,
25902 int desired_align, int align_bytes,
/* Constant-count variant of the alignment prologue: ALIGN_BYTES is known
   at compile time, so emit a fixed sequence of power-of-two sized pieces
   and also propagate the improved alignment/size info to *SRCP.  */
25906 rtx orig_dst = dst;
25907 rtx orig_src = NULL;
25908 int piece_size = 1;
25909 int copied_bytes = 0;
25913 gcc_assert (srcp != NULL);
/* Emit one piece per set bit of ALIGN_BYTES, smallest first, until the
   destination is aligned to DESIRED_ALIGN.  */
25918 for (piece_size = 1;
25919 piece_size <= desired_align && copied_bytes < align_bytes;
25922 if (align_bytes & piece_size)
/* Memset: use the vector value for pieces wider than the scalar VALUE.  */
25926 if (vec_value && piece_size > GET_MODE_SIZE (GET_MODE (value)))
25927 dst = emit_memset (dst, destreg, vec_value, piece_size);
25929 dst = emit_memset (dst, destreg, value, piece_size);
25932 dst = emit_memmov (dst, &src, destreg, srcreg, piece_size);
25933 copied_bytes += piece_size;
/* Record the alignment now guaranteed on DST and shrink its known size.  */
25936 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
25937 set_mem_align (dst, desired_align * BITS_PER_UNIT);
25938 if (MEM_SIZE_KNOWN_P (orig_dst))
25939 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
/* Derive what alignment SRC gained from skipping ALIGN_BYTES bytes.  */
25943 int src_align_bytes = get_mem_align_offset (src, desired_align
25945 if (src_align_bytes >= 0)
25946 src_align_bytes = desired_align - src_align_bytes;
25947 if (src_align_bytes >= 0)
25949 unsigned int src_align;
/* Find the largest power of two on which SRC and DST offsets agree.  */
25950 for (src_align = desired_align; src_align >= 2; src_align >>= 1)
25952 if ((src_align_bytes & (src_align - 1))
25953 == (align_bytes & (src_align - 1)))
25956 if (src_align > (unsigned int) desired_align)
25957 src_align = desired_align;
25958 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
25959 set_mem_align (src, src_align * BITS_PER_UNIT);
25961 if (MEM_SIZE_KNOWN_P (orig_src))
25962 set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
25969 /* Return true if ALG can be used in current context.
25970 Assume we expand memset if MEMSET is true. */
25972 alg_usable_p (enum stringop_alg alg, bool memset, bool have_as)
/* no_stringop is never emittable; vector_loop needs SSE or AVX.  */
25974 if (alg == no_stringop)
25976 if (alg == vector_loop)
25977 return TARGET_SSE || TARGET_AVX;
25978 /* Algorithms using the rep prefix want at least edi and ecx;
25979 additionally, memset wants eax and memcpy wants esi. Don't
25980 consider such algorithms if the user has appropriated those
25981 registers for their own purposes, or if we have a non-default
25982 address space, since some string insns cannot override the segment. */
25983 if (alg == rep_prefix_1_byte
25984 || alg == rep_prefix_4_byte
25985 || alg == rep_prefix_8_byte)
/* Reject rep-prefix forms when any required hard register is fixed
   (HAVE_AS is the non-default address space case mentioned above).  */
25989 if (fixed_regs[CX_REG]
25990 || fixed_regs[DI_REG]
25991 || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG]))
25997 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
25998 static enum stringop_alg
25999 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
26000 unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size,
26001 bool memset, bool zero_memset, bool have_as,
26002 int *dynamic_check, bool *noalign)
/* Select a stringop_alg for the block using the per-CPU cost tables,
   honoring -mstringop-strategy, -Os, and the inline-all options.
   *DYNAMIC_CHECK is set to a size threshold when a runtime dispatch to
   a library call should be emitted, else left at -1.  */
26004 const struct stringop_algs * algs;
26005 bool optimize_for_speed;
26007 const struct processor_costs *cost;
26009 bool any_alg_usable_p = false;
26012 *dynamic_check = -1;
26014 /* Even if the string operation call is cold, we still might spend a lot
26015 of time processing large blocks. */
26016 if (optimize_function_for_size_p (cfun)
26017 || (optimize_insn_for_size_p ()
26019 || (expected_size != -1 && expected_size < 256))))
26020 optimize_for_speed = false;
26022 optimize_for_speed = true;
26024 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
/* Pick the memset or memcpy table for the current ABI word size.  */
26026 algs = &cost->memset[TARGET_64BIT != 0];
26028 algs = &cost->memcpy[TARGET_64BIT != 0];
26030 /* See maximal size for user defined algorithm. */
26031 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
26033 enum stringop_alg candidate = algs->size[i].alg;
26034 bool usable = alg_usable_p (candidate, memset, have_as);
26035 any_alg_usable_p |= usable;
26037 if (candidate != libcall && candidate && usable)
26038 max = algs->size[i].max;
26041 /* If expected size is not known but max size is small enough
26042 so inline version is a win, set expected size into
26044 if (((max > 1 && (unsigned HOST_WIDE_INT) max >= max_size) || max == -1)
26045 && expected_size == -1)
26046 expected_size = min_size / 2 + max_size / 2;
26048 /* If user specified the algorithm, honor it if possible. */
26049 if (ix86_stringop_alg != no_stringop
26050 && alg_usable_p (ix86_stringop_alg, memset, have_as))
26051 return ix86_stringop_alg;
26052 /* rep; movq or rep; movl is the smallest variant. */
26053 else if (!optimize_for_speed)
/* -Os: byte variant unless COUNT is a known multiple of 4 and memset
   stores zero (non-zero memset can't use the 4-byte form directly).  */
26056 if (!count || (count & 3) || (memset && !zero_memset))
26057 return alg_usable_p (rep_prefix_1_byte, memset, have_as)
26058 ? rep_prefix_1_byte : loop_1_byte;
26060 return alg_usable_p (rep_prefix_4_byte, memset, have_as)
26061 ? rep_prefix_4_byte : loop;
26063 /* Very tiny blocks are best handled via the loop, REP is expensive to
26065 else if (expected_size != -1 && expected_size < 4)
26066 return loop_1_byte;
26067 else if (expected_size != -1)
26069 enum stringop_alg alg = libcall;
26070 bool alg_noalign = false;
/* Scan the size buckets for the first one covering EXPECTED_SIZE.  */
26071 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
26073 /* We get here if the algorithms that were not libcall-based
26074 were rep-prefix based and we are unable to use rep prefixes
26075 based on global register usage. Break out of the loop and
26076 use the heuristic below. */
26077 if (algs->size[i].max == 0)
26079 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
26081 enum stringop_alg candidate = algs->size[i].alg;
26083 if (candidate != libcall
26084 && alg_usable_p (candidate, memset, have_as))
26087 alg_noalign = algs->size[i].noalign;
26089 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
26090 last non-libcall inline algorithm. */
26091 if (TARGET_INLINE_ALL_STRINGOPS)
26093 /* When the current size is best to be copied by a libcall,
26094 but we are still forced to inline, run the heuristic below
26095 that will pick code for medium sized blocks. */
26096 if (alg != libcall)
26098 *noalign = alg_noalign;
26101 else if (!any_alg_usable_p)
26104 else if (alg_usable_p (candidate, memset, have_as))
26106 *noalign = algs->size[i].noalign;
26112 /* When asked to inline the call anyway, try to pick meaningful choice.
26113 We look for maximal size of block that is faster to copy by hand and
26114 take blocks of at most of that size guessing that average size will
26115 be roughly half of the block.
26117 If this turns out to be bad, we might simply specify the preferred
26118 choice in ix86_costs. */
26119 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
26120 && (algs->unknown_size == libcall
26121 || !alg_usable_p (algs->unknown_size, memset, have_as)))
26123 enum stringop_alg alg;
26125 /* If there aren't any usable algorithms, then recursing on
26126 smaller sizes isn't going to find anything. Just return the
26127 simple byte-at-a-time copy loop. */
26128 if (!any_alg_usable_p)
26130 /* Pick something reasonable. */
26131 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
26132 *dynamic_check = 128;
26133 return loop_1_byte;
/* Recurse with a halved expected size; MAX becomes the runtime
   threshold above which we fall back to the library call.  */
26137 alg = decide_alg (count, max / 2, min_size, max_size, memset,
26138 zero_memset, have_as, dynamic_check, noalign);
26139 gcc_assert (*dynamic_check == -1);
26140 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
26141 *dynamic_check = max;
26143 gcc_assert (alg != libcall);
26146 return (alg_usable_p (algs->unknown_size, memset, have_as)
26147 ? algs->unknown_size : libcall);
26150 /* Decide on alignment. We know that the operand is already aligned to ALIGN
26151 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
26153 decide_alignment (int align,
26154 enum stringop_alg alg,
26156 machine_mode move_mode)
/* Return the destination alignment worth establishing in the prologue:
   at least the size of MOVE_MODE, never below the already-known ALIGN,
   and not worth raising for blocks expected to be tiny.  */
26158 int desired_align = 0;
26160 gcc_assert (alg != no_stringop);
/* A libcall does its own alignment handling; VOIDmode means no moves.  */
26162 if (alg == libcall)
26164 if (move_mode == VOIDmode)
26167 desired_align = GET_MODE_SIZE (move_mode);
26168 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
26169 copying whole cacheline at once. */
26170 if (TARGET_PENTIUMPRO
26171 && (alg == rep_prefix_4_byte || alg == rep_prefix_1_byte))
26176 if (desired_align < align)
26177 desired_align = align;
/* Don't bother aligning blocks expected to be shorter than 4 bytes.  */
26178 if (expected_size != -1 && expected_size < 4)
26179 desired_align = align;
26181 return desired_align;
26185 /* Helper function for memcpy. For QImode value 0xXY produce
26186 0xXYXYXYXY of wide specified by MODE. This is essentially
26187 a * 0x10101010, but we can do slightly better than
26188 synth_mult by unwinding the sequence by hand on CPUs with
26191 promote_duplicated_reg (machine_mode mode, rtx val)
26193 machine_mode valmode = GET_MODE (val);
/* NOPS: shift/or steps needed to fill MODE from a byte (3 for DImode:
   8->16->32->64 bits; 2 for SImode).  */
26195 int nops = mode == DImode ? 3 : 2;
26197 gcc_assert (mode == SImode || mode == DImode || val == const0_rtx);
26198 if (val == const0_rtx)
26199 return copy_to_mode_reg (mode, CONST0_RTX (mode));
/* Constant byte: compute the replicated pattern at compile time.  */
26200 if (CONST_INT_P (val))
26202 HOST_WIDE_INT v = INTVAL (val) & 255;
26206 if (mode == DImode)
/* Two 16-bit shifts avoid an out-of-range shift on 32-bit hosts.  */
26207 v |= (v << 16) << 16;
26208 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
26211 if (valmode == VOIDmode)
26213 if (valmode != QImode)
26214 val = gen_lowpart (QImode, val);
26215 if (mode == QImode)
26217 if (!TARGET_PARTIAL_REG_STALL)
/* If a multiply by 0x...0101 is cheaper than the shift/or sequence
   (per the active cost table), use it instead.  */
26219 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
26220 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
26221 <= (ix86_cost->shift_const + ix86_cost->add) * nops
26222 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
26224 rtx reg = convert_modes (mode, QImode, val, true);
26225 tmp = promote_duplicated_reg (mode, const1_rtx);
26226 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
26231 rtx reg = convert_modes (mode, QImode, val, true);
26233 if (!TARGET_PARTIAL_REG_STALL)
/* Insert the low byte into bits 8-15 in one insn where it's cheap.  */
26234 if (mode == SImode)
26235 emit_insn (gen_insvsi_1 (reg, reg));
26237 emit_insn (gen_insvdi_1 (reg, reg));
26240 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
26241 NULL, 1, OPTAB_DIRECT);
26243 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
/* Double the replicated width: 16 bits -> 32 bits.  */
26245 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
26246 NULL, 1, OPTAB_DIRECT);
26247 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
26248 if (mode == SImode)
/* And once more for DImode: 32 bits -> 64 bits.  */
26250 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
26251 NULL, 1, OPTAB_DIRECT);
26252 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
26257 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
26258 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
26259 alignment from ALIGN to DESIRED_ALIGN. */
26261 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
/* Pick the narrowest mode wide enough for both the main loop chunk
   (SIZE_NEEDED) and the prologue alignment steps, then replicate VAL
   into it via promote_duplicated_reg.  */
26267 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
26268 promoted_val = promote_duplicated_reg (DImode, val);
26269 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
26270 promoted_val = promote_duplicated_reg (SImode, val);
26271 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
26272 promoted_val = promote_duplicated_reg (HImode, val);
/* Byte stores only: the original QImode value suffices.  */
26274 promoted_val = val;
26276 return promoted_val;
26279 /* Expand string move (memcpy) or store (memset) operation. Use i386 string
26280 operations when profitable. The code depends upon architecture, block size
26281 and alignment, but always has one of the following overall structures:
26283 Aligned move sequence:
26285 1) Prologue guard: Conditional that jumps up to epilogues for small
26286 blocks that can be handled by epilogue alone. This is faster
26287 but also needed for correctness, since prologue assumes the block
26288 is larger than the desired alignment.
26290 Optional dynamic check for size and libcall for large
26291 blocks is emitted here too, with -minline-stringops-dynamically.
26293 2) Prologue: copy first few bytes in order to get destination
26294 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
26295 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
26296 copied. We emit either a jump tree on power of two sized
26297 blocks, or a byte loop.
26299 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
26300 with specified algorithm.
26302 4) Epilogue: code copying tail of the block that is too small to be
26303 handled by main body (or up to size guarded by prologue guard).
26305 Misaligned move sequence
26307 1) misaligned move prologue/epilogue containing:
26308 a) Prologue handling small memory blocks and jumping to done_label
26309 (skipped if blocks are known to be large enough)
26310 b) Single move copying first DESIRED_ALIGN-ALIGN bytes if alignment is
26311 needed by single possibly misaligned move
26312 (skipped if alignment is not needed)
26313 c) Copy of last SIZE_NEEDED bytes by possibly misaligned moves
26315 2) Zero size guard dispatching to done_label, if needed
26317 3) dispatch to library call, if needed,
26319 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
26320 with specified algorithm. */
26322 ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
26323 rtx align_exp, rtx expected_align_exp,
26324 rtx expected_size_exp, rtx min_size_exp,
26325 rtx max_size_exp, rtx probable_max_size_exp,
26330 rtx_code_label *label = NULL;
26332 rtx_code_label *jump_around_label = NULL;
26333 HOST_WIDE_INT align = 1;
26334 unsigned HOST_WIDE_INT count = 0;
26335 HOST_WIDE_INT expected_size = -1;
26336 int size_needed = 0, epilogue_size_needed;
26337 int desired_align = 0, align_bytes = 0;
26338 enum stringop_alg alg;
26339 rtx promoted_val = NULL;
26340 rtx vec_promoted_val = NULL;
26341 bool force_loopy_epilogue = false;
26343 bool need_zero_guard = false;
26345 machine_mode move_mode = VOIDmode;
26346 int unroll_factor = 1;
26347 /* TODO: Once value ranges are available, fill in proper data. */
26348 unsigned HOST_WIDE_INT min_size = 0;
26349 unsigned HOST_WIDE_INT max_size = -1;
26350 unsigned HOST_WIDE_INT probable_max_size = -1;
26351 bool misaligned_prologue_used = false;
26354 if (CONST_INT_P (align_exp))
26355 align = INTVAL (align_exp);
26356 /* i386 can do misaligned access on reasonably increased cost. */
26357 if (CONST_INT_P (expected_align_exp)
26358 && INTVAL (expected_align_exp) > align)
26359 align = INTVAL (expected_align_exp);
26360 /* ALIGN is the minimum of destination and source alignment, but we care here
26361 just about destination alignment. */
26363 && MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
26364 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
26366 if (CONST_INT_P (count_exp))
26368 min_size = max_size = probable_max_size = count = expected_size
26369 = INTVAL (count_exp);
26370 /* When COUNT is 0, there is nothing to do. */
26377 min_size = INTVAL (min_size_exp);
26379 max_size = INTVAL (max_size_exp);
26380 if (probable_max_size_exp)
26381 probable_max_size = INTVAL (probable_max_size_exp);
26382 if (CONST_INT_P (expected_size_exp))
26383 expected_size = INTVAL (expected_size_exp);
26386 /* Make sure we don't need to care about overflow later on. */
26387 if (count > (HOST_WIDE_INT_1U << 30))
26390 have_as = !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (dst));
26392 have_as |= !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (src));
26394 /* Step 0: Decide on preferred algorithm, desired alignment and
26395 size of chunks to be copied by main loop. */
26396 alg = decide_alg (count, expected_size, min_size, probable_max_size,
26398 issetmem && val_exp == const0_rtx, have_as,
26399 &dynamic_check, &noalign);
26400 if (alg == libcall)
26402 gcc_assert (alg != no_stringop);
26404 /* For now vector-version of memset is generated only for memory zeroing, as
26405 creating of promoted vector value is very cheap in this case. */
26406 if (issetmem && alg == vector_loop && val_exp != const0_rtx)
26407 alg = unrolled_loop;
26410 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
26411 destreg = ix86_copy_addr_to_reg (XEXP (dst, 0));
26413 srcreg = ix86_copy_addr_to_reg (XEXP (src, 0));
26416 move_mode = word_mode;
26422 gcc_unreachable ();
26424 need_zero_guard = true;
26425 move_mode = QImode;
26428 need_zero_guard = true;
26430 case unrolled_loop:
26431 need_zero_guard = true;
26432 unroll_factor = (TARGET_64BIT ? 4 : 2);
26435 need_zero_guard = true;
26437 /* Find the widest supported mode. */
26438 move_mode = word_mode;
26439 while (optab_handler (mov_optab, GET_MODE_WIDER_MODE (move_mode))
26440 != CODE_FOR_nothing)
26441 move_mode = GET_MODE_WIDER_MODE (move_mode);
26443 /* Find the corresponding vector mode with the same size as MOVE_MODE.
26444 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
26445 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
26447 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
26448 move_mode = mode_for_vector (word_mode, nunits);
26449 if (optab_handler (mov_optab, move_mode) == CODE_FOR_nothing)
26450 move_mode = word_mode;
26452 gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing);
26454 case rep_prefix_8_byte:
26455 move_mode = DImode;
26457 case rep_prefix_4_byte:
26458 move_mode = SImode;
26460 case rep_prefix_1_byte:
26461 move_mode = QImode;
26464 size_needed = GET_MODE_SIZE (move_mode) * unroll_factor;
26465 epilogue_size_needed = size_needed;
26467 /* If we are going to call any library calls conditionally, make sure any
26468 pending stack adjustment happen before the first conditional branch,
26469 otherwise they will be emitted before the library call only and won't
26470 happen from the other branches. */
26471 if (dynamic_check != -1)
26472 do_pending_stack_adjust ();
26474 desired_align = decide_alignment (align, alg, expected_size, move_mode);
26475 if (!TARGET_ALIGN_STRINGOPS || noalign)
26476 align = desired_align;
26478 /* Step 1: Prologue guard. */
26480 /* Alignment code needs count to be in register. */
26481 if (CONST_INT_P (count_exp) && desired_align > align)
26483 if (INTVAL (count_exp) > desired_align
26484 && INTVAL (count_exp) > size_needed)
26487 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
26488 if (align_bytes <= 0)
26491 align_bytes = desired_align - align_bytes;
26493 if (align_bytes == 0)
26494 count_exp = force_reg (counter_mode (count_exp), count_exp);
26496 gcc_assert (desired_align >= 1 && align >= 1);
26498 /* Misaligned move sequences handle both prologue and epilogue at once.
26499 Default code generation results in a smaller code for large alignments
26500 and also avoids redundant job when sizes are known precisely. */
26501 misaligned_prologue_used
26502 = (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES
26503 && MAX (desired_align, epilogue_size_needed) <= 32
26504 && desired_align <= epilogue_size_needed
26505 && ((desired_align > align && !align_bytes)
26506 || (!count && epilogue_size_needed > 1)));
26508 /* Do the cheap promotion to allow better CSE across the
26509 main loop and epilogue (ie one load of the big constant in the
26511 For now the misaligned move sequences do not have fast path
26512 without broadcasting. */
26513 if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used)))
26515 if (alg == vector_loop)
26517 gcc_assert (val_exp == const0_rtx);
26518 vec_promoted_val = promote_duplicated_reg (move_mode, val_exp);
26519 promoted_val = promote_duplicated_reg_to_size (val_exp,
26520 GET_MODE_SIZE (word_mode),
26521 desired_align, align);
26525 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
26526 desired_align, align);
26529 /* Misaligned move sequences handles both prologues and epilogues at once.
26530 Default code generation results in smaller code for large alignments and
26531 also avoids redundant job when sizes are known precisely. */
26532 if (misaligned_prologue_used)
26534 /* Misaligned move prologue handled small blocks by itself. */
26535 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves
26536 (dst, src, &destreg, &srcreg,
26537 move_mode, promoted_val, vec_promoted_val,
26539 &jump_around_label,
26540 desired_align < align
26541 ? MAX (desired_align, epilogue_size_needed) : epilogue_size_needed,
26542 desired_align, align, &min_size, dynamic_check, issetmem);
26544 src = change_address (src, BLKmode, srcreg);
26545 dst = change_address (dst, BLKmode, destreg);
26546 set_mem_align (dst, desired_align * BITS_PER_UNIT);
26547 epilogue_size_needed = 0;
26548 if (need_zero_guard
26549 && min_size < (unsigned HOST_WIDE_INT) size_needed)
26551 /* It is possible that we copied enough so the main loop will not
26553 gcc_assert (size_needed > 1);
26554 if (jump_around_label == NULL_RTX)
26555 jump_around_label = gen_label_rtx ();
26556 emit_cmp_and_jump_insns (count_exp,
26557 GEN_INT (size_needed),
26558 LTU, 0, counter_mode (count_exp), 1, jump_around_label);
26559 if (expected_size == -1
26560 || expected_size < (desired_align - align) / 2 + size_needed)
26561 predict_jump (REG_BR_PROB_BASE * 20 / 100);
26563 predict_jump (REG_BR_PROB_BASE * 60 / 100);
26566 /* Ensure that alignment prologue won't copy past end of block. */
26567 else if (size_needed > 1 || (desired_align > 1 && desired_align > align))
26569 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
26570 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
26571 Make sure it is power of 2. */
26572 epilogue_size_needed = 1 << (floor_log2 (epilogue_size_needed) + 1);
26574 /* To improve performance of small blocks, we jump around the VAL
26575 promoting mode. This mean that if the promoted VAL is not constant,
26576 we might not use it in the epilogue and have to use byte
26578 if (issetmem && epilogue_size_needed > 2 && !promoted_val)
26579 force_loopy_epilogue = true;
26580 if ((count && count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
26581 || max_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
26583 /* If main algorithm works on QImode, no epilogue is needed.
26584 For small sizes just don't align anything. */
26585 if (size_needed == 1)
26586 desired_align = align;
26591 && min_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
26593 label = gen_label_rtx ();
26594 emit_cmp_and_jump_insns (count_exp,
26595 GEN_INT (epilogue_size_needed),
26596 LTU, 0, counter_mode (count_exp), 1, label);
26597 if (expected_size == -1 || expected_size < epilogue_size_needed)
26598 predict_jump (REG_BR_PROB_BASE * 60 / 100);
26600 predict_jump (REG_BR_PROB_BASE * 20 / 100);
26604 /* Emit code to decide on runtime whether library call or inline should be
26606 if (dynamic_check != -1)
26608 if (!issetmem && CONST_INT_P (count_exp))
26610 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
26612 emit_block_move_via_libcall (dst, src, count_exp, false);
26613 count_exp = const0_rtx;
26619 rtx_code_label *hot_label = gen_label_rtx ();
26620 if (jump_around_label == NULL_RTX)
26621 jump_around_label = gen_label_rtx ();
26622 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
26623 LEU, 0, counter_mode (count_exp),
26625 predict_jump (REG_BR_PROB_BASE * 90 / 100);
26627 set_storage_via_libcall (dst, count_exp, val_exp, false);
26629 emit_block_move_via_libcall (dst, src, count_exp, false);
26630 emit_jump (jump_around_label);
26631 emit_label (hot_label);
26635 /* Step 2: Alignment prologue. */
26636 /* Do the expensive promotion once we branched off the small blocks. */
26637 if (issetmem && !promoted_val)
26638 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
26639 desired_align, align);
26641 if (desired_align > align && !misaligned_prologue_used)
26643 if (align_bytes == 0)
26645 /* Except for the first move in prologue, we no longer know
26646 constant offset in aliasing info. It don't seems to worth
26647 the pain to maintain it for the first move, so throw away
26649 dst = change_address (dst, BLKmode, destreg);
26651 src = change_address (src, BLKmode, srcreg);
26652 dst = expand_set_or_movmem_prologue (dst, src, destreg, srcreg,
26653 promoted_val, vec_promoted_val,
26654 count_exp, align, desired_align,
26656 /* At most desired_align - align bytes are copied. */
26657 if (min_size < (unsigned)(desired_align - align))
26660 min_size -= desired_align - align;
26664 /* If we know how many bytes need to be stored before dst is
26665 sufficiently aligned, maintain aliasing info accurately. */
26666 dst = expand_set_or_movmem_constant_prologue (dst, &src, destreg,
26674 count_exp = plus_constant (counter_mode (count_exp),
26675 count_exp, -align_bytes);
26676 count -= align_bytes;
26677 min_size -= align_bytes;
26678 max_size -= align_bytes;
26680 if (need_zero_guard
26681 && min_size < (unsigned HOST_WIDE_INT) size_needed
26682 && (count < (unsigned HOST_WIDE_INT) size_needed
26683 || (align_bytes == 0
26684 && count < ((unsigned HOST_WIDE_INT) size_needed
26685 + desired_align - align))))
26687 /* It is possible that we copied enough so the main loop will not
26689 gcc_assert (size_needed > 1);
26690 if (label == NULL_RTX)
26691 label = gen_label_rtx ();
26692 emit_cmp_and_jump_insns (count_exp,
26693 GEN_INT (size_needed),
26694 LTU, 0, counter_mode (count_exp), 1, label);
26695 if (expected_size == -1
26696 || expected_size < (desired_align - align) / 2 + size_needed)
26697 predict_jump (REG_BR_PROB_BASE * 20 / 100);
26699 predict_jump (REG_BR_PROB_BASE * 60 / 100);
26702 if (label && size_needed == 1)
26704 emit_label (label);
26705 LABEL_NUSES (label) = 1;
26707 epilogue_size_needed = 1;
26709 promoted_val = val_exp;
26711 else if (label == NULL_RTX && !misaligned_prologue_used)
26712 epilogue_size_needed = size_needed;
26714 /* Step 3: Main loop. */
26721 gcc_unreachable ();
26724 case unrolled_loop:
26725 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, promoted_val,
26726 count_exp, move_mode, unroll_factor,
26727 expected_size, issetmem);
26730 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg,
26731 vec_promoted_val, count_exp, move_mode,
26732 unroll_factor, expected_size, issetmem);
26734 case rep_prefix_8_byte:
26735 case rep_prefix_4_byte:
26736 case rep_prefix_1_byte:
26737 expand_set_or_movmem_via_rep (dst, src, destreg, srcreg, promoted_val,
26738 val_exp, count_exp, move_mode, issetmem);
26741 /* Adjust properly the offset of src and dest memory for aliasing. */
26742 if (CONST_INT_P (count_exp))
26745 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
26746 (count / size_needed) * size_needed);
26747 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
26748 (count / size_needed) * size_needed);
26753 src = change_address (src, BLKmode, srcreg);
26754 dst = change_address (dst, BLKmode, destreg);
26757 /* Step 4: Epilogue to copy the remaining bytes. */
26761 /* When the main loop is done, COUNT_EXP might hold original count,
26762 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
26763 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
26764 bytes. Compensate if needed. */
26766 if (size_needed < epilogue_size_needed)
26769 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
26770 GEN_INT (size_needed - 1), count_exp, 1,
26772 if (tmp != count_exp)
26773 emit_move_insn (count_exp, tmp);
26775 emit_label (label);
26776 LABEL_NUSES (label) = 1;
26779 if (count_exp != const0_rtx && epilogue_size_needed > 1)
26781 if (force_loopy_epilogue)
26782 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
26783 epilogue_size_needed);
26787 expand_setmem_epilogue (dst, destreg, promoted_val,
26788 vec_promoted_val, count_exp,
26789 epilogue_size_needed);
26791 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
26792 epilogue_size_needed);
26795 if (jump_around_label)
26796 emit_label (jump_around_label);
26801 /* Expand the appropriate insns for doing strlen if not just doing
26804 out = result, initialized with the start address
26805 align_rtx = alignment of the address.
26806 scratch = scratch register, initialized with the startaddress when
26807 not aligned, otherwise undefined
26809 This is just the body. It needs the initializations mentioned above and
26810 some address computing at the end. These things are done in i386.md. */
/* NOTE(review): this listing is elided -- the original return-type line,
   braces and several statements are missing between the numbered lines
   below.  Comments describe only what is visible here.  */
/* Emit an unrolled SImode strlen loop: first walk 1..3 bytes to reach a
   4-byte-aligned pointer (skipped when the known alignment allows), then
   scan 4 bytes per iteration using the classic zero-byte bit trick, and
   finally back OUT up to point at the terminating zero byte.  */
26813 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
26817 rtx_code_label *align_2_label = NULL;
26818 rtx_code_label *align_3_label = NULL;
26819 rtx_code_label *align_4_label = gen_label_rtx ();
26820 rtx_code_label *end_0_label = gen_label_rtx ();
26822 rtx tmpreg = gen_reg_rtx (SImode);
26823 rtx scratch = gen_reg_rtx (SImode);
/* Only a compile-time constant alignment is usable below.  */
26827 if (CONST_INT_P (align_rtx))
26828 align = INTVAL (align_rtx);
26830 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
26832 /* Is there a known alignment and is it less than 4? */
26835 rtx scratch1 = gen_reg_rtx (Pmode);
26836 emit_move_insn (scratch1, out);
26837 /* Is there a known alignment and is it not 2? */
26840 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
26841 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
26843 /* Leave just the 3 lower bits. */
26844 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
26845 NULL_RTX, 0, OPTAB_WIDEN)
26847 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
26848 Pmode, 1, align_4_label);
26849 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
26850 Pmode, 1, align_2_label);
26851 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
26852 Pmode, 1, align_3_label);
26856 /* Since the alignment is 2, we have to check 2 or 0 bytes;
26857 check if is aligned to 4 - byte. */
26859 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
26860 NULL_RTX, 0, OPTAB_WIDEN);
26862 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
26863 Pmode, 1, align_4_label);
26866 mem = change_address (src, QImode, out);
26868 /* Now compare the bytes. */
26870 /* Compare the first n unaligned byte on a byte per byte basis. */
26871 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
26872 QImode, 1, end_0_label);
26874 /* Increment the address. */
26875 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
26877 /* Not needed with an alignment of 2 */
26880 emit_label (align_2_label);
26882 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
26885 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
26887 emit_label (align_3_label);
26890 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
26893 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
26896 /* Generate loop to check 4 bytes at a time. It is not a good idea to
26897 align this loop. It gives only huge programs, but does not help to
26899 emit_label (align_4_label);
26901 mem = change_address (src, SImode, out);
26902 emit_move_insn (scratch, mem);
26903 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
26905 /* This formula yields a nonzero result iff one of the bytes is zero.
26906 This saves three branches inside loop and many cycles. */
/* tmpreg = (word - 0x01010101) & ~word & 0x80808080.  */
26908 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
26909 emit_insn (gen_one_cmplsi2 (scratch, scratch));
26910 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
26911 emit_insn (gen_andsi3 (tmpreg, tmpreg,
26912 gen_int_mode (0x80808080, SImode)));
26913 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
26918 rtx reg = gen_reg_rtx (SImode);
26919 rtx reg2 = gen_reg_rtx (Pmode);
26920 emit_move_insn (reg, tmpreg);
26921 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
26923 /* If zero is not in the first two bytes, move two bytes forward. */
26924 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
26925 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
26926 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
26927 emit_insn (gen_rtx_SET (tmpreg,
26928 gen_rtx_IF_THEN_ELSE (SImode, tmp,
26931 /* Emit lea manually to avoid clobbering of flags. */
26932 emit_insn (gen_rtx_SET (reg2, gen_rtx_PLUS (Pmode, out, const2_rtx)));
26934 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
26935 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
26936 emit_insn (gen_rtx_SET (out,
26937 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
26943 rtx_code_label *end_2_label = gen_label_rtx ();
26944 /* Is zero in the first two bytes? */
26946 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
26947 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
26948 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
26949 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
26950 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
26952 tmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
26953 JUMP_LABEL (tmp) = end_2_label;
26955 /* Not in the first two. Move two bytes forward. */
26956 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
26957 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
26959 emit_label (end_2_label);
26963 /* Avoid branch in fixing the byte. */
/* OUT has overshot by up to 3 bytes; subtract-with-carry fixes it
   without a branch.  */
26964 tmpreg = gen_lowpart (QImode, tmpreg);
26965 emit_insn (gen_addqi3_cconly_overflow (tmpreg, tmpreg));
26966 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
26967 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
26968 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
26970 emit_label (end_0_label);
26973 /* Expand strlen. */
/* NOTE(review): elided listing -- return type, braces and several lines
   are missing between the numbered lines below.
   OUT = result register, SRC = string MEM, EOSCHAR = terminator,
   ALIGN = known alignment.  Two strategies are visible: the unrolled
   SImode scan (ix86_expand_strlensi_unroll_1) for zero-terminated
   strings at -O2+, and a repne-scasb style sequence (UNSPEC_SCAS)
   otherwise.  Presumably returns a success flag to the caller in
   i386.md -- TODO confirm against the strlen expander pattern.  */
26976 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
26978 rtx addr, scratch1, scratch2, scratch3, scratch4;
26980 /* The generic case of strlen expander is long. Avoid it's
26981 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
26983 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
26984 && !TARGET_INLINE_ALL_STRINGOPS
26985 && !optimize_insn_for_size_p ()
26986 && (!CONST_INT_P (align) || INTVAL (align) < 4))
26989 addr = force_reg (Pmode, XEXP (src, 0));
26990 scratch1 = gen_reg_rtx (Pmode);
26992 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
26993 && !optimize_insn_for_size_p ())
26995 /* Well it seems that some optimizer does not combine a call like
26996 foo(strlen(bar), strlen(bar));
26997 when the move and the subtraction is done here. It does calculate
26998 the length just once when these instructions are done inside of
26999 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
27000 often used and I use one fewer register for the lifetime of
27001 output_strlen_unroll() this is better. */
27003 emit_move_insn (out, addr);
27005 ix86_expand_strlensi_unroll_1 (out, src, align);
27007 /* strlensi_unroll_1 returns the address of the zero at the end of
27008 the string, like memchr(), so compute the length by subtracting
27009 the start address. */
27010 emit_insn (ix86_gen_sub3 (out, out, addr));
/* Fallback path: scasb-style scan requires eax/ecx/edi.  */
27016 /* Can't use this if the user has appropriated eax, ecx, or edi. */
27017 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
27019 /* Can't use this for non-default address spaces. */
27020 if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (src)))
27023 scratch2 = gen_reg_rtx (Pmode);
27024 scratch3 = gen_reg_rtx (Pmode);
27025 scratch4 = force_reg (Pmode, constm1_rtx);
27027 emit_move_insn (scratch3, addr);
27028 eoschar = force_reg (QImode, eoschar);
27030 src = replace_equiv_address_nv (src, scratch3);
27032 /* If .md starts supporting :P, this can be done in .md. */
27033 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
27034 scratch4), UNSPEC_SCAS);
27035 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
/* length = ~scratch1 - 1 (scas leaves the negated count).  */
27036 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
27037 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
27042 /* For given symbol (function) construct code to compute address of it's PLT
27043 entry in large x86-64 PIC model. */
/* NOTE(review): elided listing -- the return type line, braces and the
   final return statement are missing here.  Computes
   TMP = PLTOFF(symbol) + PIC register, valid only in the large PIC
   code model (asserted below).  */
27045 construct_plt_address (rtx symbol)
27049 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
27050 gcc_assert (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF);
27051 gcc_assert (Pmode == DImode);
27053 tmp = gen_reg_rtx (Pmode);
27054 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
27056 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
27057 emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx));
/* Emit a call (or sibcall) insn.  RETVAL is the value destination or
   NULL, FNADDR the MEM of the callee address, CALLARG1/CALLARG2 the
   argument-count operands, POP the stack-pop amount, SIBCALL whether
   this is a tail call.
   NOTE(review): elided listing -- return type, braces and several
   conditions are missing between the numbered lines; comments below
   describe only the visible statements.  */
27062 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
27064 rtx pop, bool sibcall)
27067 rtx use = NULL, call;
27068 unsigned int vec_len = 0;
27070 if (pop == const0_rtx)
27072 gcc_assert (!TARGET_64BIT || !pop);
27074 if (TARGET_MACHO && !TARGET_64BIT)
27077 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
27078 fnaddr = machopic_indirect_call_target (fnaddr);
27083 /* Static functions and indirect calls don't need the pic register. Also,
27084 check if PLT was explicitly avoided via no-plt or "noplt" attribute, making
27085 it an indirect call. */
27086 rtx addr = XEXP (fnaddr, 0);
27088 && GET_CODE (addr) == SYMBOL_REF
27089 && !SYMBOL_REF_LOCAL_P (addr))
27092 && (SYMBOL_REF_DECL (addr) == NULL_TREE
27093 || !lookup_attribute ("noplt",
27094 DECL_ATTRIBUTES (SYMBOL_REF_DECL (addr)))))
27097 || (ix86_cmodel == CM_LARGE_PIC
27098 && DEFAULT_ABI != MS_ABI))
/* PLT call through the PIC register: record the use so the register
   allocator keeps it live across the call.  */
27100 use_reg (&use, gen_rtx_REG (Pmode,
27101 REAL_PIC_OFFSET_TABLE_REGNUM));
27102 if (ix86_use_pseudo_pic_reg ())
27103 emit_move_insn (gen_rtx_REG (Pmode,
27104 REAL_PIC_OFFSET_TABLE_REGNUM),
27105 pic_offset_table_rtx);
27108 else if (!TARGET_PECOFF && !TARGET_MACHO)
27112 fnaddr = gen_rtx_UNSPEC (Pmode,
27113 gen_rtvec (1, addr),
27115 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
27119 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
27121 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
27122 fnaddr = gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
27125 fnaddr = gen_const_mem (Pmode, fnaddr);
27126 /* Pmode may not be the same as word_mode for x32, which
27127 doesn't support indirect branch via 32-bit memory slot.
27128 Since x32 GOT slot is 64 bit with zero upper 32 bits,
27129 indirect branch via x32 GOT slot is OK. */
27130 if (GET_MODE (fnaddr) != word_mode)
27131 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
27132 fnaddr = gen_rtx_MEM (QImode, fnaddr);
27137 /* Skip setting up RAX register for -mskip-rax-setup when there are no
27138 parameters passed in vector registers. */
27140 && (INTVAL (callarg2) > 0
27141 || (INTVAL (callarg2) == 0
27142 && (TARGET_SSE || !flag_skip_rax_setup))))
/* x86-64 varargs ABI: AL holds the number of vector registers used.  */
27144 rtx al = gen_rtx_REG (QImode, AX_REG);
27145 emit_move_insn (al, callarg2);
27146 use_reg (&use, al);
27149 if (ix86_cmodel == CM_LARGE_PIC
27152 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
27153 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
27154 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
27155 /* Since x32 GOT slot is 64 bit with zero upper 32 bits, indirect
27156 branch via x32 GOT slot is OK. */
27157 else if (!(TARGET_X32
27159 && GET_CODE (XEXP (fnaddr, 0)) == ZERO_EXTEND
27160 && GOT_memory_operand (XEXP (XEXP (fnaddr, 0), 0), Pmode))
27162 ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
27163 : !call_insn_operand (XEXP (fnaddr, 0), word_mode)))
/* Address is not directly usable as a call operand; load it into a
   register first.  */
27165 fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
27166 fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
27169 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
27173 /* We should add bounds as destination register in case
27174 pointer with bounds may be returned. */
27175 if (TARGET_MPX && SCALAR_INT_MODE_P (GET_MODE (retval)))
27177 rtx b0 = gen_rtx_REG (BND64mode, FIRST_BND_REG);
27178 rtx b1 = gen_rtx_REG (BND64mode, FIRST_BND_REG + 1);
27179 if (GET_CODE (retval) == PARALLEL)
27181 b0 = gen_rtx_EXPR_LIST (VOIDmode, b0, const0_rtx);
27182 b1 = gen_rtx_EXPR_LIST (VOIDmode, b1, const0_rtx);
27183 rtx par = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, b0, b1));
27184 retval = chkp_join_splitted_slot (retval, par);
27188 retval = gen_rtx_PARALLEL (VOIDmode,
27189 gen_rtvec (3, retval, b0, b1));
27190 chkp_put_regs_to_expr_list (retval);
27194 call = gen_rtx_SET (retval, call);
27196 vec[vec_len++] = call;
/* Optional stack-pointer adjustment bundled into the call PARALLEL.  */
27200 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
27201 pop = gen_rtx_SET (stack_pointer_rtx, pop);
27202 vec[vec_len++] = pop;
27205 if (TARGET_64BIT_MS_ABI
27206 && (!callarg2 || INTVAL (callarg2) != -2))
/* MS ABI callee clobbers registers that are call-saved in the SysV
   ABI; record them as clobbers.  */
27208 int const cregs_size
27209 = ARRAY_SIZE (x86_64_ms_sysv_extra_clobbered_registers);
27212 for (i = 0; i < cregs_size; i++)
27214 int regno = x86_64_ms_sysv_extra_clobbered_registers[i];
27215 machine_mode mode = SSE_REGNO_P (regno) ? TImode : DImode;
27217 clobber_reg (&use, gen_rtx_REG (mode, regno));
27222 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
27223 call = emit_call_insn (call);
27225 CALL_INSN_FUNCTION_USAGE (call) = use;
27230 /* Return true if the function being called was marked with attribute "noplt"
27231 or using -fno-plt and we are compiling for non-PIC and x86_64. We need to
27232 handle the non-PIC case in the backend because there is no easy interface
27233 for the front-end to force non-PLT calls to use the GOT. This is currently
27234 used only with 64-bit ELF targets to call the function marked "noplt"
/* NOTE(review): elided listing -- return type, braces and return
   statements are missing.  CALL_OP is the SYMBOL_REF of the callee.  */
27238 ix86_nopic_noplt_attribute_p (rtx call_op)
/* Bail out for PIC, large model, 32-bit, and non-ELF targets, or when
   the symbol binds locally (a direct call is fine then).  */
27240 if (flag_pic || ix86_cmodel == CM_LARGE
27241 || !TARGET_64BIT || TARGET_MACHO || TARGET_SEH || TARGET_PECOFF
27242 || SYMBOL_REF_LOCAL_P (call_op))
27245 tree symbol_decl = SYMBOL_REF_DECL (call_op);
27248 || (symbol_decl != NULL_TREE
27249 && lookup_attribute ("noplt", DECL_ATTRIBUTES (symbol_decl))))
27255 /* Output the assembly for a call instruction. */
/* NOTE(review): elided listing -- return type, braces and some
   statements are missing.  Chooses between direct/indirect and
   jmp/call templates; GOTPCREL forms implement no-PLT calls, and SEH
   targets may need a trailing nop so the unwinder does not read past
   the call (see loop below).  */
27258 ix86_output_call_insn (rtx_insn *insn, rtx call_op)
27260 bool direct_p = constant_call_address_operand (call_op, VOIDmode);
27261 bool seh_nop_p = false;
27264 if (SIBLING_CALL_P (insn))
27266 if (direct_p && ix86_nopic_noplt_attribute_p (call_op))
27267 xasm = "%!jmp\t*%p0@GOTPCREL(%%rip)";
27269 xasm = "%!jmp\t%P0";
27270 /* SEH epilogue detection requires the indirect branch case
27271 to include REX.W. */
27272 else if (TARGET_SEH)
27273 xasm = "%!rex.W jmp %A0";
27275 xasm = "%!jmp\t%A0";
27277 output_asm_insn (xasm, &call_op);
27281 /* SEH unwinding can require an extra nop to be emitted in several
27282 circumstances. Determine if we have one of those. */
27287 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
27289 /* If we get to another real insn, we don't need the nop. */
27293 /* If we get to the epilogue note, prevent a catch region from
27294 being adjacent to the standard epilogue sequence. If non-
27295 call-exceptions, we'll have done this during epilogue emission. */
27296 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
27297 && !flag_non_call_exceptions
27298 && !can_throw_internal (insn))
27305 /* If we didn't find a real insn following the call, prevent the
27306 unwinder from looking into the next function. */
27311 if (direct_p && ix86_nopic_noplt_attribute_p (call_op))
27312 xasm = "%!call\t*%p0@GOTPCREL(%%rip)";
27314 xasm = "%!call\t%P0";
27316 xasm = "%!call\t%A0";
27318 output_asm_insn (xasm, &call_op);
27326 /* Clear stack slot assignments remembered from previous functions.
27327 This is called from INIT_EXPANDERS once before RTL is emitted for each
/* Allocate and zero a fresh per-function machine_function record.
   NOTE(review): the final return statement is elided from this listing;
   presumably the function returns F.  */
27330 static struct machine_function *
27331 ix86_init_machine_status (void)
27333 struct machine_function *f;
27335 f = ggc_cleared_alloc<machine_function> ();
/* -1 marks "not yet computed" for the fast prologue heuristic.  */
27336 f->use_fast_prologue_epilogue_nregs = -1;
27337 f->call_abi = ix86_abi;
27342 /* Return a MEM corresponding to a stack slot with mode MODE.
27343 Allocate a new slot if necessary.
27345 The RTL for a function can have several slots available: N is
27346 which slot to use. */
/* Slots are cached per (mode, n) pair in the ix86_stack_locals list and
   shared by repeated requests; copy_rtx prevents sharing of the MEM rtx
   itself.  NOTE(review): some lines (e.g. s->mode/s->n assignments) are
   elided from this listing.  */
27349 assign_386_stack_local (machine_mode mode, enum ix86_stack_slot n)
27351 struct stack_local_entry *s;
27353 gcc_assert (n < MAX_386_STACK_LOCALS);
/* Reuse an existing slot when one matches.  */
27355 for (s = ix86_stack_locals; s; s = s->next)
27356 if (s->mode == mode && s->n == n)
27357 return validize_mem (copy_rtx (s->rtl));
27359 s = ggc_alloc<stack_local_entry> ();
27362 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
/* Push the new entry on the per-function list.  */
27364 s->next = ix86_stack_locals;
27365 ix86_stack_locals = s;
27366 return validize_mem (copy_rtx (s->rtl));
/* Instantiate (replace virtual registers in) the RTL of every cached
   stack-local slot.  NOTE(review): the return type line is elided.  */
27370 ix86_instantiate_decls (void)
27372 struct stack_local_entry *s;
27374 for (s = ix86_stack_locals; s; s = s->next)
27375 if (s->rtl != NULL_RTX)
27376 instantiate_decl_rtl (s->rtl);
27379 /* Return the number used for encoding REG, in the range 0..7. */
/* Maps a hard register to its 3-bit position within its register file
   by subtracting the file's first regno.
   NOTE(review): elided listing -- the handling of the eight legacy
   integer registers (original lines 27385..27405) and the final
   fallback are missing from this view.  */
27382 reg_encoded_number (rtx reg)
27384 unsigned regno = REGNO (reg);
27406 if (IN_RANGE (regno, FIRST_STACK_REG, LAST_STACK_REG))
27407 return regno - FIRST_STACK_REG;
27408 if (IN_RANGE (regno, FIRST_SSE_REG, LAST_SSE_REG))
27409 return regno - FIRST_SSE_REG;
27410 if (IN_RANGE (regno, FIRST_MMX_REG, LAST_MMX_REG))
27411 return regno - FIRST_MMX_REG;
27412 if (IN_RANGE (regno, FIRST_REX_SSE_REG, LAST_REX_SSE_REG))
27413 return regno - FIRST_REX_SSE_REG;
27414 if (IN_RANGE (regno, FIRST_REX_INT_REG, LAST_REX_INT_REG))
27415 return regno - FIRST_REX_INT_REG;
27416 if (IN_RANGE (regno, FIRST_MASK_REG, LAST_MASK_REG))
27417 return regno - FIRST_MASK_REG;
27418 if (IN_RANGE (regno, FIRST_BND_REG, LAST_BND_REG))
27419 return regno - FIRST_BND_REG;
27423 /* Given an insn INSN with NOPERANDS OPERANDS, return the modr/m byte used
27424 in its encoding if it could be relevant for ROP mitigation, otherwise
27425 return -1. If POPNO0 and POPNO1 are nonnull, store the operand numbers
27426 used for calculating it into them. */
/* NOTE(review): elided listing -- the return type, several case bodies
   and return paths are missing from this view.  */
27429 ix86_get_modrm_for_rop (rtx_insn *insn, rtx *operands, int noperands,
27430 int *popno0 = 0, int *popno1 = 0)
/* Inline asm has no known encoding.  */
27432 if (asm_noperands (PATTERN (insn)) >= 0)
27434 int has_modrm = get_attr_modrm (insn);
27437 enum attr_modrm_class cls = get_attr_modrm_class (insn);
27441 case MODRM_CLASS_OP02:
27442 gcc_assert (noperands >= 3);
27451 case MODRM_CLASS_OP01:
27452 gcc_assert (noperands >= 2);
27464 if (REG_P (op0) && REG_P (op1))
27466 int enc0 = reg_encoded_number (op0);
27467 int enc1 = reg_encoded_number (op1);
/* mod=11 (register-direct), reg=enc1, r/m=enc0.  */
27468 return 0xc0 + (enc1 << 3) + enc0;
27473 /* Check whether x86 address PARTS is a pc-relative address. */
/* True when the address consists of a bare displacement that is a
   label, a non-TLS symbol, or a PC-relative UNSPEC -- i.e. forms that
   assemble to disp32(%rip).
   NOTE(review): the return type line, braces and final returns are
   elided from this listing.  */
27476 rip_relative_addr_p (struct ix86_address *parts)
27478 rtx base, index, disp;
27480 base = parts->base;
27481 index = parts->index;
27482 disp = parts->disp;
/* Only a displacement with no base and no index can be RIP-relative.  */
27484 if (disp && !base && !index)
/* Strip a CONST wrapper and an added constant offset.  */
27490 if (GET_CODE (disp) == CONST)
27491 symbol = XEXP (disp, 0);
27492 if (GET_CODE (symbol) == PLUS
27493 && CONST_INT_P (XEXP (symbol, 1)))
27494 symbol = XEXP (symbol, 0);
27496 if (GET_CODE (symbol) == LABEL_REF
27497 || (GET_CODE (symbol) == SYMBOL_REF
27498 && SYMBOL_REF_TLS_MODEL (symbol) == 0)
27499 || (GET_CODE (symbol) == UNSPEC
27500 && (XINT (symbol, 1) == UNSPEC_GOTPCREL
27501 || XINT (symbol, 1) == UNSPEC_PCREL
27502 || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
27509 /* Calculate the length of the memory address in the instruction encoding.
27510 Includes addr32 prefix, does not include the one-byte modrm, opcode,
27511 or other prefixes. We never generate addr32 prefix for LEA insn. */
/* NOTE(review): elided listing -- the return type, braces, `len`
   accumulation statements and the final return are missing from this
   view; only the classification logic is visible.  */
27514 memory_address_length (rtx addr, bool lea)
27516 struct ix86_address parts;
27517 rtx base, index, disp;
/* Auto-modified addresses (push/pop style) have no encoded address.  */
27521 if (GET_CODE (addr) == PRE_DEC
27522 || GET_CODE (addr) == POST_INC
27523 || GET_CODE (addr) == PRE_MODIFY
27524 || GET_CODE (addr) == POST_MODIFY)
27527 ok = ix86_decompose_address (addr, &parts);
/* Segment override prefix costs one byte.  */
27530 len = (parts.seg == ADDR_SPACE_GENERIC) ? 0 : 1;
27532 /* If this is not LEA instruction, add the length of addr32 prefix. */
27533 if (TARGET_64BIT && !lea
27534 && (SImode_address_operand (addr, VOIDmode)
27535 || (parts.base && GET_MODE (parts.base) == SImode)
27536 || (parts.index && GET_MODE (parts.index) == SImode)))
27540 index = parts.index;
27543 if (base && SUBREG_P (base))
27544 base = SUBREG_REG (base);
27545 if (index && SUBREG_P (index))
27546 index = SUBREG_REG (index);
27548 gcc_assert (base == NULL_RTX || REG_P (base));
27549 gcc_assert (index == NULL_RTX || REG_P (index));
27552 - esp as the base always wants an index,
27553 - ebp as the base always wants a displacement,
27554 - r12 as the base always wants an index,
27555 - r13 as the base always wants a displacement. */
27557 /* Register Indirect. */
27558 if (base && !index && !disp)
27560 /* esp (for its index) and ebp (for its displacement) need
27561 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
27563 if (base == arg_pointer_rtx
27564 || base == frame_pointer_rtx
27565 || REGNO (base) == SP_REG
27566 || REGNO (base) == BP_REG
27567 || REGNO (base) == R12_REG
27568 || REGNO (base) == R13_REG)
27572 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
27573 is not disp32, but disp32(%rip), so for disp32
27574 SIB byte is needed, unless print_operand_address
27575 optimizes it into disp32(%rip) or (%rip) is implied
27577 else if (disp && !base && !index)
27580 if (rip_relative_addr_p (&parts))
27585 /* Find the length of the displacement constant. */
/* satisfies_constraint_K: displacement fits in a signed 8-bit disp8.  */
27588 if (base && satisfies_constraint_K (disp))
27593 /* ebp always wants a displacement. Similarly r13. */
27594 else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
27597 /* An index requires the two-byte modrm form.... */
27599 /* ...like esp (or r12), which always wants an index. */
27600 || base == arg_pointer_rtx
27601 || base == frame_pointer_rtx
27602 || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
27609 /* Compute default value for "length_immediate" attribute. When SHORTFORM
27610 is set, expect that insn have 8bit immediate alternative. */
/* NOTE(review): elided listing -- the return type, the per-mode size
   returns (1/2/4/8 bytes) and the closing brace are missing from this
   view.  */
27612 ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform)
27616 extract_insn_cached (insn);
/* Find the (single) constant operand, scanning from the last one.  */
27617 for (i = recog_data.n_operands - 1; i >= 0; --i)
27618 if (CONSTANT_P (recog_data.operand[i]))
27620 enum attr_mode mode = get_attr_mode (insn);
27623 if (shortform && CONST_INT_P (recog_data.operand[i]))
27625 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
/* Truncate to the operand mode before the signed-8-bit test, so the
   comparison sees the value as the hardware will.  */
27632 ival = trunc_int_for_mode (ival, HImode);
27635 ival = trunc_int_for_mode (ival, SImode);
/* Sign-extended imm8 form costs one byte.  */
27640 if (IN_RANGE (ival, -128, 127))
27657 /* Immediates for DImode instructions are encoded
27658 as 32bit sign extended values. */
27663 fatal_insn ("unknown insn mode", insn);
27669 /* Compute default value for "length_address" attribute. */
/* NOTE(review): elided listing -- return type, braces, several guard
   statements and the final return are missing from this view.  */
27671 ix86_attr_length_address_default (rtx_insn *insn)
/* LEA encodes its address in the SET_SRC of the pattern.  */
27675 if (get_attr_type (insn) == TYPE_LEA)
27677 rtx set = PATTERN (insn), addr;
27679 if (GET_CODE (set) == PARALLEL)
27680 set = XVECEXP (set, 0, 0);
27682 gcc_assert (GET_CODE (set) == SET);
27684 addr = SET_SRC (set);
27686 return memory_address_length (addr, true);
27689 extract_insn_cached (insn);
/* Otherwise look for the first MEM operand, honouring the matched
   alternative's constraints.  */
27690 for (i = recog_data.n_operands - 1; i >= 0; --i)
27692 rtx op = recog_data.operand[i];
27695 constrain_operands_cached (insn, reload_completed);
27696 if (which_alternative != -1)
27698 const char *constraints = recog_data.constraints[i];
27699 int alt = which_alternative;
27701 while (*constraints == '=' || *constraints == '+')
/* Advance to this alternative's constraint string.  */
27704 while (*constraints++ != ',')
27706 /* Skip ignored operands. */
27707 if (*constraints == 'X')
27711 int len = memory_address_length (XEXP (op, 0), false);
27713 /* Account for segment prefix for non-default addr spaces. */
27714 if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (op)))
27723 /* Compute default value for "length_vex" attribute. It includes
27724 2 or 3 byte VEX prefix and 1 opcode byte. */
/* Returns 3 (2-byte VEX + opcode) or 4 (3-byte VEX + opcode).
   NOTE(review): elided listing -- return type, braces and the return
   statements themselves are missing from this view.  */
27727 ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode,
27732 /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3
27733 byte VEX prefix. */
27734 if (!has_0f_opcode || has_vex_w)
27737 /* We can always use 2 byte VEX prefix in 32bit. */
27741 extract_insn_cached (insn);
27743 for (i = recog_data.n_operands - 1; i >= 0; --i)
27744 if (REG_P (recog_data.operand[i]))
27746 /* REX.W bit uses 3 byte VEX prefix. */
27747 if (GET_MODE (recog_data.operand[i]) == DImode
27748 && GENERAL_REG_P (recog_data.operand[i]))
27753 /* REX.X or REX.B bits use 3 byte VEX prefix. */
27754 if (MEM_P (recog_data.operand[i])
27755 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
27762 /* Return the maximum number of instructions a cpu can issue. */
/* NOTE(review): elided listing -- the return type, switch header and
   the `return N;` line for each group of cases are missing from this
   view; only the case labels grouped by issue width remain.  */
27765 ix86_issue_rate (void)
27769 case PROCESSOR_PENTIUM:
27770 case PROCESSOR_LAKEMONT:
27771 case PROCESSOR_BONNELL:
27772 case PROCESSOR_SILVERMONT:
27773 case PROCESSOR_KNL:
27774 case PROCESSOR_INTEL:
27776 case PROCESSOR_BTVER2:
27777 case PROCESSOR_PENTIUM4:
27778 case PROCESSOR_NOCONA:
27781 case PROCESSOR_PENTIUMPRO:
27782 case PROCESSOR_ATHLON:
27784 case PROCESSOR_AMDFAM10:
27785 case PROCESSOR_GENERIC:
27786 case PROCESSOR_BTVER1:
27789 case PROCESSOR_BDVER1:
27790 case PROCESSOR_BDVER2:
27791 case PROCESSOR_BDVER3:
27792 case PROCESSOR_BDVER4:
27793 case PROCESSOR_ZNVER1:
27794 case PROCESSOR_CORE2:
27795 case PROCESSOR_NEHALEM:
27796 case PROCESSOR_SANDYBRIDGE:
27797 case PROCESSOR_HASWELL:
27805 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
27806 by DEP_INSN and nothing set by DEP_INSN. */
27809 ix86_flags_dependent (rtx_insn *insn, rtx_insn *dep_insn, enum attr_type insn_type)
27813 /* Simplify the test for uninteresting insns. */
27814 if (insn_type != TYPE_SETCC
27815 && insn_type != TYPE_ICMOV
27816 && insn_type != TYPE_FCMOV
27817 && insn_type != TYPE_IBR)
27820 if ((set = single_set (dep_insn)) != 0)
27822 set = SET_DEST (set);
27825 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
27826 && XVECLEN (PATTERN (dep_insn), 0) == 2
27827 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
27828 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
27830 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
27831 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
27836 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
27839 /* This test is true if the dependent insn reads the flags but
27840 not any other potentially set register. */
27841 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
27844 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
27850 /* Return true iff USE_INSN has a memory address with operands set by
/* (SET_INSN -- address-generation-interlock check used by
   ix86_adjust_cost.)
   NOTE(review): the return type line, braces and the final
   `return false;` are elided from this listing.  */
27854 ix86_agi_dependent (rtx_insn *set_insn, rtx_insn *use_insn)
27857 extract_insn_cached (use_insn);
/* Only the first MEM operand found is examined.  */
27858 for (i = recog_data.n_operands - 1; i >= 0; --i)
27859 if (MEM_P (recog_data.operand[i]))
27861 rtx addr = XEXP (recog_data.operand[i], 0);
27862 return modified_in_p (addr, set_insn) != 0;
27867 /* Helper function for exact_store_load_dependency.
27868 Return true if addr is found in insn. */
/* Recursive rtx walk comparing every sub-expression against ADDR.
   NOTE(review): elided listing -- the return type, braces, some case
   labels and return statements are missing from this view.  */
27870 exact_dependency_1 (rtx addr, rtx insn)
27872 enum rtx_code code;
27873 const char *format_ptr;
27876 code = GET_CODE (insn);
27880 if (rtx_equal_p (addr, insn))
/* Walk the rtx format string: 'e' sub-expressions recurse, 'E'
   vectors recurse element-wise.  */
27895 format_ptr = GET_RTX_FORMAT (code);
27896 for (i = 0; i < GET_RTX_LENGTH (code); i++)
27898 switch (*format_ptr++)
27901 if (exact_dependency_1 (addr, XEXP (insn, i)))
27905 for (j = 0; j < XVECLEN (insn, i); j++)
27906 if (exact_dependency_1 (addr, XVECEXP (insn, i, j)))
27914 /* Return true if there exists exact dependency for store & load, i.e.
27915 the same memory address is used in them. */
/* STORE must be a single_set with a MEM destination; LOAD's source is
   then searched for that exact MEM via exact_dependency_1.  */
27917 exact_store_load_dependency (rtx_insn *store, rtx_insn *load)
27921 set1 = single_set (store);
27924 if (!MEM_P (SET_DEST (set1)))
27926 set2 = single_set (load);
27929 if (exact_dependency_1 (SET_DEST (set1), SET_SRC (set2)))
/* TARGET_SCHED_ADJUST_COST hook: tweak the latency COST of the dependence
   LINK between DEP_INSN (producer) and INSN (consumer), per-CPU.
   NOTE: sampled listing — returns and braces between numbered lines are
   elided; the switch below dispatches on the tuned processor.  */
27935 ix86_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
27937 enum attr_type insn_type, dep_insn_type;
27938 enum attr_memory memory;
27940 int dep_insn_code_number;
27942 /* Anti and output dependencies have zero cost on all CPUs. */
27943 if (REG_NOTE_KIND (link) != 0)
27946 dep_insn_code_number = recog_memoized (dep_insn);
27948 /* If we can't recognize the insns, we can't really do anything. */
27949 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
27952 insn_type = get_attr_type (insn);
27953 dep_insn_type = get_attr_type (dep_insn);
27957 case PROCESSOR_PENTIUM:
27958 case PROCESSOR_LAKEMONT:
27959 /* Address Generation Interlock adds a cycle of latency. */
27960 if (insn_type == TYPE_LEA)
27962 rtx addr = PATTERN (insn);
27964 if (GET_CODE (addr) == PARALLEL)
27965 addr = XVECEXP (addr, 0, 0)
27967 gcc_assert (GET_CODE (addr) == SET);
27969 addr = SET_SRC (addr);
27970 if (modified_in_p (addr, dep_insn))
27973 else if (ix86_agi_dependent (dep_insn, insn))
27976 /* ??? Compares pair with jump/setcc. */
27977 if (ix86_flags_dependent (insn, dep_insn, insn_type))
27980 /* Floating point stores require value to be ready one cycle earlier. */
27981 if (insn_type == TYPE_FMOV
27982 && get_attr_memory (insn) == MEMORY_STORE
27983 && !ix86_agi_dependent (dep_insn, insn))
27987 case PROCESSOR_PENTIUMPRO:
27988 /* INT->FP conversion is expensive. */
27989 if (get_attr_fp_int_src (dep_insn))
27992 /* There is one cycle extra latency between an FP op and a store. */
27993 if (insn_type == TYPE_FMOV
27994 && (set = single_set (dep_insn)) != NULL_RTX
27995 && (set2 = single_set (insn)) != NULL_RTX
27996 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
27997 && MEM_P (SET_DEST (set2)))
28000 memory = get_attr_memory (insn);
28002 /* Show ability of reorder buffer to hide latency of load by executing
28003 in parallel with previous instruction in case
28004 previous instruction is not needed to compute the address. */
28005 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
28006 && !ix86_agi_dependent (dep_insn, insn))
28008 /* Claim moves to take one cycle, as core can issue one load
28009 at time and the next load can start cycle later. */
28010 if (dep_insn_type == TYPE_IMOV
28011 || dep_insn_type == TYPE_FMOV)
28019 /* The esp dependency is resolved before
28020 the instruction is really finished. */
28021 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
28022 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
28025 /* INT->FP conversion is expensive. */
28026 if (get_attr_fp_int_src (dep_insn))
28029 memory = get_attr_memory (insn);
28031 /* Show ability of reorder buffer to hide latency of load by executing
28032 in parallel with previous instruction in case
28033 previous instruction is not needed to compute the address. */
28034 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
28035 && !ix86_agi_dependent (dep_insn, insn))
28037 /* Claim moves to take one cycle, as core can issue one load
28038 at time and the next load can start cycle later. */
28039 if (dep_insn_type == TYPE_IMOV
28040 || dep_insn_type == TYPE_FMOV)
28049 case PROCESSOR_AMDFAM10:
28050 case PROCESSOR_BDVER1:
28051 case PROCESSOR_BDVER2:
28052 case PROCESSOR_BDVER3:
28053 case PROCESSOR_BDVER4:
28054 case PROCESSOR_ZNVER1:
28055 case PROCESSOR_BTVER1:
28056 case PROCESSOR_BTVER2:
28057 case PROCESSOR_GENERIC:
28058 /* Stack engine allows to execute push&pop instructions in parall. */
28059 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
28060 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
28064 case PROCESSOR_ATHLON:
28066 memory = get_attr_memory (insn);
28068 /* Show ability of reorder buffer to hide latency of load by executing
28069 in parallel with previous instruction in case
28070 previous instruction is not needed to compute the address. */
28071 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
28072 && !ix86_agi_dependent (dep_insn, insn))
28074 enum attr_unit unit = get_attr_unit (insn);
28077 /* Because of the difference between the length of integer and
28078 floating unit pipeline preparation stages, the memory operands
28079 for floating point are cheaper.
28081 ??? For Athlon it the difference is most probably 2. */
28082 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
28085 loadcost = TARGET_ATHLON ? 2 : 0;
28087 if (cost >= loadcost)
28094 case PROCESSOR_CORE2:
28095 case PROCESSOR_NEHALEM:
28096 case PROCESSOR_SANDYBRIDGE:
28097 case PROCESSOR_HASWELL:
28098 /* Stack engine allows to execute push&pop instructions in parall. */
28099 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
28100 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
28103 memory = get_attr_memory (insn);
28105 /* Show ability of reorder buffer to hide latency of load by executing
28106 in parallel with previous instruction in case
28107 previous instruction is not needed to compute the address. */
28108 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
28109 && !ix86_agi_dependent (dep_insn, insn))
28118 case PROCESSOR_SILVERMONT:
28119 case PROCESSOR_KNL:
28120 case PROCESSOR_INTEL:
28121 if (!reload_completed)
28124 /* Increase cost of integer loads. */
28125 memory = get_attr_memory (dep_insn);
28126 if (memory == MEMORY_LOAD || memory == MEMORY_BOTH)
28128 enum attr_unit unit = get_attr_unit (dep_insn);
28129 if (unit == UNIT_INTEGER && cost == 1)
28131 if (memory == MEMORY_LOAD)
28135 /* Increase cost of ld/st for short int types only
28136 because of store forwarding issue. */
28137 rtx set = single_set (dep_insn);
28138 if (set && (GET_MODE (SET_DEST (set)) == QImode
28139 || GET_MODE (SET_DEST (set)) == HImode))
28141 /* Increase cost of store/load insn if exact
28142 dependence exists and it is load insn. */
28143 enum attr_memory insn_memory = get_attr_memory (insn);
28144 if (insn_memory == MEMORY_LOAD
28145 && exact_store_load_dependency (dep_insn, insn))
28159 /* How many alternative schedules to try. This should be as wide as the
28160 scheduling freedom in the DFA, but no wider. Making this value too
28161 large results extra work for the scheduler. */
/* Returns the lookahead depth per tuned CPU; the elided return values
   between the case labels are not visible in this sampled listing.  */
28164 ia32_multipass_dfa_lookahead (void)
28168 case PROCESSOR_PENTIUM:
28169 case PROCESSOR_LAKEMONT:
28172 case PROCESSOR_PENTIUMPRO:
28176 case PROCESSOR_BDVER1:
28177 case PROCESSOR_BDVER2:
28178 case PROCESSOR_BDVER3:
28179 case PROCESSOR_BDVER4:
28180 /* We use lookahead value 4 for BD both before and after reload
28181 schedules. Plan is to have value 8 included for O3. */
28184 case PROCESSOR_CORE2:
28185 case PROCESSOR_NEHALEM:
28186 case PROCESSOR_SANDYBRIDGE:
28187 case PROCESSOR_HASWELL:
28188 case PROCESSOR_BONNELL:
28189 case PROCESSOR_SILVERMONT:
28190 case PROCESSOR_KNL:
28191 case PROCESSOR_INTEL:
28192 /* Generally, we want haifa-sched:max_issue() to look ahead as far
28193 as many instructions can be executed on a cycle, i.e.,
28194 issue_rate. I wonder why tuning for many CPUs does not do this. */
28195 if (reload_completed)
28196 return ix86_issue_rate ();
28197 /* Don't use lookahead for pre-reload schedule to save compile time. */
28205 /* Return true if target platform supports macro-fusion. */
/* Simple predicate over the tuning flag; used as TARGET_SCHED_MACRO_FUSION_P.  */
28208 ix86_macro_fusion_p ()
28210 return TARGET_FUSE_CMP_AND_BRANCH;
28213 /* Check whether current microarchitecture support macro fusion
28214 for insn pair "CONDGEN + CONDJMP". Refer to
28215 "Intel Architectures Optimization Reference Manual". */
/* CONDGEN is the flag-producing insn (test/cmp/incdec/alu), CONDJMP the
   conditional jump; returns whether the pair may macro-fuse.  Sampled
   listing — early returns between numbered lines are elided.  */
28218 ix86_macro_fusion_pair_p (rtx_insn *condgen, rtx_insn *condjmp)
28221 enum rtx_code ccode;
28222 rtx compare_set = NULL_RTX, test_if, cond;
28223 rtx alu_set = NULL_RTX, addr = NULL_RTX;
28225 if (!any_condjump_p (condjmp))
28228 if (get_attr_type (condgen) != TYPE_TEST
28229 && get_attr_type (condgen) != TYPE_ICMP
28230 && get_attr_type (condgen) != TYPE_INCDEC
28231 && get_attr_type (condgen) != TYPE_ALU)
28234 compare_set = single_set (condgen);
28235 if (compare_set == NULL_RTX
28236 && !TARGET_FUSE_ALU_AND_BRANCH)
28239 if (compare_set == NULL_RTX)
/* CONDGEN is a PARALLEL (alu op + flags set); locate the COMPARE part
   and remember the other SET as the alu part.  */
28242 rtx pat = PATTERN (condgen);
28243 for (i = 0; i < XVECLEN (pat, 0); i++)
28244 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
28246 rtx set_src = SET_SRC (XVECEXP (pat, 0, i));
28247 if (GET_CODE (set_src) == COMPARE)
28248 compare_set = XVECEXP (pat, 0, i);
28250 alu_set = XVECEXP (pat, 0, i);
28253 if (compare_set == NULL_RTX)
28255 src = SET_SRC (compare_set);
28256 if (GET_CODE (src) != COMPARE)
28259 /* Macro-fusion for cmp/test MEM-IMM + conditional jmp is not
28261 if ((MEM_P (XEXP (src, 0))
28262 && CONST_INT_P (XEXP (src, 1)))
28263 || (MEM_P (XEXP (src, 1))
28264 && CONST_INT_P (XEXP (src, 0))))
28267 /* No fusion for RIP-relative address. */
28268 if (MEM_P (XEXP (src, 0)))
28269 addr = XEXP (XEXP (src, 0), 0);
28270 else if (MEM_P (XEXP (src, 1)))
28271 addr = XEXP (XEXP (src, 1), 0);
28274 ix86_address parts;
28275 int ok = ix86_decompose_address (addr, &parts);
28278 if (rip_relative_addr_p (&parts))
28282 test_if = SET_SRC (pc_set (condjmp));
28283 cond = XEXP (test_if, 0);
28284 ccode = GET_CODE (cond);
28285 /* Check whether conditional jump use Sign or Overflow Flags. */
28286 if (!TARGET_FUSE_CMP_AND_BRANCH_SOFLAGS
28293 /* Return true for TYPE_TEST and TYPE_ICMP. */
28294 if (get_attr_type (condgen) == TYPE_TEST
28295 || get_attr_type (condgen) == TYPE_ICMP)
28298 /* The following is the case that macro-fusion for alu + jmp. */
28299 if (!TARGET_FUSE_ALU_AND_BRANCH || !alu_set)
28302 /* No fusion for alu op with memory destination operand. */
28303 dest = SET_DEST (alu_set);
28307 /* Macro-fusion for inc/dec + unsigned conditional jump is not
28309 if (get_attr_type (condgen) == TYPE_INCDEC
28319 /* Try to reorder ready list to take advantage of Atom pipelined IMUL
28320 execution. It is applied if
28321 (1) IMUL instruction is on the top of list;
28322 (2) There exists the only producer of independent IMUL instruction in
28324 Return index of IMUL producer if it was found and -1 otherwise. */
28326 do_reorder_for_imul (rtx_insn **ready, int n_ready)
28329 rtx set, insn1, insn2;
28330 sd_iterator_def sd_it;
/* Bonnell (Atom) only — bail out for every other tuning.  */
28335 if (!TARGET_BONNELL)
28338 /* Check that IMUL instruction is on the top of ready list. */
28339 insn = ready[n_ready - 1];
28340 set = single_set (insn);
28343 if (!(GET_CODE (SET_SRC (set)) == MULT
28344 && GET_MODE (SET_SRC (set)) == SImode))
28347 /* Search for producer of independent IMUL instruction. */
28348 for (i = n_ready - 2; i >= 0; i--)
28351 if (!NONDEBUG_INSN_P (insn))
28353 /* Skip IMUL instruction. */
28354 insn2 = PATTERN (insn);
28355 if (GET_CODE (insn2) == PARALLEL)
28356 insn2 = XVECEXP (insn2, 0, 0);
28357 if (GET_CODE (insn2) == SET
28358 && GET_CODE (SET_SRC (insn2)) == MULT
28359 && GET_MODE (SET_SRC (insn2)) == SImode)
/* Walk forward dependences of the candidate producer looking for a
   consumer that is itself an SImode IMUL.  */
28362 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
28365 con = DEP_CON (dep);
28366 if (!NONDEBUG_INSN_P (con))
28368 insn1 = PATTERN (con);
28369 if (GET_CODE (insn1) == PARALLEL)
28370 insn1 = XVECEXP (insn1, 0, 0);
28372 if (GET_CODE (insn1) == SET
28373 && GET_CODE (SET_SRC (insn1)) == MULT
28374 && GET_MODE (SET_SRC (insn1)) == SImode)
28376 sd_iterator_def sd_it1;
28378 /* Check if there is no other dependee for IMUL. */
28380 FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1)
28383 pro = DEP_PRO (dep1);
28384 if (!NONDEBUG_INSN_P (pro))
28399 /* Try to find the best candidate on the top of ready list if two insns
28400 have the same priority - candidate is best if its dependees were
28401 scheduled earlier. Applied for Silvermont only.
28402 Return true if top 2 insns must be interchanged. */
28404 swap_top_of_ready_list (rtx_insn **ready, int n_ready)
28406 rtx_insn *top = ready[n_ready - 1];
28407 rtx_insn *next = ready[n_ready - 2];
28409 sd_iterator_def sd_it;
/* INSN_TICK: cycle on which an insn's producers completed (haifa state).  */
28413 #define INSN_TICK(INSN) (HID (INSN)->tick)
28415 if (!TARGET_SILVERMONT && !TARGET_INTEL)
28418 if (!NONDEBUG_INSN_P (top))
28420 if (!NONJUMP_INSN_P (top))
28422 if (!NONDEBUG_INSN_P (next))
28424 if (!NONJUMP_INSN_P (next))
28426 set = single_set (top);
28429 set = single_set (next);
28433 if (INSN_PRIORITY_KNOWN (top) && INSN_PRIORITY_KNOWN (next))
28435 if (INSN_PRIORITY (top) != INSN_PRIORITY (next))
28437 /* Determine winner more precise. */
/* clock1/clock2 accumulate the latest producer tick of TOP and NEXT.  */
28438 FOR_EACH_DEP (top, SD_LIST_RES_BACK, sd_it, dep)
28441 pro = DEP_PRO (dep);
28442 if (!NONDEBUG_INSN_P (pro))
28444 if (INSN_TICK (pro) > clock1)
28445 clock1 = INSN_TICK (pro);
28447 FOR_EACH_DEP (next, SD_LIST_RES_BACK, sd_it, dep)
28450 pro = DEP_PRO (dep);
28451 if (!NONDEBUG_INSN_P (pro))
28453 if (INSN_TICK (pro) > clock2)
28454 clock2 = INSN_TICK (pro);
28457 if (clock1 == clock2)
28459 /* Determine winner - load must win. */
28460 enum attr_memory memory1, memory2;
28461 memory1 = get_attr_memory (top);
28462 memory2 = get_attr_memory (next);
28463 if (memory2 == MEMORY_LOAD && memory1 != MEMORY_LOAD)
/* Swap when NEXT's inputs became ready strictly earlier than TOP's.  */
28466 return (bool) (clock2 < clock1);
28472 /* Perform possible reodering of ready list for Atom/Silvermont only.
28473 Return issue rate. */
/* TARGET_SCHED_REORDER hook: may rotate the IMUL producer to the top
   (Bonnell) or swap the top two insns (Silvermont/Intel).  */
28475 ix86_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready,
28476 int *pn_ready, int clock_var)
28478 int issue_rate = -1;
28479 int n_ready = *pn_ready;
28484 /* Set up issue rate. */
28485 issue_rate = ix86_issue_rate ();
28487 /* Do reodering for BONNELL/SILVERMONT only. */
28488 if (!TARGET_BONNELL && !TARGET_SILVERMONT && !TARGET_INTEL)
28491 /* Nothing to do if ready list contains only 1 instruction. */
28495 /* Do reodering for post-reload scheduler only. */
28496 if (!reload_completed)
28499 if ((index = do_reorder_for_imul (ready, n_ready)) >= 0)
28501 if (sched_verbose > 1)
28502 fprintf (dump, ";;\tatom sched_reorder: put %d insn on top\n",
28503 INSN_UID (ready[index]));
28505 /* Put IMUL producer (ready[index]) at the top of ready list. */
28506 insn = ready[index];
28507 for (i = index; i < n_ready - 1; i++)
28508 ready[i] = ready[i + 1];
28509 ready[n_ready - 1] = insn;
28513 /* Skip selective scheduling since HID is not populated in it. */
28516 && swap_top_of_ready_list (ready, n_ready))
28518 if (sched_verbose > 1)
28519 fprintf (dump, ";;\tslm sched_reorder: swap %d and %d insns\n",
28520 INSN_UID (ready[n_ready - 1]), INSN_UID (ready[n_ready - 2]));
28521 /* Swap 2 top elements of ready list. */
28522 insn = ready[n_ready - 1];
28523 ready[n_ready - 1] = ready[n_ready - 2];
28524 ready[n_ready - 2] = insn;
/* Forward declaration used by the argument-motion helpers below.  */
28530 ix86_class_likely_spilled_p (reg_class_t);
28532 /* Returns true if lhs of insn is HW function argument register and set up
28533 is_spilled to true if it is likely spilled HW register. */
28535 insn_is_function_arg (rtx insn, bool* is_spilled)
28539 if (!NONDEBUG_INSN_P (insn))
28541 /* Call instructions are not movable, ignore it. */
28544 insn = PATTERN (insn);
28545 if (GET_CODE (insn) == PARALLEL)
28546 insn = XVECEXP (insn, 0, 0);
28547 if (GET_CODE (insn) != SET)
28549 dst = SET_DEST (insn);
28550 if (REG_P (dst) && HARD_REGISTER_P (dst)
28551 && ix86_function_arg_regno_p (REGNO (dst)))
28553 /* Is it likely spilled HW register? */
28554 if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
28555 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
28556 *is_spilled = true;
28562 /* Add output dependencies for chain of function adjacent arguments if only
28563 there is a move to likely spilled HW register. Return first argument
28564 if at least one dependence was added or NULL otherwise. */
/* CALL is the call insn; HEAD bounds the backward scan.  Sampled listing —
   loop structure between numbered lines is elided.  */
28566 add_parameter_dependencies (rtx_insn *call, rtx_insn *head)
28569 rtx_insn *last = call;
28570 rtx_insn *first_arg = NULL;
28571 bool is_spilled = false;
28573 head = PREV_INSN (head);
28575 /* Find nearest to call argument passing instruction. */
28578 last = PREV_INSN (last);
28581 if (!NONDEBUG_INSN_P (last))
28583 if (insn_is_function_arg (last, &is_spilled))
28591 insn = PREV_INSN (last);
28592 if (!INSN_P (insn))
28596 if (!NONDEBUG_INSN_P (insn))
28601 if (insn_is_function_arg (insn, &is_spilled))
28603 /* Add output depdendence between two function arguments if chain
28604 of output arguments contains likely spilled HW registers. */
28606 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
28607 first_arg = last = insn;
28617 /* Add output or anti dependency from insn to first_arg to restrict its code
/* motion across the argument-setup sequence of a call.  */
28620 avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn)
28625 /* Add anti dependencies for bounds stores. */
/* Pointer Bounds Checker BNDSTX stores are unspecs, not SETs, so they
   need explicit handling before the single_set path below.  */
28627 && GET_CODE (PATTERN (insn)) == PARALLEL
28628 && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == UNSPEC
28629 && XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_BNDSTX)
28631 add_dependence (first_arg, insn, REG_DEP_ANTI);
28635 set = single_set (insn);
28638 tmp = SET_DEST (set);
28641 /* Add output dependency to the first function argument. */
28642 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
28645 /* Add anti dependency. */
28646 add_dependence (first_arg, insn, REG_DEP_ANTI);
28649 /* Avoid cross block motion of function argument through adding dependency
28650 from the first non-jump instruction in bb. */
/* Scans BB backwards from its end for the first single-set non-jump insn
   and ties ARG to it via avoid_func_arg_motion.  */
28652 add_dependee_for_func_arg (rtx_insn *arg, basic_block bb)
28654 rtx_insn *insn = BB_END (bb);
28658 if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
28660 rtx set = single_set (insn);
28663 avoid_func_arg_motion (arg, insn);
28667 if (insn == BB_HEAD (bb))
28669 insn = PREV_INSN (insn);
28673 /* Hook for pre-reload schedule - avoid motion of function arguments
28674 passed in likely spilled HW registers. */
28676 ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
28679 rtx_insn *first_arg = NULL;
/* Post-reload scheduling doesn't need this protection.  */
28680 if (reload_completed)
28682 while (head != tail && DEBUG_INSN_P (head))
28683 head = NEXT_INSN (head);
28684 for (insn = tail; insn != head; insn = PREV_INSN (insn))
28685 if (INSN_P (insn) && CALL_P (insn))
28687 first_arg = add_parameter_dependencies (insn, head);
28690 /* Add dependee for first argument to predecessors if only
28691 region contains more than one block. */
28692 basic_block bb = BLOCK_FOR_INSN (insn);
28693 int rgn = CONTAINING_RGN (bb->index);
28694 int nr_blks = RGN_NR_BLOCKS (rgn);
28695 /* Skip trivial regions and region head blocks that can have
28696 predecessors outside of region. */
28697 if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
28702 /* Regions are SCCs with the exception of selective
28703 scheduling with pipelining of outer blocks enabled.
28704 So also check that immediate predecessors of a non-head
28705 block are in the same region. */
28706 FOR_EACH_EDGE (e, ei, bb->preds)
28708 /* Avoid creating of loop-carried dependencies through
28709 using topological ordering in the region. */
28710 if (rgn == CONTAINING_RGN (e->src->index)
28711 && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
28712 add_dependee_for_func_arg (first_arg, e->src);
28720 else if (first_arg)
28721 avoid_func_arg_motion (first_arg, insn);
28724 /* Hook for pre-reload schedule - set priority of moves from likely spilled
28725 HW registers to maximum, to schedule them at soon as possible. These are
28726 moves from function argument registers at the top of the function entry
28727 and moves from function return value registers after call. */
28729 ix86_adjust_priority (rtx_insn *insn, int priority)
/* Only the pre-reload scheduler adjusts; post-reload keeps PRIORITY.  */
28733 if (reload_completed)
28736 if (!NONDEBUG_INSN_P (insn))
28739 set = single_set (insn);
28742 rtx tmp = SET_SRC (set);
28744 && HARD_REGISTER_P (tmp)
28745 && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
28746 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
28747 return current_sched_info->sched_max_insns_priority;
28753 /* Model decoder of Core 2/i7.
28754 Below hooks for multipass scheduling (see haifa-sched.c:max_issue)
28755 track the instruction fetch block boundaries and make sure that long
28756 (9+ bytes) instructions are assigned to D0. */
28758 /* Maximum length of an insn that can be handled by
28759 a secondary decoder unit. '8' for Core 2/i7. */
28760 static int core2i7_secondary_decoder_max_insn_size;
28762 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
28763 '16' for Core 2/i7. */
28764 static int core2i7_ifetch_block_size;
28766 /* Maximum number of instructions decoder can handle per cycle.
28767 '6' for Core 2/i7. */
28768 static int core2i7_ifetch_block_max_insns;
/* Mutable/const pointer typedefs for the per-round multipass state.  */
28770 typedef struct ix86_first_cycle_multipass_data_ *
28771 ix86_first_cycle_multipass_data_t;
28772 typedef const struct ix86_first_cycle_multipass_data_ *
28773 const_ix86_first_cycle_multipass_data_t;
28775 /* A variable to store target state across calls to max_issue within
28777 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
28778 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
28780 /* Initialize DATA. */
/* TARGET_SCHED_FIRST_CYCLE_MULTIPASS_INIT hook: zero the ifetch counters
   and leave the ready_try bitmap unallocated (lazily created on issue).  */
28782 core2i7_first_cycle_multipass_init (void *_data)
28784 ix86_first_cycle_multipass_data_t data
28785 = (ix86_first_cycle_multipass_data_t) _data;
28787 data->ifetch_block_len = 0;
28788 data->ifetch_block_n_insns = 0;
28789 data->ready_try_change = NULL;
28790 data->ready_try_change_size = 0;
28793 /* Advancing the cycle; reset ifetch block counts. */
28795 core2i7_dfa_post_advance_cycle (void)
28797 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
28799 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
28801 data->ifetch_block_len = 0;
28802 data->ifetch_block_n_insns = 0;
/* Insn-length estimator defined later in this file.  */
28805 static int min_insn_size (rtx_insn *);
28807 /* Filter out insns from ready_try that the core will not be able to issue
28808 on current cycle due to decoder. */
28810 core2i7_first_cycle_multipass_filter_ready_try
28811 (const_ix86_first_cycle_multipass_data_t data,
28812 signed char *ready_try, int n_ready, bool first_cycle_insn_p)
/* Body is a loop over ready list indices (elided in this sampled listing);
   N_READY here acts as the loop index into READY_TRY.  */
28819 if (ready_try[n_ready])
28822 insn = get_ready_element (n_ready);
28823 insn_size = min_insn_size (insn);
28825 if (/* If this is a too long an insn for a secondary decoder ... */
28826 (!first_cycle_insn_p
28827 && insn_size > core2i7_secondary_decoder_max_insn_size)
28828 /* ... or it would not fit into the ifetch block ... */
28829 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
28830 /* ... or the decoder is full already ... */
28831 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
28832 /* ... mask the insn out. */
28834 ready_try[n_ready] = 1;
/* Record the masking so backtracking can undo it.  */
28836 if (data->ready_try_change)
28837 bitmap_set_bit (data->ready_try_change, n_ready);
28842 /* Prepare for a new round of multipass lookahead scheduling. */
28844 core2i7_first_cycle_multipass_begin (void *_data,
28845 signed char *ready_try, int n_ready,
28846 bool first_cycle_insn_p)
28848 ix86_first_cycle_multipass_data_t data
28849 = (ix86_first_cycle_multipass_data_t) _data;
28850 const_ix86_first_cycle_multipass_data_t prev_data
28851 = ix86_first_cycle_multipass_data;
28853 /* Restore the state from the end of the previous round. */
28854 data->ifetch_block_len = prev_data->ifetch_block_len;
28855 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
28857 /* Filter instructions that cannot be issued on current cycle due to
28858 decoder restrictions. */
28859 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
28860 first_cycle_insn_p);
28863 /* INSN is being issued in current solution. Account for its impact on
28864 the decoder model. */
28866 core2i7_first_cycle_multipass_issue (void *_data,
28867 signed char *ready_try, int n_ready,
28868 rtx_insn *insn, const void *_prev_data)
28870 ix86_first_cycle_multipass_data_t data
28871 = (ix86_first_cycle_multipass_data_t) _data;
28872 const_ix86_first_cycle_multipass_data_t prev_data
28873 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
28875 int insn_size = min_insn_size (insn);
/* Extend the current ifetch block by this insn; the filter hook is
   expected to have excluded anything that would overflow it.  */
28877 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
28878 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
28879 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
28880 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
28882 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
28883 if (!data->ready_try_change)
28885 data->ready_try_change = sbitmap_alloc (n_ready);
28886 data->ready_try_change_size = n_ready;
28888 else if (data->ready_try_change_size < n_ready)
28890 data->ready_try_change = sbitmap_resize (data->ready_try_change,
28892 data->ready_try_change_size = n_ready;
28894 bitmap_clear (data->ready_try_change);
28896 /* Filter out insns from ready_try that the core will not be able to issue
28897 on current cycle due to decoder. */
28898 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
28902 /* Revert the effect on ready_try. */
/* Clears every ready_try slot this round's filtering masked, using the
   change bitmap recorded in the issue hook.  */
28904 core2i7_first_cycle_multipass_backtrack (const void *_data,
28905 signed char *ready_try,
28906 int n_ready ATTRIBUTE_UNUSED)
28908 const_ix86_first_cycle_multipass_data_t data
28909 = (const_ix86_first_cycle_multipass_data_t) _data;
28910 unsigned int i = 0;
28911 sbitmap_iterator sbi;
28913 gcc_assert (bitmap_last_set_bit (data->ready_try_change) < n_ready);
28914 EXECUTE_IF_SET_IN_BITMAP (data->ready_try_change, 0, i, sbi)
28920 /* Save the result of multipass lookahead scheduling for the next round. */
28922 core2i7_first_cycle_multipass_end (const void *_data)
28924 const_ix86_first_cycle_multipass_data_t data
28925 = (const_ix86_first_cycle_multipass_data_t) _data;
28926 ix86_first_cycle_multipass_data_t next_data
28927 = ix86_first_cycle_multipass_data;
/* Persist ifetch counters into the global round-to-round state.  */
28931 next_data->ifetch_block_len = data->ifetch_block_len;
28932 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
28936 /* Deallocate target data. */
/* Frees the lazily-allocated change bitmap; counters need no cleanup.  */
28938 core2i7_first_cycle_multipass_fini (void *_data)
28940 ix86_first_cycle_multipass_data_t data
28941 = (ix86_first_cycle_multipass_data_t) _data;
28943 if (data->ready_try_change)
28945 sbitmap_free (data->ready_try_change);
28946 data->ready_try_change = NULL;
28947 data->ready_try_change_size = 0;
28951 /* Prepare for scheduling pass. */
28953 ix86_sched_init_global (FILE *, int, int)
28955 /* Install scheduling hooks for current CPU. Some of these hooks are used
28956 in time-critical parts of the scheduler, so we only set them up when
28957 they are actually used. */
28960 case PROCESSOR_CORE2:
28961 case PROCESSOR_NEHALEM:
28962 case PROCESSOR_SANDYBRIDGE:
28963 case PROCESSOR_HASWELL:
28964 /* Do not perform multipass scheduling for pre-reload schedule
28965 to save compile time. */
28966 if (reload_completed)
28968 targetm.sched.dfa_post_advance_cycle
28969 = core2i7_dfa_post_advance_cycle;
28970 targetm.sched.first_cycle_multipass_init
28971 = core2i7_first_cycle_multipass_init;
28972 targetm.sched.first_cycle_multipass_begin
28973 = core2i7_first_cycle_multipass_begin;
28974 targetm.sched.first_cycle_multipass_issue
28975 = core2i7_first_cycle_multipass_issue;
28976 targetm.sched.first_cycle_multipass_backtrack
28977 = core2i7_first_cycle_multipass_backtrack;
28978 targetm.sched.first_cycle_multipass_end
28979 = core2i7_first_cycle_multipass_end;
28980 targetm.sched.first_cycle_multipass_fini
28981 = core2i7_first_cycle_multipass_fini;
28983 /* Set decoder parameters. */
28984 core2i7_secondary_decoder_max_insn_size = 8;
28985 core2i7_ifetch_block_size = 16;
28986 core2i7_ifetch_block_max_insns = 6;
28989 /* ... Fall through ... */
/* Non-core2i7 CPUs (and pre-reload core2i7) get the hooks cleared.  */
28991 targetm.sched.dfa_post_advance_cycle = NULL;
28992 targetm.sched.first_cycle_multipass_init = NULL;
28993 targetm.sched.first_cycle_multipass_begin = NULL;
28994 targetm.sched.first_cycle_multipass_issue = NULL;
28995 targetm.sched.first_cycle_multipass_backtrack = NULL;
28996 targetm.sched.first_cycle_multipass_end = NULL;
28997 targetm.sched.first_cycle_multipass_fini = NULL;
29003 /* Compute the alignment given to a constant that is being placed in memory.
29004 EXP is the constant and ALIGN is the alignment that the object would
29006 The value of this function is used instead of that alignment to align
/* Returns the (possibly increased) alignment in bits.  */
29010 ix86_constant_alignment (tree exp, int align)
29012 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
29013 || TREE_CODE (exp) == INTEGER_CST)
29015 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
29017 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
/* Long string constants get word alignment to speed up block copies.  */
29020 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
29021 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
29022 return BITS_PER_WORD;
29027 /* Compute the alignment for a variable for Intel MCU psABI. TYPE is
29028 the data type, and ALIGN is the alignment that the object would
29029 ordinarily have. */
29032 iamcu_alignment (tree type, int align)
29034 enum machine_mode mode;
/* Nothing to cap for small or user-specified alignment.  */
29036 if (align < 32 || TYPE_USER_ALIGN (type))
29039 /* Intel MCU psABI specifies scalar types > 4 bytes aligned to 4
29041 mode = TYPE_MODE (strip_array_types (type));
29042 switch (GET_MODE_CLASS (mode))
29045 case MODE_COMPLEX_INT:
29046 case MODE_COMPLEX_FLOAT:
29048 case MODE_DECIMAL_FLOAT:
29055 /* Compute the alignment for a static variable.
29056 TYPE is the data type, and ALIGN is the alignment that
29057 the object would ordinarily have. The value of this function is used
29058 instead of that alignment to align the object. */
/* OPT distinguishes optional (performance) alignment from ABI-mandated
   alignment; only the latter is applied when OPT is false.  */
29061 ix86_data_alignment (tree type, int align, bool opt)
29063 /* GCC 4.8 and earlier used to incorrectly assume this alignment even
29064 for symbols from other compilation units or symbols that don't need
29065 to bind locally. In order to preserve some ABI compatibility with
29066 those compilers, ensure we don't decrease alignment from what we
29069 int max_align_compat = MIN (256, MAX_OFILE_ALIGNMENT);
29071 /* A data structure, equal or greater than the size of a cache line
29072 (64 bytes in the Pentium 4 and other recent Intel processors, including
29073 processors based on Intel Core microarchitecture) should be aligned
29074 so that its base address is a multiple of a cache line size. */
29077 = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
29079 if (max_align < BITS_PER_WORD)
29080 max_align = BITS_PER_WORD;
29082 switch (ix86_align_data_type)
29084 case ix86_align_data_type_abi: opt = false; break;
29085 case ix86_align_data_type_compat: max_align = BITS_PER_WORD; break;
29086 case ix86_align_data_type_cacheline: break;
29090 align = iamcu_alignment (type, align);
29093 && AGGREGATE_TYPE_P (type)
29094 && TYPE_SIZE (type)
29095 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
29097 if (wi::geu_p (TYPE_SIZE (type), max_align_compat)
29098 && align < max_align_compat)
29099 align = max_align_compat;
29100 if (wi::geu_p (TYPE_SIZE (type), max_align)
29101 && align < max_align)
29105 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
29106 to 16byte boundary. */
29109 if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
29110 && TYPE_SIZE (type)
29111 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
29112 && wi::geu_p (TYPE_SIZE (type), 128)
/* The remaining cases raise alignment by element/field mode for
   performance (OPT) reasons only.  */
29120 if (TREE_CODE (type) == ARRAY_TYPE)
29122 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
29124 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
29127 else if (TREE_CODE (type) == COMPLEX_TYPE)
29130 if (TYPE_MODE (type) == DCmode && align < 64)
29132 if ((TYPE_MODE (type) == XCmode
29133 || TYPE_MODE (type) == TCmode) && align < 128)
29136 else if ((TREE_CODE (type) == RECORD_TYPE
29137 || TREE_CODE (type) == UNION_TYPE
29138 || TREE_CODE (type) == QUAL_UNION_TYPE)
29139 && TYPE_FIELDS (type))
29141 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
29143 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
29146 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
29147 || TREE_CODE (type) == INTEGER_TYPE)
29149 if (TYPE_MODE (type) == DFmode && align < 64)
29151 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
29158 /* Compute the alignment for a local variable or a stack slot. EXP is
29159 the data type or decl itself, MODE is the widest mode available and
29160 ALIGN is the alignment that the object would ordinarily have. The
29161 value of this macro is used instead of that alignment to align the
/* NOTE(review): this is a partial numbered listing -- the embedded line
   numbering jumps (29161 -> 29165, 29172 -> 29181, ...), so declarations,
   braces, 'else' arms and return statements have been elided.  The comments
   below annotate only the visible fragments; confirm every claim against the
   complete gcc/config/i386/i386.c before relying on it.  */
29165 ix86_local_alignment (tree exp, machine_mode mode,
29166 unsigned int align)
/* EXP may be either a decl or a type; when it is a decl, the type is taken
   from it (the decl itself is presumably kept in an elided 'decl' local).  */
29170 if (exp && DECL_P (exp))
29172 type = TREE_TYPE (exp);
29181 /* Don't do dynamic stack realignment for long long objects with
29182 -mpreferred-stack-boundary=2. */
29185 && ix86_preferred_stack_boundary < 64
29186 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
29187 && (!type || !TYPE_USER_ALIGN (type))
29188 && (!decl || !DECL_USER_ALIGN (decl)))
29191 /* If TYPE is NULL, we are allocating a stack slot for caller-save
29192 register in MODE. We will return the largest alignment of XF
29196 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
29197 align = GET_MODE_ALIGNMENT (DFmode);
29201 /* Don't increase alignment for Intel MCU psABI. */
29205 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
29206 to 16byte boundary. Exact wording is:
29208 An array uses the same alignment as its elements, except that a local or
29209 global array variable of length at least 16 bytes or
29210 a C99 variable-length array variable always has alignment of at least 16 bytes.
29212 This was added to allow use of aligned SSE instructions at arrays. This
29213 rule is meant for static storage (where compiler can not do the analysis
29214 by itself). We follow it for automatic variables only when convenient.
29215 We fully control everything in the function compiled and functions from
29216 other unit can not rely on the alignment.
29218 Exclude va_list type. It is the common case of local array where
29219 we can not benefit from the alignment.
29221 TODO: Probably one should optimize for size only when var is not escaping. */
29222 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
29225 if (AGGREGATE_TYPE_P (type)
29226 && (va_list_type_node == NULL_TREE
29227 || (TYPE_MAIN_VARIANT (type)
29228 != TYPE_MAIN_VARIANT (va_list_type_node)))
29229 && TYPE_SIZE (type)
29230 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
29231 && wi::geu_p (TYPE_SIZE (type), 16)
/* Below: per-tree-code bumps of ALIGN for modes that benefit from 64- or
   128-bit alignment.  The 'align = ...' assignments that each visible 'if'
   guards have been elided from this listing.  */
29235 if (TREE_CODE (type) == ARRAY_TYPE)
29237 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
29239 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
29242 else if (TREE_CODE (type) == COMPLEX_TYPE)
29244 if (TYPE_MODE (type) == DCmode && align < 64)
29246 if ((TYPE_MODE (type) == XCmode
29247 || TYPE_MODE (type) == TCmode) && align < 128)
29250 else if ((TREE_CODE (type) == RECORD_TYPE
29251 || TREE_CODE (type) == UNION_TYPE
29252 || TREE_CODE (type) == QUAL_UNION_TYPE)
29253 && TYPE_FIELDS (type))
29255 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
29257 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
29260 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
29261 || TREE_CODE (type) == INTEGER_TYPE)
29264 if (TYPE_MODE (type) == DFmode && align < 64)
29266 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
29272 /* Compute the minimum required alignment for dynamic stack realignment
29273 purposes for a local variable, parameter or a stack slot. EXP is
29274 the data type or decl itself, MODE is its mode and ALIGN is the
29275 alignment that the object would ordinarily have. */
/* NOTE(review): elided numbered listing -- the local declarations, braces
   and return statements between the visible lines (29279 -> 29283,
   29285 -> 29294, after 29301) are missing.  Verify against the full
   gcc/config/i386/i386.c.  */
29278 ix86_minimum_alignment (tree exp, machine_mode mode,
29279 unsigned int align)
29283 if (exp && DECL_P (exp))
29285 type = TREE_TYPE (exp);
/* Fast path: in 64-bit mode, for non-64-bit alignments, or when the
   preferred stack boundary already reaches 64 bits, no adjustment is
   needed (the elided body presumably just returns ALIGN here).  */
29294 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
29297 /* Don't do dynamic stack realignment for long long objects with
29298 -mpreferred-stack-boundary=2. */
29299 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
29300 && (!type || !TYPE_USER_ALIGN (type))
29301 && (!decl || !DECL_USER_ALIGN (decl)))
29307 /* Find a location for the static chain incoming to a nested function.
29308 This is a register, unless all free registers are used by arguments. */
/* NOTE(review): elided numbered listing -- regno declarations/assignments,
   the early-return body after 29318, the 64-bit R10 return after 29323, and
   several braces are missing.  Confirm against the full source.  */
29311 ix86_static_chain (const_tree fndecl_or_type, bool incoming_p)
29315 /* While this function won't be called by the middle-end when a static
29316 chain isn't needed, it's also used throughout the backend so it's
29317 easiest to keep this check centralized. */
29318 if (DECL_P (fndecl_or_type) && !DECL_STATIC_CHAIN (fndecl_or_type))
29323 /* We always use R10 in 64-bit mode. */
29328 const_tree fntype, fndecl;
29331 /* By default in 32-bit mode we use ECX to pass the static chain. */
/* Normalize FNDECL_OR_TYPE into separate FNTYPE/FNDECL so the calling
   convention can be queried below.  */
29334 if (TREE_CODE (fndecl_or_type) == FUNCTION_DECL)
29336 fntype = TREE_TYPE (fndecl_or_type);
29337 fndecl = fndecl_or_type;
29341 fntype = fndecl_or_type;
/* Pick a chain register that the function's calling convention leaves
   free; the 'regno = ...' assignments each branch makes are elided here.  */
29345 ccvt = ix86_get_callcvt (fntype);
29346 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
29348 /* Fastcall functions use ecx/edx for arguments, which leaves
29349 us with EAX for the static chain.
29350 Thiscall functions use ecx for arguments, which also
29351 leaves us with EAX for the static chain. */
29354 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
29356 /* Thiscall functions use ecx for arguments, which leaves
29357 us with EAX and EDX for the static chain.
29358 We are using for abi-compatibility EAX. */
29361 else if (ix86_function_regparm (fntype, fndecl) == 3)
29363 /* For regparm 3, we have no free call-clobbered registers in
29364 which to store the static chain. In order to implement this,
29365 we have the trampoline push the static chain to the stack.
29366 However, we can't push a value below the return address when
29367 we call the nested function directly, so we have to use an
29368 alternate entry point. For this we use ESI, and have the
29369 alternate entry point push ESI, so that things appear the
29370 same once we're executing the nested function. */
29373 if (fndecl == current_function_decl)
29374 ix86_static_chain_on_stack = true;
/* Stack-based chain: a frame MEM at arg_pointer - 8 (below the return
   address and the saved register pushed by the alternate entry).  */
29375 return gen_frame_mem (SImode,
29376 plus_constant (Pmode,
29377 arg_pointer_rtx, -8));
29383 return gen_rtx_REG (Pmode, regno);
29386 /* Emit RTL insns to initialize the variable parts of a trampoline.
29387 FNDECL is the decl of the target address; M_TRAMP is a MEM for
29388 the trampoline, and CHAIN_VALUE is an RTX for the static chain
29389 to be passed to the target function. */
/* NOTE(review): elided numbered listing -- the local declarations (mem,
   fnaddr, offset, opcode, disp, chain), the TARGET_64BIT/32-bit branch
   structure, 'offset += ...' bookkeeping, and several braces/else arms are
   missing between the visible lines.  The emitted machine-code bytes below
   (0xbb41 = movl imm,%r11d; 0xbb49 = movabs imm,%r11; 0x90e3ff49 =
   jmp *%r11 + nop; 0xb8/0xb9 = movl imm,%eax/%ecx; 0xe9 = jmp rel32) should
   be confirmed against the complete source before any modification.  */
29392 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
29398 fnaddr = XEXP (DECL_RTL (fndecl), 0);
29404 /* Load the function address to r11. Try to load address using
29405 the shorter movl instead of movabs. We may want to support
29406 movq for kernel mode, but kernel does not use trampolines at
29407 the moment. FNADDR is a 32bit address and may not be in
29408 DImode when ptr_mode == SImode. Always use movl in this
29410 if (ptr_mode == SImode
29411 || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
29413 fnaddr = copy_addr_to_reg (fnaddr);
29415 mem = adjust_address (m_tramp, HImode, offset);
29416 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
29418 mem = adjust_address (m_tramp, SImode, offset + 2);
29419 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
29424 mem = adjust_address (m_tramp, HImode, offset);
29425 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
29427 mem = adjust_address (m_tramp, DImode, offset + 2);
29428 emit_move_insn (mem, fnaddr);
29432 /* Load static chain using movabs to r10. Use the shorter movl
29433 instead of movabs when ptr_mode == SImode. */
29434 if (ptr_mode == SImode)
29445 mem = adjust_address (m_tramp, HImode, offset);
29446 emit_move_insn (mem, gen_int_mode (opcode, HImode));
29448 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
29449 emit_move_insn (mem, chain_value);
29452 /* Jump to r11; the last (unused) byte is a nop, only there to
29453 pad the write out to a single 32-bit store. */
29454 mem = adjust_address (m_tramp, SImode, offset);
29455 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
/* 32-bit path (presumably the 'else' of an elided TARGET_64BIT test).  */
29462 /* Depending on the static chain location, either load a register
29463 with a constant, or push the constant to the stack. All of the
29464 instructions are the same size. */
29465 chain = ix86_static_chain (fndecl, true);
29468 switch (REGNO (chain))
29471 opcode = 0xb8; break;
29473 opcode = 0xb9; break;
29475 gcc_unreachable ();
29481 mem = adjust_address (m_tramp, QImode, offset);
29482 emit_move_insn (mem, gen_int_mode (opcode, QImode));
29484 mem = adjust_address (m_tramp, SImode, offset + 1);
29485 emit_move_insn (mem, chain_value);
/* Emit the trailing 'jmp rel32' (0xe9) to the nested function.  */
29488 mem = adjust_address (m_tramp, QImode, offset);
29489 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
29491 mem = adjust_address (m_tramp, SImode, offset + 1);
29493 /* Compute offset from the end of the jmp to the target function.
29494 In the case in which the trampoline stores the static chain on
29495 the stack, we need to skip the first insn which pushes the
29496 (call-saved) register static chain; this push is 1 byte. */
29498 disp = expand_binop (SImode, sub_optab, fnaddr,
29499 plus_constant (Pmode, XEXP (m_tramp, 0),
29500 offset - (MEM_P (chain) ? 1 : 0)),
29501 NULL_RTX, 1, OPTAB_DIRECT);
29502 emit_move_insn (mem, disp);
29505 gcc_assert (offset <= TRAMPOLINE_SIZE);
/* On targets that need it, mark the stack executable at runtime.  */
29507 #ifdef HAVE_ENABLE_EXECUTE_STACK
29508 #ifdef CHECK_EXECUTE_STACK_ENABLED
29509 if (CHECK_EXECUTE_STACK_ENABLED)
29511 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
29512 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
29516 /* The following file contains several enumerations and data structures
29517 built from the definitions in i386-builtin-types.def. */
29519 #include "i386-builtin-types.inc"
29521 /* Table for the ix86 builtin non-function types. */
29522 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
29524 /* Retrieve an element from the above table, building some of
29525 the types lazily. */
/* NOTE(review): elided numbered listing -- the return type, 'tree type'
   declaration, early return when the cache entry is non-NULL, braces and the
   final 'return type;' are missing between the visible lines.  */
29528 ix86_get_builtin_type (enum ix86_builtin_type tcode)
29530 unsigned int index;
29533 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
/* Cache lookup: memoized results live in ix86_builtin_type_tab.  */
29535 type = ix86_builtin_type_tab[(int) tcode];
29539 gcc_assert (tcode > IX86_BT_LAST_PRIM);
/* TCODEs after the primitives and up to IX86_BT_LAST_VECT are vector
   types: build them from their base element type and machine mode.  */
29540 if (tcode <= IX86_BT_LAST_VECT)
29544 index = tcode - IX86_BT_LAST_PRIM - 1;
29545 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
29546 mode = ix86_builtin_type_vect_mode[index];
29548 type = build_vector_type_for_mode (itype, mode);
/* Remaining TCODEs are pointer types; those past IX86_BT_LAST_PTR are
   pointers to const.  */
29554 index = tcode - IX86_BT_LAST_VECT - 1;
29555 if (tcode <= IX86_BT_LAST_PTR)
29556 quals = TYPE_UNQUALIFIED;
29558 quals = TYPE_QUAL_CONST;
29560 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
29561 if (quals != TYPE_UNQUALIFIED)
29562 itype = build_qualified_type (itype, quals);
29564 type = build_pointer_type (itype);
/* Memoize the freshly built type for subsequent lookups.  */
29567 ix86_builtin_type_tab[(int) tcode] = type;
29571 /* Table for the ix86 builtin function types. */
29572 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
29574 /* Retrieve an element from the above table, building some of
29575 the types lazily. */
/* NOTE(review): elided numbered listing -- the return type, 'tree type'
   declaration, the early return for a cached entry, loop braces and the
   final 'return type;' are missing between the visible lines.  */
29578 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
29582 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab))&#x3B;
29584 type = ix86_builtin_func_type_tab[(int) tcode];
/* TCODEs up to IX86_BT_LAST_FUNC describe a prototype directly: the
   ix86_builtin_func_args slice [start, after) holds the return type
   followed by the argument types.  */
29588 if (tcode <= IX86_BT_LAST_FUNC)
29590 unsigned start = ix86_builtin_func_start[(int) tcode];
29591 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
29592 tree rtype, atype, args = void_list_node;
29595 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
/* Walk the argument codes backwards so the cons list ends up in
   declaration order, terminated by void_list_node.  */
29596 for (i = after - 1; i > start; --i)
29598 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
29599 args = tree_cons (NULL, atype, args);
29602 type = build_function_type (rtype, args);
/* TCODEs past IX86_BT_LAST_FUNC are aliases: resolve recursively.  */
29606 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
29607 enum ix86_builtin_func_type icode;
29609 icode = ix86_builtin_func_alias_base[index];
29610 type = ix86_get_builtin_func_type (icode);
/* Memoize for subsequent lookups.  */
29613 ix86_builtin_func_type_tab[(int) tcode] = type;
29618 /* Codes for all the SSE/MMX builtins. */
29621 IX86_BUILTIN_ADDPS,
29622 IX86_BUILTIN_ADDSS,
29623 IX86_BUILTIN_DIVPS,
29624 IX86_BUILTIN_DIVSS,
29625 IX86_BUILTIN_MULPS,
29626 IX86_BUILTIN_MULSS,
29627 IX86_BUILTIN_SUBPS,
29628 IX86_BUILTIN_SUBSS,
29630 IX86_BUILTIN_CMPEQPS,
29631 IX86_BUILTIN_CMPLTPS,
29632 IX86_BUILTIN_CMPLEPS,
29633 IX86_BUILTIN_CMPGTPS,
29634 IX86_BUILTIN_CMPGEPS,
29635 IX86_BUILTIN_CMPNEQPS,
29636 IX86_BUILTIN_CMPNLTPS,
29637 IX86_BUILTIN_CMPNLEPS,
29638 IX86_BUILTIN_CMPNGTPS,
29639 IX86_BUILTIN_CMPNGEPS,
29640 IX86_BUILTIN_CMPORDPS,
29641 IX86_BUILTIN_CMPUNORDPS,
29642 IX86_BUILTIN_CMPEQSS,
29643 IX86_BUILTIN_CMPLTSS,
29644 IX86_BUILTIN_CMPLESS,
29645 IX86_BUILTIN_CMPNEQSS,
29646 IX86_BUILTIN_CMPNLTSS,
29647 IX86_BUILTIN_CMPNLESS,
29648 IX86_BUILTIN_CMPORDSS,
29649 IX86_BUILTIN_CMPUNORDSS,
29651 IX86_BUILTIN_COMIEQSS,
29652 IX86_BUILTIN_COMILTSS,
29653 IX86_BUILTIN_COMILESS,
29654 IX86_BUILTIN_COMIGTSS,
29655 IX86_BUILTIN_COMIGESS,
29656 IX86_BUILTIN_COMINEQSS,
29657 IX86_BUILTIN_UCOMIEQSS,
29658 IX86_BUILTIN_UCOMILTSS,
29659 IX86_BUILTIN_UCOMILESS,
29660 IX86_BUILTIN_UCOMIGTSS,
29661 IX86_BUILTIN_UCOMIGESS,
29662 IX86_BUILTIN_UCOMINEQSS,
29664 IX86_BUILTIN_CVTPI2PS,
29665 IX86_BUILTIN_CVTPS2PI,
29666 IX86_BUILTIN_CVTSI2SS,
29667 IX86_BUILTIN_CVTSI642SS,
29668 IX86_BUILTIN_CVTSS2SI,
29669 IX86_BUILTIN_CVTSS2SI64,
29670 IX86_BUILTIN_CVTTPS2PI,
29671 IX86_BUILTIN_CVTTSS2SI,
29672 IX86_BUILTIN_CVTTSS2SI64,
29674 IX86_BUILTIN_MAXPS,
29675 IX86_BUILTIN_MAXSS,
29676 IX86_BUILTIN_MINPS,
29677 IX86_BUILTIN_MINSS,
29679 IX86_BUILTIN_LOADUPS,
29680 IX86_BUILTIN_STOREUPS,
29681 IX86_BUILTIN_MOVSS,
29683 IX86_BUILTIN_MOVHLPS,
29684 IX86_BUILTIN_MOVLHPS,
29685 IX86_BUILTIN_LOADHPS,
29686 IX86_BUILTIN_LOADLPS,
29687 IX86_BUILTIN_STOREHPS,
29688 IX86_BUILTIN_STORELPS,
29690 IX86_BUILTIN_MASKMOVQ,
29691 IX86_BUILTIN_MOVMSKPS,
29692 IX86_BUILTIN_PMOVMSKB,
29694 IX86_BUILTIN_MOVNTPS,
29695 IX86_BUILTIN_MOVNTQ,
29697 IX86_BUILTIN_LOADDQU,
29698 IX86_BUILTIN_STOREDQU,
29700 IX86_BUILTIN_PACKSSWB,
29701 IX86_BUILTIN_PACKSSDW,
29702 IX86_BUILTIN_PACKUSWB,
29704 IX86_BUILTIN_PADDB,
29705 IX86_BUILTIN_PADDW,
29706 IX86_BUILTIN_PADDD,
29707 IX86_BUILTIN_PADDQ,
29708 IX86_BUILTIN_PADDSB,
29709 IX86_BUILTIN_PADDSW,
29710 IX86_BUILTIN_PADDUSB,
29711 IX86_BUILTIN_PADDUSW,
29712 IX86_BUILTIN_PSUBB,
29713 IX86_BUILTIN_PSUBW,
29714 IX86_BUILTIN_PSUBD,
29715 IX86_BUILTIN_PSUBQ,
29716 IX86_BUILTIN_PSUBSB,
29717 IX86_BUILTIN_PSUBSW,
29718 IX86_BUILTIN_PSUBUSB,
29719 IX86_BUILTIN_PSUBUSW,
29722 IX86_BUILTIN_PANDN,
29726 IX86_BUILTIN_PAVGB,
29727 IX86_BUILTIN_PAVGW,
29729 IX86_BUILTIN_PCMPEQB,
29730 IX86_BUILTIN_PCMPEQW,
29731 IX86_BUILTIN_PCMPEQD,
29732 IX86_BUILTIN_PCMPGTB,
29733 IX86_BUILTIN_PCMPGTW,
29734 IX86_BUILTIN_PCMPGTD,
29736 IX86_BUILTIN_PMADDWD,
29738 IX86_BUILTIN_PMAXSW,
29739 IX86_BUILTIN_PMAXUB,
29740 IX86_BUILTIN_PMINSW,
29741 IX86_BUILTIN_PMINUB,
29743 IX86_BUILTIN_PMULHUW,
29744 IX86_BUILTIN_PMULHW,
29745 IX86_BUILTIN_PMULLW,
29747 IX86_BUILTIN_PSADBW,
29748 IX86_BUILTIN_PSHUFW,
29750 IX86_BUILTIN_PSLLW,
29751 IX86_BUILTIN_PSLLD,
29752 IX86_BUILTIN_PSLLQ,
29753 IX86_BUILTIN_PSRAW,
29754 IX86_BUILTIN_PSRAD,
29755 IX86_BUILTIN_PSRLW,
29756 IX86_BUILTIN_PSRLD,
29757 IX86_BUILTIN_PSRLQ,
29758 IX86_BUILTIN_PSLLWI,
29759 IX86_BUILTIN_PSLLDI,
29760 IX86_BUILTIN_PSLLQI,
29761 IX86_BUILTIN_PSRAWI,
29762 IX86_BUILTIN_PSRADI,
29763 IX86_BUILTIN_PSRLWI,
29764 IX86_BUILTIN_PSRLDI,
29765 IX86_BUILTIN_PSRLQI,
29767 IX86_BUILTIN_PUNPCKHBW,
29768 IX86_BUILTIN_PUNPCKHWD,
29769 IX86_BUILTIN_PUNPCKHDQ,
29770 IX86_BUILTIN_PUNPCKLBW,
29771 IX86_BUILTIN_PUNPCKLWD,
29772 IX86_BUILTIN_PUNPCKLDQ,
29774 IX86_BUILTIN_SHUFPS,
29776 IX86_BUILTIN_RCPPS,
29777 IX86_BUILTIN_RCPSS,
29778 IX86_BUILTIN_RSQRTPS,
29779 IX86_BUILTIN_RSQRTPS_NR,
29780 IX86_BUILTIN_RSQRTSS,
29781 IX86_BUILTIN_RSQRTF,
29782 IX86_BUILTIN_SQRTPS,
29783 IX86_BUILTIN_SQRTPS_NR,
29784 IX86_BUILTIN_SQRTSS,
29786 IX86_BUILTIN_UNPCKHPS,
29787 IX86_BUILTIN_UNPCKLPS,
29789 IX86_BUILTIN_ANDPS,
29790 IX86_BUILTIN_ANDNPS,
29792 IX86_BUILTIN_XORPS,
29795 IX86_BUILTIN_LDMXCSR,
29796 IX86_BUILTIN_STMXCSR,
29797 IX86_BUILTIN_SFENCE,
29799 IX86_BUILTIN_FXSAVE,
29800 IX86_BUILTIN_FXRSTOR,
29801 IX86_BUILTIN_FXSAVE64,
29802 IX86_BUILTIN_FXRSTOR64,
29804 IX86_BUILTIN_XSAVE,
29805 IX86_BUILTIN_XRSTOR,
29806 IX86_BUILTIN_XSAVE64,
29807 IX86_BUILTIN_XRSTOR64,
29809 IX86_BUILTIN_XSAVEOPT,
29810 IX86_BUILTIN_XSAVEOPT64,
29812 IX86_BUILTIN_XSAVEC,
29813 IX86_BUILTIN_XSAVEC64,
29815 IX86_BUILTIN_XSAVES,
29816 IX86_BUILTIN_XRSTORS,
29817 IX86_BUILTIN_XSAVES64,
29818 IX86_BUILTIN_XRSTORS64,
29820 /* 3DNow! Original */
29821 IX86_BUILTIN_FEMMS,
29822 IX86_BUILTIN_PAVGUSB,
29823 IX86_BUILTIN_PF2ID,
29824 IX86_BUILTIN_PFACC,
29825 IX86_BUILTIN_PFADD,
29826 IX86_BUILTIN_PFCMPEQ,
29827 IX86_BUILTIN_PFCMPGE,
29828 IX86_BUILTIN_PFCMPGT,
29829 IX86_BUILTIN_PFMAX,
29830 IX86_BUILTIN_PFMIN,
29831 IX86_BUILTIN_PFMUL,
29832 IX86_BUILTIN_PFRCP,
29833 IX86_BUILTIN_PFRCPIT1,
29834 IX86_BUILTIN_PFRCPIT2,
29835 IX86_BUILTIN_PFRSQIT1,
29836 IX86_BUILTIN_PFRSQRT,
29837 IX86_BUILTIN_PFSUB,
29838 IX86_BUILTIN_PFSUBR,
29839 IX86_BUILTIN_PI2FD,
29840 IX86_BUILTIN_PMULHRW,
29842 /* 3DNow! Athlon Extensions */
29843 IX86_BUILTIN_PF2IW,
29844 IX86_BUILTIN_PFNACC,
29845 IX86_BUILTIN_PFPNACC,
29846 IX86_BUILTIN_PI2FW,
29847 IX86_BUILTIN_PSWAPDSI,
29848 IX86_BUILTIN_PSWAPDSF,
29851 IX86_BUILTIN_ADDPD,
29852 IX86_BUILTIN_ADDSD,
29853 IX86_BUILTIN_DIVPD,
29854 IX86_BUILTIN_DIVSD,
29855 IX86_BUILTIN_MULPD,
29856 IX86_BUILTIN_MULSD,
29857 IX86_BUILTIN_SUBPD,
29858 IX86_BUILTIN_SUBSD,
29860 IX86_BUILTIN_CMPEQPD,
29861 IX86_BUILTIN_CMPLTPD,
29862 IX86_BUILTIN_CMPLEPD,
29863 IX86_BUILTIN_CMPGTPD,
29864 IX86_BUILTIN_CMPGEPD,
29865 IX86_BUILTIN_CMPNEQPD,
29866 IX86_BUILTIN_CMPNLTPD,
29867 IX86_BUILTIN_CMPNLEPD,
29868 IX86_BUILTIN_CMPNGTPD,
29869 IX86_BUILTIN_CMPNGEPD,
29870 IX86_BUILTIN_CMPORDPD,
29871 IX86_BUILTIN_CMPUNORDPD,
29872 IX86_BUILTIN_CMPEQSD,
29873 IX86_BUILTIN_CMPLTSD,
29874 IX86_BUILTIN_CMPLESD,
29875 IX86_BUILTIN_CMPNEQSD,
29876 IX86_BUILTIN_CMPNLTSD,
29877 IX86_BUILTIN_CMPNLESD,
29878 IX86_BUILTIN_CMPORDSD,
29879 IX86_BUILTIN_CMPUNORDSD,
29881 IX86_BUILTIN_COMIEQSD,
29882 IX86_BUILTIN_COMILTSD,
29883 IX86_BUILTIN_COMILESD,
29884 IX86_BUILTIN_COMIGTSD,
29885 IX86_BUILTIN_COMIGESD,
29886 IX86_BUILTIN_COMINEQSD,
29887 IX86_BUILTIN_UCOMIEQSD,
29888 IX86_BUILTIN_UCOMILTSD,
29889 IX86_BUILTIN_UCOMILESD,
29890 IX86_BUILTIN_UCOMIGTSD,
29891 IX86_BUILTIN_UCOMIGESD,
29892 IX86_BUILTIN_UCOMINEQSD,
29894 IX86_BUILTIN_MAXPD,
29895 IX86_BUILTIN_MAXSD,
29896 IX86_BUILTIN_MINPD,
29897 IX86_BUILTIN_MINSD,
29899 IX86_BUILTIN_ANDPD,
29900 IX86_BUILTIN_ANDNPD,
29902 IX86_BUILTIN_XORPD,
29904 IX86_BUILTIN_SQRTPD,
29905 IX86_BUILTIN_SQRTSD,
29907 IX86_BUILTIN_UNPCKHPD,
29908 IX86_BUILTIN_UNPCKLPD,
29910 IX86_BUILTIN_SHUFPD,
29912 IX86_BUILTIN_LOADUPD,
29913 IX86_BUILTIN_STOREUPD,
29914 IX86_BUILTIN_MOVSD,
29916 IX86_BUILTIN_LOADHPD,
29917 IX86_BUILTIN_LOADLPD,
29919 IX86_BUILTIN_CVTDQ2PD,
29920 IX86_BUILTIN_CVTDQ2PS,
29922 IX86_BUILTIN_CVTPD2DQ,
29923 IX86_BUILTIN_CVTPD2PI,
29924 IX86_BUILTIN_CVTPD2PS,
29925 IX86_BUILTIN_CVTTPD2DQ,
29926 IX86_BUILTIN_CVTTPD2PI,
29928 IX86_BUILTIN_CVTPI2PD,
29929 IX86_BUILTIN_CVTSI2SD,
29930 IX86_BUILTIN_CVTSI642SD,
29932 IX86_BUILTIN_CVTSD2SI,
29933 IX86_BUILTIN_CVTSD2SI64,
29934 IX86_BUILTIN_CVTSD2SS,
29935 IX86_BUILTIN_CVTSS2SD,
29936 IX86_BUILTIN_CVTTSD2SI,
29937 IX86_BUILTIN_CVTTSD2SI64,
29939 IX86_BUILTIN_CVTPS2DQ,
29940 IX86_BUILTIN_CVTPS2PD,
29941 IX86_BUILTIN_CVTTPS2DQ,
29943 IX86_BUILTIN_MOVNTI,
29944 IX86_BUILTIN_MOVNTI64,
29945 IX86_BUILTIN_MOVNTPD,
29946 IX86_BUILTIN_MOVNTDQ,
29948 IX86_BUILTIN_MOVQ128,
29951 IX86_BUILTIN_MASKMOVDQU,
29952 IX86_BUILTIN_MOVMSKPD,
29953 IX86_BUILTIN_PMOVMSKB128,
29955 IX86_BUILTIN_PACKSSWB128,
29956 IX86_BUILTIN_PACKSSDW128,
29957 IX86_BUILTIN_PACKUSWB128,
29959 IX86_BUILTIN_PADDB128,
29960 IX86_BUILTIN_PADDW128,
29961 IX86_BUILTIN_PADDD128,
29962 IX86_BUILTIN_PADDQ128,
29963 IX86_BUILTIN_PADDSB128,
29964 IX86_BUILTIN_PADDSW128,
29965 IX86_BUILTIN_PADDUSB128,
29966 IX86_BUILTIN_PADDUSW128,
29967 IX86_BUILTIN_PSUBB128,
29968 IX86_BUILTIN_PSUBW128,
29969 IX86_BUILTIN_PSUBD128,
29970 IX86_BUILTIN_PSUBQ128,
29971 IX86_BUILTIN_PSUBSB128,
29972 IX86_BUILTIN_PSUBSW128,
29973 IX86_BUILTIN_PSUBUSB128,
29974 IX86_BUILTIN_PSUBUSW128,
29976 IX86_BUILTIN_PAND128,
29977 IX86_BUILTIN_PANDN128,
29978 IX86_BUILTIN_POR128,
29979 IX86_BUILTIN_PXOR128,
29981 IX86_BUILTIN_PAVGB128,
29982 IX86_BUILTIN_PAVGW128,
29984 IX86_BUILTIN_PCMPEQB128,
29985 IX86_BUILTIN_PCMPEQW128,
29986 IX86_BUILTIN_PCMPEQD128,
29987 IX86_BUILTIN_PCMPGTB128,
29988 IX86_BUILTIN_PCMPGTW128,
29989 IX86_BUILTIN_PCMPGTD128,
29991 IX86_BUILTIN_PMADDWD128,
29993 IX86_BUILTIN_PMAXSW128,
29994 IX86_BUILTIN_PMAXUB128,
29995 IX86_BUILTIN_PMINSW128,
29996 IX86_BUILTIN_PMINUB128,
29998 IX86_BUILTIN_PMULUDQ,
29999 IX86_BUILTIN_PMULUDQ128,
30000 IX86_BUILTIN_PMULHUW128,
30001 IX86_BUILTIN_PMULHW128,
30002 IX86_BUILTIN_PMULLW128,
30004 IX86_BUILTIN_PSADBW128,
30005 IX86_BUILTIN_PSHUFHW,
30006 IX86_BUILTIN_PSHUFLW,
30007 IX86_BUILTIN_PSHUFD,
30009 IX86_BUILTIN_PSLLDQI128,
30010 IX86_BUILTIN_PSLLWI128,
30011 IX86_BUILTIN_PSLLDI128,
30012 IX86_BUILTIN_PSLLQI128,
30013 IX86_BUILTIN_PSRAWI128,
30014 IX86_BUILTIN_PSRADI128,
30015 IX86_BUILTIN_PSRLDQI128,
30016 IX86_BUILTIN_PSRLWI128,
30017 IX86_BUILTIN_PSRLDI128,
30018 IX86_BUILTIN_PSRLQI128,
30020 IX86_BUILTIN_PSLLDQ128,
30021 IX86_BUILTIN_PSLLW128,
30022 IX86_BUILTIN_PSLLD128,
30023 IX86_BUILTIN_PSLLQ128,
30024 IX86_BUILTIN_PSRAW128,
30025 IX86_BUILTIN_PSRAD128,
30026 IX86_BUILTIN_PSRLW128,
30027 IX86_BUILTIN_PSRLD128,
30028 IX86_BUILTIN_PSRLQ128,
30030 IX86_BUILTIN_PUNPCKHBW128,
30031 IX86_BUILTIN_PUNPCKHWD128,
30032 IX86_BUILTIN_PUNPCKHDQ128,
30033 IX86_BUILTIN_PUNPCKHQDQ128,
30034 IX86_BUILTIN_PUNPCKLBW128,
30035 IX86_BUILTIN_PUNPCKLWD128,
30036 IX86_BUILTIN_PUNPCKLDQ128,
30037 IX86_BUILTIN_PUNPCKLQDQ128,
30039 IX86_BUILTIN_CLFLUSH,
30040 IX86_BUILTIN_MFENCE,
30041 IX86_BUILTIN_LFENCE,
30042 IX86_BUILTIN_PAUSE,
30044 IX86_BUILTIN_FNSTENV,
30045 IX86_BUILTIN_FLDENV,
30046 IX86_BUILTIN_FNSTSW,
30047 IX86_BUILTIN_FNCLEX,
30049 IX86_BUILTIN_BSRSI,
30050 IX86_BUILTIN_BSRDI,
30051 IX86_BUILTIN_RDPMC,
30052 IX86_BUILTIN_RDTSC,
30053 IX86_BUILTIN_RDTSCP,
30054 IX86_BUILTIN_ROLQI,
30055 IX86_BUILTIN_ROLHI,
30056 IX86_BUILTIN_RORQI,
30057 IX86_BUILTIN_RORHI,
30060 IX86_BUILTIN_ADDSUBPS,
30061 IX86_BUILTIN_HADDPS,
30062 IX86_BUILTIN_HSUBPS,
30063 IX86_BUILTIN_MOVSHDUP,
30064 IX86_BUILTIN_MOVSLDUP,
30065 IX86_BUILTIN_ADDSUBPD,
30066 IX86_BUILTIN_HADDPD,
30067 IX86_BUILTIN_HSUBPD,
30068 IX86_BUILTIN_LDDQU,
30070 IX86_BUILTIN_MONITOR,
30071 IX86_BUILTIN_MWAIT,
30072 IX86_BUILTIN_CLZERO,
30075 IX86_BUILTIN_PHADDW,
30076 IX86_BUILTIN_PHADDD,
30077 IX86_BUILTIN_PHADDSW,
30078 IX86_BUILTIN_PHSUBW,
30079 IX86_BUILTIN_PHSUBD,
30080 IX86_BUILTIN_PHSUBSW,
30081 IX86_BUILTIN_PMADDUBSW,
30082 IX86_BUILTIN_PMULHRSW,
30083 IX86_BUILTIN_PSHUFB,
30084 IX86_BUILTIN_PSIGNB,
30085 IX86_BUILTIN_PSIGNW,
30086 IX86_BUILTIN_PSIGND,
30087 IX86_BUILTIN_PALIGNR,
30088 IX86_BUILTIN_PABSB,
30089 IX86_BUILTIN_PABSW,
30090 IX86_BUILTIN_PABSD,
30092 IX86_BUILTIN_PHADDW128,
30093 IX86_BUILTIN_PHADDD128,
30094 IX86_BUILTIN_PHADDSW128,
30095 IX86_BUILTIN_PHSUBW128,
30096 IX86_BUILTIN_PHSUBD128,
30097 IX86_BUILTIN_PHSUBSW128,
30098 IX86_BUILTIN_PMADDUBSW128,
30099 IX86_BUILTIN_PMULHRSW128,
30100 IX86_BUILTIN_PSHUFB128,
30101 IX86_BUILTIN_PSIGNB128,
30102 IX86_BUILTIN_PSIGNW128,
30103 IX86_BUILTIN_PSIGND128,
30104 IX86_BUILTIN_PALIGNR128,
30105 IX86_BUILTIN_PABSB128,
30106 IX86_BUILTIN_PABSW128,
30107 IX86_BUILTIN_PABSD128,
30109 /* AMDFAM10 - SSE4A New Instructions. */
30110 IX86_BUILTIN_MOVNTSD,
30111 IX86_BUILTIN_MOVNTSS,
30112 IX86_BUILTIN_EXTRQI,
30113 IX86_BUILTIN_EXTRQ,
30114 IX86_BUILTIN_INSERTQI,
30115 IX86_BUILTIN_INSERTQ,
30118 IX86_BUILTIN_BLENDPD,
30119 IX86_BUILTIN_BLENDPS,
30120 IX86_BUILTIN_BLENDVPD,
30121 IX86_BUILTIN_BLENDVPS,
30122 IX86_BUILTIN_PBLENDVB128,
30123 IX86_BUILTIN_PBLENDW128,
30128 IX86_BUILTIN_INSERTPS128,
30130 IX86_BUILTIN_MOVNTDQA,
30131 IX86_BUILTIN_MPSADBW128,
30132 IX86_BUILTIN_PACKUSDW128,
30133 IX86_BUILTIN_PCMPEQQ,
30134 IX86_BUILTIN_PHMINPOSUW128,
30136 IX86_BUILTIN_PMAXSB128,
30137 IX86_BUILTIN_PMAXSD128,
30138 IX86_BUILTIN_PMAXUD128,
30139 IX86_BUILTIN_PMAXUW128,
30141 IX86_BUILTIN_PMINSB128,
30142 IX86_BUILTIN_PMINSD128,
30143 IX86_BUILTIN_PMINUD128,
30144 IX86_BUILTIN_PMINUW128,
30146 IX86_BUILTIN_PMOVSXBW128,
30147 IX86_BUILTIN_PMOVSXBD128,
30148 IX86_BUILTIN_PMOVSXBQ128,
30149 IX86_BUILTIN_PMOVSXWD128,
30150 IX86_BUILTIN_PMOVSXWQ128,
30151 IX86_BUILTIN_PMOVSXDQ128,
30153 IX86_BUILTIN_PMOVZXBW128,
30154 IX86_BUILTIN_PMOVZXBD128,
30155 IX86_BUILTIN_PMOVZXBQ128,
30156 IX86_BUILTIN_PMOVZXWD128,
30157 IX86_BUILTIN_PMOVZXWQ128,
30158 IX86_BUILTIN_PMOVZXDQ128,
30160 IX86_BUILTIN_PMULDQ128,
30161 IX86_BUILTIN_PMULLD128,
30163 IX86_BUILTIN_ROUNDSD,
30164 IX86_BUILTIN_ROUNDSS,
30166 IX86_BUILTIN_ROUNDPD,
30167 IX86_BUILTIN_ROUNDPS,
30169 IX86_BUILTIN_FLOORPD,
30170 IX86_BUILTIN_CEILPD,
30171 IX86_BUILTIN_TRUNCPD,
30172 IX86_BUILTIN_RINTPD,
30173 IX86_BUILTIN_ROUNDPD_AZ,
30175 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX,
30176 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX,
30177 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX,
30179 IX86_BUILTIN_FLOORPS,
30180 IX86_BUILTIN_CEILPS,
30181 IX86_BUILTIN_TRUNCPS,
30182 IX86_BUILTIN_RINTPS,
30183 IX86_BUILTIN_ROUNDPS_AZ,
30185 IX86_BUILTIN_FLOORPS_SFIX,
30186 IX86_BUILTIN_CEILPS_SFIX,
30187 IX86_BUILTIN_ROUNDPS_AZ_SFIX,
30189 IX86_BUILTIN_PTESTZ,
30190 IX86_BUILTIN_PTESTC,
30191 IX86_BUILTIN_PTESTNZC,
30193 IX86_BUILTIN_VEC_INIT_V2SI,
30194 IX86_BUILTIN_VEC_INIT_V4HI,
30195 IX86_BUILTIN_VEC_INIT_V8QI,
30196 IX86_BUILTIN_VEC_EXT_V2DF,
30197 IX86_BUILTIN_VEC_EXT_V2DI,
30198 IX86_BUILTIN_VEC_EXT_V4SF,
30199 IX86_BUILTIN_VEC_EXT_V4SI,
30200 IX86_BUILTIN_VEC_EXT_V8HI,
30201 IX86_BUILTIN_VEC_EXT_V2SI,
30202 IX86_BUILTIN_VEC_EXT_V4HI,
30203 IX86_BUILTIN_VEC_EXT_V16QI,
30204 IX86_BUILTIN_VEC_SET_V2DI,
30205 IX86_BUILTIN_VEC_SET_V4SF,
30206 IX86_BUILTIN_VEC_SET_V4SI,
30207 IX86_BUILTIN_VEC_SET_V8HI,
30208 IX86_BUILTIN_VEC_SET_V4HI,
30209 IX86_BUILTIN_VEC_SET_V16QI,
30211 IX86_BUILTIN_VEC_PACK_SFIX,
30212 IX86_BUILTIN_VEC_PACK_SFIX256,
30215 IX86_BUILTIN_CRC32QI,
30216 IX86_BUILTIN_CRC32HI,
30217 IX86_BUILTIN_CRC32SI,
30218 IX86_BUILTIN_CRC32DI,
30220 IX86_BUILTIN_PCMPESTRI128,
30221 IX86_BUILTIN_PCMPESTRM128,
30222 IX86_BUILTIN_PCMPESTRA128,
30223 IX86_BUILTIN_PCMPESTRC128,
30224 IX86_BUILTIN_PCMPESTRO128,
30225 IX86_BUILTIN_PCMPESTRS128,
30226 IX86_BUILTIN_PCMPESTRZ128,
30227 IX86_BUILTIN_PCMPISTRI128,
30228 IX86_BUILTIN_PCMPISTRM128,
30229 IX86_BUILTIN_PCMPISTRA128,
30230 IX86_BUILTIN_PCMPISTRC128,
30231 IX86_BUILTIN_PCMPISTRO128,
30232 IX86_BUILTIN_PCMPISTRS128,
30233 IX86_BUILTIN_PCMPISTRZ128,
30235 IX86_BUILTIN_PCMPGTQ,
30237 /* AES instructions */
30238 IX86_BUILTIN_AESENC128,
30239 IX86_BUILTIN_AESENCLAST128,
30240 IX86_BUILTIN_AESDEC128,
30241 IX86_BUILTIN_AESDECLAST128,
30242 IX86_BUILTIN_AESIMC128,
30243 IX86_BUILTIN_AESKEYGENASSIST128,
30245 /* PCLMUL instruction */
30246 IX86_BUILTIN_PCLMULQDQ128,
30249 IX86_BUILTIN_ADDPD256,
30250 IX86_BUILTIN_ADDPS256,
30251 IX86_BUILTIN_ADDSUBPD256,
30252 IX86_BUILTIN_ADDSUBPS256,
30253 IX86_BUILTIN_ANDPD256,
30254 IX86_BUILTIN_ANDPS256,
30255 IX86_BUILTIN_ANDNPD256,
30256 IX86_BUILTIN_ANDNPS256,
30257 IX86_BUILTIN_BLENDPD256,
30258 IX86_BUILTIN_BLENDPS256,
30259 IX86_BUILTIN_BLENDVPD256,
30260 IX86_BUILTIN_BLENDVPS256,
30261 IX86_BUILTIN_DIVPD256,
30262 IX86_BUILTIN_DIVPS256,
30263 IX86_BUILTIN_DPPS256,
30264 IX86_BUILTIN_HADDPD256,
30265 IX86_BUILTIN_HADDPS256,
30266 IX86_BUILTIN_HSUBPD256,
30267 IX86_BUILTIN_HSUBPS256,
30268 IX86_BUILTIN_MAXPD256,
30269 IX86_BUILTIN_MAXPS256,
30270 IX86_BUILTIN_MINPD256,
30271 IX86_BUILTIN_MINPS256,
30272 IX86_BUILTIN_MULPD256,
30273 IX86_BUILTIN_MULPS256,
30274 IX86_BUILTIN_ORPD256,
30275 IX86_BUILTIN_ORPS256,
30276 IX86_BUILTIN_SHUFPD256,
30277 IX86_BUILTIN_SHUFPS256,
30278 IX86_BUILTIN_SUBPD256,
30279 IX86_BUILTIN_SUBPS256,
30280 IX86_BUILTIN_XORPD256,
30281 IX86_BUILTIN_XORPS256,
30282 IX86_BUILTIN_CMPSD,
30283 IX86_BUILTIN_CMPSS,
30284 IX86_BUILTIN_CMPPD,
30285 IX86_BUILTIN_CMPPS,
30286 IX86_BUILTIN_CMPPD256,
30287 IX86_BUILTIN_CMPPS256,
30288 IX86_BUILTIN_CVTDQ2PD256,
30289 IX86_BUILTIN_CVTDQ2PS256,
30290 IX86_BUILTIN_CVTPD2PS256,
30291 IX86_BUILTIN_CVTPS2DQ256,
30292 IX86_BUILTIN_CVTPS2PD256,
30293 IX86_BUILTIN_CVTTPD2DQ256,
30294 IX86_BUILTIN_CVTPD2DQ256,
30295 IX86_BUILTIN_CVTTPS2DQ256,
30296 IX86_BUILTIN_EXTRACTF128PD256,
30297 IX86_BUILTIN_EXTRACTF128PS256,
30298 IX86_BUILTIN_EXTRACTF128SI256,
30299 IX86_BUILTIN_VZEROALL,
30300 IX86_BUILTIN_VZEROUPPER,
30301 IX86_BUILTIN_VPERMILVARPD,
30302 IX86_BUILTIN_VPERMILVARPS,
30303 IX86_BUILTIN_VPERMILVARPD256,
30304 IX86_BUILTIN_VPERMILVARPS256,
30305 IX86_BUILTIN_VPERMILPD,
30306 IX86_BUILTIN_VPERMILPS,
30307 IX86_BUILTIN_VPERMILPD256,
30308 IX86_BUILTIN_VPERMILPS256,
30309 IX86_BUILTIN_VPERMIL2PD,
30310 IX86_BUILTIN_VPERMIL2PS,
30311 IX86_BUILTIN_VPERMIL2PD256,
30312 IX86_BUILTIN_VPERMIL2PS256,
30313 IX86_BUILTIN_VPERM2F128PD256,
30314 IX86_BUILTIN_VPERM2F128PS256,
30315 IX86_BUILTIN_VPERM2F128SI256,
30316 IX86_BUILTIN_VBROADCASTSS,
30317 IX86_BUILTIN_VBROADCASTSD256,
30318 IX86_BUILTIN_VBROADCASTSS256,
30319 IX86_BUILTIN_VBROADCASTPD256,
30320 IX86_BUILTIN_VBROADCASTPS256,
30321 IX86_BUILTIN_VINSERTF128PD256,
30322 IX86_BUILTIN_VINSERTF128PS256,
30323 IX86_BUILTIN_VINSERTF128SI256,
30324 IX86_BUILTIN_LOADUPD256,
30325 IX86_BUILTIN_LOADUPS256,
30326 IX86_BUILTIN_STOREUPD256,
30327 IX86_BUILTIN_STOREUPS256,
30328 IX86_BUILTIN_LDDQU256,
30329 IX86_BUILTIN_MOVNTDQ256,
30330 IX86_BUILTIN_MOVNTPD256,
30331 IX86_BUILTIN_MOVNTPS256,
30332 IX86_BUILTIN_LOADDQU256,
30333 IX86_BUILTIN_STOREDQU256,
30334 IX86_BUILTIN_MASKLOADPD,
30335 IX86_BUILTIN_MASKLOADPS,
30336 IX86_BUILTIN_MASKSTOREPD,
30337 IX86_BUILTIN_MASKSTOREPS,
30338 IX86_BUILTIN_MASKLOADPD256,
30339 IX86_BUILTIN_MASKLOADPS256,
30340 IX86_BUILTIN_MASKSTOREPD256,
30341 IX86_BUILTIN_MASKSTOREPS256,
30342 IX86_BUILTIN_MOVSHDUP256,
30343 IX86_BUILTIN_MOVSLDUP256,
30344 IX86_BUILTIN_MOVDDUP256,
30346 IX86_BUILTIN_SQRTPD256,
30347 IX86_BUILTIN_SQRTPS256,
30348 IX86_BUILTIN_SQRTPS_NR256,
30349 IX86_BUILTIN_RSQRTPS256,
30350 IX86_BUILTIN_RSQRTPS_NR256,
30352 IX86_BUILTIN_RCPPS256,
30354 IX86_BUILTIN_ROUNDPD256,
30355 IX86_BUILTIN_ROUNDPS256,
30357 IX86_BUILTIN_FLOORPD256,
30358 IX86_BUILTIN_CEILPD256,
30359 IX86_BUILTIN_TRUNCPD256,
30360 IX86_BUILTIN_RINTPD256,
30361 IX86_BUILTIN_ROUNDPD_AZ256,
30363 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256,
30364 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256,
30365 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256,
30367 IX86_BUILTIN_FLOORPS256,
30368 IX86_BUILTIN_CEILPS256,
30369 IX86_BUILTIN_TRUNCPS256,
30370 IX86_BUILTIN_RINTPS256,
30371 IX86_BUILTIN_ROUNDPS_AZ256,
30373 IX86_BUILTIN_FLOORPS_SFIX256,
30374 IX86_BUILTIN_CEILPS_SFIX256,
30375 IX86_BUILTIN_ROUNDPS_AZ_SFIX256,
30377 IX86_BUILTIN_UNPCKHPD256,
30378 IX86_BUILTIN_UNPCKLPD256,
30379 IX86_BUILTIN_UNPCKHPS256,
30380 IX86_BUILTIN_UNPCKLPS256,
30382 IX86_BUILTIN_SI256_SI,
30383 IX86_BUILTIN_PS256_PS,
30384 IX86_BUILTIN_PD256_PD,
30385 IX86_BUILTIN_SI_SI256,
30386 IX86_BUILTIN_PS_PS256,
30387 IX86_BUILTIN_PD_PD256,
30389 IX86_BUILTIN_VTESTZPD,
30390 IX86_BUILTIN_VTESTCPD,
30391 IX86_BUILTIN_VTESTNZCPD,
30392 IX86_BUILTIN_VTESTZPS,
30393 IX86_BUILTIN_VTESTCPS,
30394 IX86_BUILTIN_VTESTNZCPS,
30395 IX86_BUILTIN_VTESTZPD256,
30396 IX86_BUILTIN_VTESTCPD256,
30397 IX86_BUILTIN_VTESTNZCPD256,
30398 IX86_BUILTIN_VTESTZPS256,
30399 IX86_BUILTIN_VTESTCPS256,
30400 IX86_BUILTIN_VTESTNZCPS256,
30401 IX86_BUILTIN_PTESTZ256,
30402 IX86_BUILTIN_PTESTC256,
30403 IX86_BUILTIN_PTESTNZC256,
30405 IX86_BUILTIN_MOVMSKPD256,
30406 IX86_BUILTIN_MOVMSKPS256,
30409 IX86_BUILTIN_MPSADBW256,
30410 IX86_BUILTIN_PABSB256,
30411 IX86_BUILTIN_PABSW256,
30412 IX86_BUILTIN_PABSD256,
30413 IX86_BUILTIN_PACKSSDW256,
30414 IX86_BUILTIN_PACKSSWB256,
30415 IX86_BUILTIN_PACKUSDW256,
30416 IX86_BUILTIN_PACKUSWB256,
30417 IX86_BUILTIN_PADDB256,
30418 IX86_BUILTIN_PADDW256,
30419 IX86_BUILTIN_PADDD256,
30420 IX86_BUILTIN_PADDQ256,
30421 IX86_BUILTIN_PADDSB256,
30422 IX86_BUILTIN_PADDSW256,
30423 IX86_BUILTIN_PADDUSB256,
30424 IX86_BUILTIN_PADDUSW256,
30425 IX86_BUILTIN_PALIGNR256,
30426 IX86_BUILTIN_AND256I,
30427 IX86_BUILTIN_ANDNOT256I,
30428 IX86_BUILTIN_PAVGB256,
30429 IX86_BUILTIN_PAVGW256,
30430 IX86_BUILTIN_PBLENDVB256,
30431 IX86_BUILTIN_PBLENDVW256,
30432 IX86_BUILTIN_PCMPEQB256,
30433 IX86_BUILTIN_PCMPEQW256,
30434 IX86_BUILTIN_PCMPEQD256,
30435 IX86_BUILTIN_PCMPEQQ256,
30436 IX86_BUILTIN_PCMPGTB256,
30437 IX86_BUILTIN_PCMPGTW256,
30438 IX86_BUILTIN_PCMPGTD256,
30439 IX86_BUILTIN_PCMPGTQ256,
30440 IX86_BUILTIN_PHADDW256,
30441 IX86_BUILTIN_PHADDD256,
30442 IX86_BUILTIN_PHADDSW256,
30443 IX86_BUILTIN_PHSUBW256,
30444 IX86_BUILTIN_PHSUBD256,
30445 IX86_BUILTIN_PHSUBSW256,
30446 IX86_BUILTIN_PMADDUBSW256,
30447 IX86_BUILTIN_PMADDWD256,
30448 IX86_BUILTIN_PMAXSB256,
30449 IX86_BUILTIN_PMAXSW256,
30450 IX86_BUILTIN_PMAXSD256,
30451 IX86_BUILTIN_PMAXUB256,
30452 IX86_BUILTIN_PMAXUW256,
30453 IX86_BUILTIN_PMAXUD256,
30454 IX86_BUILTIN_PMINSB256,
30455 IX86_BUILTIN_PMINSW256,
30456 IX86_BUILTIN_PMINSD256,
30457 IX86_BUILTIN_PMINUB256,
30458 IX86_BUILTIN_PMINUW256,
30459 IX86_BUILTIN_PMINUD256,
30460 IX86_BUILTIN_PMOVMSKB256,
30461 IX86_BUILTIN_PMOVSXBW256,
30462 IX86_BUILTIN_PMOVSXBD256,
30463 IX86_BUILTIN_PMOVSXBQ256,
30464 IX86_BUILTIN_PMOVSXWD256,
30465 IX86_BUILTIN_PMOVSXWQ256,
30466 IX86_BUILTIN_PMOVSXDQ256,
30467 IX86_BUILTIN_PMOVZXBW256,
30468 IX86_BUILTIN_PMOVZXBD256,
30469 IX86_BUILTIN_PMOVZXBQ256,
30470 IX86_BUILTIN_PMOVZXWD256,
30471 IX86_BUILTIN_PMOVZXWQ256,
30472 IX86_BUILTIN_PMOVZXDQ256,
30473 IX86_BUILTIN_PMULDQ256,
30474 IX86_BUILTIN_PMULHRSW256,
30475 IX86_BUILTIN_PMULHUW256,
30476 IX86_BUILTIN_PMULHW256,
30477 IX86_BUILTIN_PMULLW256,
30478 IX86_BUILTIN_PMULLD256,
30479 IX86_BUILTIN_PMULUDQ256,
30480 IX86_BUILTIN_POR256,
30481 IX86_BUILTIN_PSADBW256,
30482 IX86_BUILTIN_PSHUFB256,
30483 IX86_BUILTIN_PSHUFD256,
30484 IX86_BUILTIN_PSHUFHW256,
30485 IX86_BUILTIN_PSHUFLW256,
30486 IX86_BUILTIN_PSIGNB256,
30487 IX86_BUILTIN_PSIGNW256,
30488 IX86_BUILTIN_PSIGND256,
30489 IX86_BUILTIN_PSLLDQI256,
30490 IX86_BUILTIN_PSLLWI256,
30491 IX86_BUILTIN_PSLLW256,
30492 IX86_BUILTIN_PSLLDI256,
30493 IX86_BUILTIN_PSLLD256,
30494 IX86_BUILTIN_PSLLQI256,
30495 IX86_BUILTIN_PSLLQ256,
30496 IX86_BUILTIN_PSRAWI256,
30497 IX86_BUILTIN_PSRAW256,
30498 IX86_BUILTIN_PSRADI256,
30499 IX86_BUILTIN_PSRAD256,
30500 IX86_BUILTIN_PSRLDQI256,
30501 IX86_BUILTIN_PSRLWI256,
30502 IX86_BUILTIN_PSRLW256,
30503 IX86_BUILTIN_PSRLDI256,
30504 IX86_BUILTIN_PSRLD256,
30505 IX86_BUILTIN_PSRLQI256,
30506 IX86_BUILTIN_PSRLQ256,
30507 IX86_BUILTIN_PSUBB256,
30508 IX86_BUILTIN_PSUBW256,
30509 IX86_BUILTIN_PSUBD256,
30510 IX86_BUILTIN_PSUBQ256,
30511 IX86_BUILTIN_PSUBSB256,
30512 IX86_BUILTIN_PSUBSW256,
30513 IX86_BUILTIN_PSUBUSB256,
30514 IX86_BUILTIN_PSUBUSW256,
30515 IX86_BUILTIN_PUNPCKHBW256,
30516 IX86_BUILTIN_PUNPCKHWD256,
30517 IX86_BUILTIN_PUNPCKHDQ256,
30518 IX86_BUILTIN_PUNPCKHQDQ256,
30519 IX86_BUILTIN_PUNPCKLBW256,
30520 IX86_BUILTIN_PUNPCKLWD256,
30521 IX86_BUILTIN_PUNPCKLDQ256,
30522 IX86_BUILTIN_PUNPCKLQDQ256,
30523 IX86_BUILTIN_PXOR256,
30524 IX86_BUILTIN_MOVNTDQA256,
30525 IX86_BUILTIN_VBROADCASTSS_PS,
30526 IX86_BUILTIN_VBROADCASTSS_PS256,
30527 IX86_BUILTIN_VBROADCASTSD_PD256,
30528 IX86_BUILTIN_VBROADCASTSI256,
30529 IX86_BUILTIN_PBLENDD256,
30530 IX86_BUILTIN_PBLENDD128,
30531 IX86_BUILTIN_PBROADCASTB256,
30532 IX86_BUILTIN_PBROADCASTW256,
30533 IX86_BUILTIN_PBROADCASTD256,
30534 IX86_BUILTIN_PBROADCASTQ256,
30535 IX86_BUILTIN_PBROADCASTB128,
30536 IX86_BUILTIN_PBROADCASTW128,
30537 IX86_BUILTIN_PBROADCASTD128,
30538 IX86_BUILTIN_PBROADCASTQ128,
30539 IX86_BUILTIN_VPERMVARSI256,
30540 IX86_BUILTIN_VPERMDF256,
30541 IX86_BUILTIN_VPERMVARSF256,
30542 IX86_BUILTIN_VPERMDI256,
30543 IX86_BUILTIN_VPERMTI256,
30544 IX86_BUILTIN_VEXTRACT128I256,
30545 IX86_BUILTIN_VINSERT128I256,
30546 IX86_BUILTIN_MASKLOADD,
30547 IX86_BUILTIN_MASKLOADQ,
30548 IX86_BUILTIN_MASKLOADD256,
30549 IX86_BUILTIN_MASKLOADQ256,
30550 IX86_BUILTIN_MASKSTORED,
30551 IX86_BUILTIN_MASKSTOREQ,
30552 IX86_BUILTIN_MASKSTORED256,
30553 IX86_BUILTIN_MASKSTOREQ256,
30554 IX86_BUILTIN_PSLLVV4DI,
30555 IX86_BUILTIN_PSLLVV2DI,
30556 IX86_BUILTIN_PSLLVV8SI,
30557 IX86_BUILTIN_PSLLVV4SI,
30558 IX86_BUILTIN_PSRAVV8SI,
30559 IX86_BUILTIN_PSRAVV4SI,
30560 IX86_BUILTIN_PSRLVV4DI,
30561 IX86_BUILTIN_PSRLVV2DI,
30562 IX86_BUILTIN_PSRLVV8SI,
30563 IX86_BUILTIN_PSRLVV4SI,
30565 IX86_BUILTIN_GATHERSIV2DF,
30566 IX86_BUILTIN_GATHERSIV4DF,
30567 IX86_BUILTIN_GATHERDIV2DF,
30568 IX86_BUILTIN_GATHERDIV4DF,
30569 IX86_BUILTIN_GATHERSIV4SF,
30570 IX86_BUILTIN_GATHERSIV8SF,
30571 IX86_BUILTIN_GATHERDIV4SF,
30572 IX86_BUILTIN_GATHERDIV8SF,
30573 IX86_BUILTIN_GATHERSIV2DI,
30574 IX86_BUILTIN_GATHERSIV4DI,
30575 IX86_BUILTIN_GATHERDIV2DI,
30576 IX86_BUILTIN_GATHERDIV4DI,
30577 IX86_BUILTIN_GATHERSIV4SI,
30578 IX86_BUILTIN_GATHERSIV8SI,
30579 IX86_BUILTIN_GATHERDIV4SI,
30580 IX86_BUILTIN_GATHERDIV8SI,
30583 IX86_BUILTIN_SI512_SI256,
30584 IX86_BUILTIN_PD512_PD256,
30585 IX86_BUILTIN_PS512_PS256,
30586 IX86_BUILTIN_SI512_SI,
30587 IX86_BUILTIN_PD512_PD,
30588 IX86_BUILTIN_PS512_PS,
30589 IX86_BUILTIN_ADDPD512,
30590 IX86_BUILTIN_ADDPS512,
30591 IX86_BUILTIN_ADDSD_ROUND,
30592 IX86_BUILTIN_ADDSS_ROUND,
30593 IX86_BUILTIN_ALIGND512,
30594 IX86_BUILTIN_ALIGNQ512,
30595 IX86_BUILTIN_BLENDMD512,
30596 IX86_BUILTIN_BLENDMPD512,
30597 IX86_BUILTIN_BLENDMPS512,
30598 IX86_BUILTIN_BLENDMQ512,
30599 IX86_BUILTIN_BROADCASTF32X4_512,
30600 IX86_BUILTIN_BROADCASTF64X4_512,
30601 IX86_BUILTIN_BROADCASTI32X4_512,
30602 IX86_BUILTIN_BROADCASTI64X4_512,
30603 IX86_BUILTIN_BROADCASTSD512,
30604 IX86_BUILTIN_BROADCASTSS512,
30605 IX86_BUILTIN_CMPD512,
30606 IX86_BUILTIN_CMPPD512,
30607 IX86_BUILTIN_CMPPS512,
30608 IX86_BUILTIN_CMPQ512,
30609 IX86_BUILTIN_CMPSD_MASK,
30610 IX86_BUILTIN_CMPSS_MASK,
30611 IX86_BUILTIN_COMIDF,
30612 IX86_BUILTIN_COMISF,
30613 IX86_BUILTIN_COMPRESSPD512,
30614 IX86_BUILTIN_COMPRESSPDSTORE512,
30615 IX86_BUILTIN_COMPRESSPS512,
30616 IX86_BUILTIN_COMPRESSPSSTORE512,
30617 IX86_BUILTIN_CVTDQ2PD512,
30618 IX86_BUILTIN_CVTDQ2PS512,
30619 IX86_BUILTIN_CVTPD2DQ512,
30620 IX86_BUILTIN_CVTPD2PS512,
30621 IX86_BUILTIN_CVTPD2UDQ512,
30622 IX86_BUILTIN_CVTPH2PS512,
30623 IX86_BUILTIN_CVTPS2DQ512,
30624 IX86_BUILTIN_CVTPS2PD512,
30625 IX86_BUILTIN_CVTPS2PH512,
30626 IX86_BUILTIN_CVTPS2UDQ512,
30627 IX86_BUILTIN_CVTSD2SS_ROUND,
30628 IX86_BUILTIN_CVTSI2SD64,
30629 IX86_BUILTIN_CVTSI2SS32,
30630 IX86_BUILTIN_CVTSI2SS64,
30631 IX86_BUILTIN_CVTSS2SD_ROUND,
30632 IX86_BUILTIN_CVTTPD2DQ512,
30633 IX86_BUILTIN_CVTTPD2UDQ512,
30634 IX86_BUILTIN_CVTTPS2DQ512,
30635 IX86_BUILTIN_CVTTPS2UDQ512,
30636 IX86_BUILTIN_CVTUDQ2PD512,
30637 IX86_BUILTIN_CVTUDQ2PS512,
30638 IX86_BUILTIN_CVTUSI2SD32,
30639 IX86_BUILTIN_CVTUSI2SD64,
30640 IX86_BUILTIN_CVTUSI2SS32,
30641 IX86_BUILTIN_CVTUSI2SS64,
30642 IX86_BUILTIN_DIVPD512,
30643 IX86_BUILTIN_DIVPS512,
30644 IX86_BUILTIN_DIVSD_ROUND,
30645 IX86_BUILTIN_DIVSS_ROUND,
30646 IX86_BUILTIN_EXPANDPD512,
30647 IX86_BUILTIN_EXPANDPD512Z,
30648 IX86_BUILTIN_EXPANDPDLOAD512,
30649 IX86_BUILTIN_EXPANDPDLOAD512Z,
30650 IX86_BUILTIN_EXPANDPS512,
30651 IX86_BUILTIN_EXPANDPS512Z,
30652 IX86_BUILTIN_EXPANDPSLOAD512,
30653 IX86_BUILTIN_EXPANDPSLOAD512Z,
30654 IX86_BUILTIN_EXTRACTF32X4,
30655 IX86_BUILTIN_EXTRACTF64X4,
30656 IX86_BUILTIN_EXTRACTI32X4,
30657 IX86_BUILTIN_EXTRACTI64X4,
30658 IX86_BUILTIN_FIXUPIMMPD512_MASK,
30659 IX86_BUILTIN_FIXUPIMMPD512_MASKZ,
30660 IX86_BUILTIN_FIXUPIMMPS512_MASK,
30661 IX86_BUILTIN_FIXUPIMMPS512_MASKZ,
30662 IX86_BUILTIN_FIXUPIMMSD128_MASK,
30663 IX86_BUILTIN_FIXUPIMMSD128_MASKZ,
30664 IX86_BUILTIN_FIXUPIMMSS128_MASK,
30665 IX86_BUILTIN_FIXUPIMMSS128_MASKZ,
30666 IX86_BUILTIN_GETEXPPD512,
30667 IX86_BUILTIN_GETEXPPS512,
30668 IX86_BUILTIN_GETEXPSD128,
30669 IX86_BUILTIN_GETEXPSS128,
30670 IX86_BUILTIN_GETMANTPD512,
30671 IX86_BUILTIN_GETMANTPS512,
30672 IX86_BUILTIN_GETMANTSD128,
30673 IX86_BUILTIN_GETMANTSS128,
30674 IX86_BUILTIN_INSERTF32X4,
30675 IX86_BUILTIN_INSERTF64X4,
30676 IX86_BUILTIN_INSERTI32X4,
30677 IX86_BUILTIN_INSERTI64X4,
30678 IX86_BUILTIN_LOADAPD512,
30679 IX86_BUILTIN_LOADAPS512,
30680 IX86_BUILTIN_LOADDQUDI512,
30681 IX86_BUILTIN_LOADDQUSI512,
30682 IX86_BUILTIN_LOADUPD512,
30683 IX86_BUILTIN_LOADUPS512,
30684 IX86_BUILTIN_MAXPD512,
30685 IX86_BUILTIN_MAXPS512,
30686 IX86_BUILTIN_MAXSD_ROUND,
30687 IX86_BUILTIN_MAXSS_ROUND,
30688 IX86_BUILTIN_MINPD512,
30689 IX86_BUILTIN_MINPS512,
30690 IX86_BUILTIN_MINSD_ROUND,
30691 IX86_BUILTIN_MINSS_ROUND,
30692 IX86_BUILTIN_MOVAPD512,
30693 IX86_BUILTIN_MOVAPS512,
30694 IX86_BUILTIN_MOVDDUP512,
30695 IX86_BUILTIN_MOVDQA32LOAD512,
30696 IX86_BUILTIN_MOVDQA32STORE512,
30697 IX86_BUILTIN_MOVDQA32_512,
30698 IX86_BUILTIN_MOVDQA64LOAD512,
30699 IX86_BUILTIN_MOVDQA64STORE512,
30700 IX86_BUILTIN_MOVDQA64_512,
30701 IX86_BUILTIN_MOVNTDQ512,
30702 IX86_BUILTIN_MOVNTDQA512,
30703 IX86_BUILTIN_MOVNTPD512,
30704 IX86_BUILTIN_MOVNTPS512,
30705 IX86_BUILTIN_MOVSHDUP512,
30706 IX86_BUILTIN_MOVSLDUP512,
30707 IX86_BUILTIN_MULPD512,
30708 IX86_BUILTIN_MULPS512,
30709 IX86_BUILTIN_MULSD_ROUND,
30710 IX86_BUILTIN_MULSS_ROUND,
30711 IX86_BUILTIN_PABSD512,
30712 IX86_BUILTIN_PABSQ512,
30713 IX86_BUILTIN_PADDD512,
30714 IX86_BUILTIN_PADDQ512,
30715 IX86_BUILTIN_PANDD512,
30716 IX86_BUILTIN_PANDND512,
30717 IX86_BUILTIN_PANDNQ512,
30718 IX86_BUILTIN_PANDQ512,
30719 IX86_BUILTIN_PBROADCASTD512,
30720 IX86_BUILTIN_PBROADCASTD512_GPR,
30721 IX86_BUILTIN_PBROADCASTMB512,
30722 IX86_BUILTIN_PBROADCASTMW512,
30723 IX86_BUILTIN_PBROADCASTQ512,
30724 IX86_BUILTIN_PBROADCASTQ512_GPR,
30725 IX86_BUILTIN_PCMPEQD512_MASK,
30726 IX86_BUILTIN_PCMPEQQ512_MASK,
30727 IX86_BUILTIN_PCMPGTD512_MASK,
30728 IX86_BUILTIN_PCMPGTQ512_MASK,
30729 IX86_BUILTIN_PCOMPRESSD512,
30730 IX86_BUILTIN_PCOMPRESSDSTORE512,
30731 IX86_BUILTIN_PCOMPRESSQ512,
30732 IX86_BUILTIN_PCOMPRESSQSTORE512,
30733 IX86_BUILTIN_PEXPANDD512,
30734 IX86_BUILTIN_PEXPANDD512Z,
30735 IX86_BUILTIN_PEXPANDDLOAD512,
30736 IX86_BUILTIN_PEXPANDDLOAD512Z,
30737 IX86_BUILTIN_PEXPANDQ512,
30738 IX86_BUILTIN_PEXPANDQ512Z,
30739 IX86_BUILTIN_PEXPANDQLOAD512,
30740 IX86_BUILTIN_PEXPANDQLOAD512Z,
30741 IX86_BUILTIN_PMAXSD512,
30742 IX86_BUILTIN_PMAXSQ512,
30743 IX86_BUILTIN_PMAXUD512,
30744 IX86_BUILTIN_PMAXUQ512,
30745 IX86_BUILTIN_PMINSD512,
30746 IX86_BUILTIN_PMINSQ512,
30747 IX86_BUILTIN_PMINUD512,
30748 IX86_BUILTIN_PMINUQ512,
30749 IX86_BUILTIN_PMOVDB512,
30750 IX86_BUILTIN_PMOVDB512_MEM,
30751 IX86_BUILTIN_PMOVDW512,
30752 IX86_BUILTIN_PMOVDW512_MEM,
30753 IX86_BUILTIN_PMOVQB512,
30754 IX86_BUILTIN_PMOVQB512_MEM,
30755 IX86_BUILTIN_PMOVQD512,
30756 IX86_BUILTIN_PMOVQD512_MEM,
30757 IX86_BUILTIN_PMOVQW512,
30758 IX86_BUILTIN_PMOVQW512_MEM,
30759 IX86_BUILTIN_PMOVSDB512,
30760 IX86_BUILTIN_PMOVSDB512_MEM,
30761 IX86_BUILTIN_PMOVSDW512,
30762 IX86_BUILTIN_PMOVSDW512_MEM,
30763 IX86_BUILTIN_PMOVSQB512,
30764 IX86_BUILTIN_PMOVSQB512_MEM,
30765 IX86_BUILTIN_PMOVSQD512,
30766 IX86_BUILTIN_PMOVSQD512_MEM,
30767 IX86_BUILTIN_PMOVSQW512,
30768 IX86_BUILTIN_PMOVSQW512_MEM,
30769 IX86_BUILTIN_PMOVSXBD512,
30770 IX86_BUILTIN_PMOVSXBQ512,
30771 IX86_BUILTIN_PMOVSXDQ512,
30772 IX86_BUILTIN_PMOVSXWD512,
30773 IX86_BUILTIN_PMOVSXWQ512,
30774 IX86_BUILTIN_PMOVUSDB512,
30775 IX86_BUILTIN_PMOVUSDB512_MEM,
30776 IX86_BUILTIN_PMOVUSDW512,
30777 IX86_BUILTIN_PMOVUSDW512_MEM,
30778 IX86_BUILTIN_PMOVUSQB512,
30779 IX86_BUILTIN_PMOVUSQB512_MEM,
30780 IX86_BUILTIN_PMOVUSQD512,
30781 IX86_BUILTIN_PMOVUSQD512_MEM,
30782 IX86_BUILTIN_PMOVUSQW512,
30783 IX86_BUILTIN_PMOVUSQW512_MEM,
30784 IX86_BUILTIN_PMOVZXBD512,
30785 IX86_BUILTIN_PMOVZXBQ512,
30786 IX86_BUILTIN_PMOVZXDQ512,
30787 IX86_BUILTIN_PMOVZXWD512,
30788 IX86_BUILTIN_PMOVZXWQ512,
30789 IX86_BUILTIN_PMULDQ512,
30790 IX86_BUILTIN_PMULLD512,
30791 IX86_BUILTIN_PMULUDQ512,
30792 IX86_BUILTIN_PORD512,
30793 IX86_BUILTIN_PORQ512,
30794 IX86_BUILTIN_PROLD512,
30795 IX86_BUILTIN_PROLQ512,
30796 IX86_BUILTIN_PROLVD512,
30797 IX86_BUILTIN_PROLVQ512,
30798 IX86_BUILTIN_PRORD512,
30799 IX86_BUILTIN_PRORQ512,
30800 IX86_BUILTIN_PRORVD512,
30801 IX86_BUILTIN_PRORVQ512,
30802 IX86_BUILTIN_PSHUFD512,
30803 IX86_BUILTIN_PSLLD512,
30804 IX86_BUILTIN_PSLLDI512,
30805 IX86_BUILTIN_PSLLQ512,
30806 IX86_BUILTIN_PSLLQI512,
30807 IX86_BUILTIN_PSLLVV16SI,
30808 IX86_BUILTIN_PSLLVV8DI,
30809 IX86_BUILTIN_PSRAD512,
30810 IX86_BUILTIN_PSRADI512,
30811 IX86_BUILTIN_PSRAQ512,
30812 IX86_BUILTIN_PSRAQI512,
30813 IX86_BUILTIN_PSRAVV16SI,
30814 IX86_BUILTIN_PSRAVV8DI,
30815 IX86_BUILTIN_PSRLD512,
30816 IX86_BUILTIN_PSRLDI512,
30817 IX86_BUILTIN_PSRLQ512,
30818 IX86_BUILTIN_PSRLQI512,
30819 IX86_BUILTIN_PSRLVV16SI,
30820 IX86_BUILTIN_PSRLVV8DI,
30821 IX86_BUILTIN_PSUBD512,
30822 IX86_BUILTIN_PSUBQ512,
30823 IX86_BUILTIN_PTESTMD512,
30824 IX86_BUILTIN_PTESTMQ512,
30825 IX86_BUILTIN_PTESTNMD512,
30826 IX86_BUILTIN_PTESTNMQ512,
30827 IX86_BUILTIN_PUNPCKHDQ512,
30828 IX86_BUILTIN_PUNPCKHQDQ512,
30829 IX86_BUILTIN_PUNPCKLDQ512,
30830 IX86_BUILTIN_PUNPCKLQDQ512,
30831 IX86_BUILTIN_PXORD512,
30832 IX86_BUILTIN_PXORQ512,
30833 IX86_BUILTIN_RCP14PD512,
30834 IX86_BUILTIN_RCP14PS512,
30835 IX86_BUILTIN_RCP14SD,
30836 IX86_BUILTIN_RCP14SS,
30837 IX86_BUILTIN_RNDSCALEPD,
30838 IX86_BUILTIN_RNDSCALEPS,
30839 IX86_BUILTIN_RNDSCALESD,
30840 IX86_BUILTIN_RNDSCALESS,
30841 IX86_BUILTIN_RSQRT14PD512,
30842 IX86_BUILTIN_RSQRT14PS512,
30843 IX86_BUILTIN_RSQRT14SD,
30844 IX86_BUILTIN_RSQRT14SS,
30845 IX86_BUILTIN_SCALEFPD512,
30846 IX86_BUILTIN_SCALEFPS512,
30847 IX86_BUILTIN_SCALEFSD,
30848 IX86_BUILTIN_SCALEFSS,
30849 IX86_BUILTIN_SHUFPD512,
30850 IX86_BUILTIN_SHUFPS512,
30851 IX86_BUILTIN_SHUF_F32x4,
30852 IX86_BUILTIN_SHUF_F64x2,
30853 IX86_BUILTIN_SHUF_I32x4,
30854 IX86_BUILTIN_SHUF_I64x2,
30855 IX86_BUILTIN_SQRTPD512,
30856 IX86_BUILTIN_SQRTPD512_MASK,
30857 IX86_BUILTIN_SQRTPS512_MASK,
30858 IX86_BUILTIN_SQRTPS_NR512,
30859 IX86_BUILTIN_SQRTSD_ROUND,
30860 IX86_BUILTIN_SQRTSS_ROUND,
30861 IX86_BUILTIN_STOREAPD512,
30862 IX86_BUILTIN_STOREAPS512,
30863 IX86_BUILTIN_STOREDQUDI512,
30864 IX86_BUILTIN_STOREDQUSI512,
30865 IX86_BUILTIN_STOREUPD512,
30866 IX86_BUILTIN_STOREUPS512,
30867 IX86_BUILTIN_SUBPD512,
30868 IX86_BUILTIN_SUBPS512,
30869 IX86_BUILTIN_SUBSD_ROUND,
30870 IX86_BUILTIN_SUBSS_ROUND,
30871 IX86_BUILTIN_UCMPD512,
30872 IX86_BUILTIN_UCMPQ512,
30873 IX86_BUILTIN_UNPCKHPD512,
30874 IX86_BUILTIN_UNPCKHPS512,
30875 IX86_BUILTIN_UNPCKLPD512,
30876 IX86_BUILTIN_UNPCKLPS512,
30877 IX86_BUILTIN_VCVTSD2SI32,
30878 IX86_BUILTIN_VCVTSD2SI64,
30879 IX86_BUILTIN_VCVTSD2USI32,
30880 IX86_BUILTIN_VCVTSD2USI64,
30881 IX86_BUILTIN_VCVTSS2SI32,
30882 IX86_BUILTIN_VCVTSS2SI64,
30883 IX86_BUILTIN_VCVTSS2USI32,
30884 IX86_BUILTIN_VCVTSS2USI64,
30885 IX86_BUILTIN_VCVTTSD2SI32,
30886 IX86_BUILTIN_VCVTTSD2SI64,
30887 IX86_BUILTIN_VCVTTSD2USI32,
30888 IX86_BUILTIN_VCVTTSD2USI64,
30889 IX86_BUILTIN_VCVTTSS2SI32,
30890 IX86_BUILTIN_VCVTTSS2SI64,
30891 IX86_BUILTIN_VCVTTSS2USI32,
30892 IX86_BUILTIN_VCVTTSS2USI64,
30893 IX86_BUILTIN_VFMADDPD512_MASK,
30894 IX86_BUILTIN_VFMADDPD512_MASK3,
30895 IX86_BUILTIN_VFMADDPD512_MASKZ,
30896 IX86_BUILTIN_VFMADDPS512_MASK,
30897 IX86_BUILTIN_VFMADDPS512_MASK3,
30898 IX86_BUILTIN_VFMADDPS512_MASKZ,
30899 IX86_BUILTIN_VFMADDSD3_ROUND,
30900 IX86_BUILTIN_VFMADDSS3_ROUND,
30901 IX86_BUILTIN_VFMADDSUBPD512_MASK,
30902 IX86_BUILTIN_VFMADDSUBPD512_MASK3,
30903 IX86_BUILTIN_VFMADDSUBPD512_MASKZ,
30904 IX86_BUILTIN_VFMADDSUBPS512_MASK,
30905 IX86_BUILTIN_VFMADDSUBPS512_MASK3,
30906 IX86_BUILTIN_VFMADDSUBPS512_MASKZ,
30907 IX86_BUILTIN_VFMSUBADDPD512_MASK3,
30908 IX86_BUILTIN_VFMSUBADDPS512_MASK3,
30909 IX86_BUILTIN_VFMSUBPD512_MASK3,
30910 IX86_BUILTIN_VFMSUBPS512_MASK3,
30911 IX86_BUILTIN_VFMSUBSD3_MASK3,
30912 IX86_BUILTIN_VFMSUBSS3_MASK3,
30913 IX86_BUILTIN_VFNMADDPD512_MASK,
30914 IX86_BUILTIN_VFNMADDPS512_MASK,
30915 IX86_BUILTIN_VFNMSUBPD512_MASK,
30916 IX86_BUILTIN_VFNMSUBPD512_MASK3,
30917 IX86_BUILTIN_VFNMSUBPS512_MASK,
30918 IX86_BUILTIN_VFNMSUBPS512_MASK3,
30919 IX86_BUILTIN_VPCLZCNTD512,
30920 IX86_BUILTIN_VPCLZCNTQ512,
30921 IX86_BUILTIN_VPCONFLICTD512,
30922 IX86_BUILTIN_VPCONFLICTQ512,
30923 IX86_BUILTIN_VPERMDF512,
30924 IX86_BUILTIN_VPERMDI512,
30925 IX86_BUILTIN_VPERMI2VARD512,
30926 IX86_BUILTIN_VPERMI2VARPD512,
30927 IX86_BUILTIN_VPERMI2VARPS512,
30928 IX86_BUILTIN_VPERMI2VARQ512,
30929 IX86_BUILTIN_VPERMILPD512,
30930 IX86_BUILTIN_VPERMILPS512,
30931 IX86_BUILTIN_VPERMILVARPD512,
30932 IX86_BUILTIN_VPERMILVARPS512,
30933 IX86_BUILTIN_VPERMT2VARD512,
30934 IX86_BUILTIN_VPERMT2VARD512_MASKZ,
30935 IX86_BUILTIN_VPERMT2VARPD512,
30936 IX86_BUILTIN_VPERMT2VARPD512_MASKZ,
30937 IX86_BUILTIN_VPERMT2VARPS512,
30938 IX86_BUILTIN_VPERMT2VARPS512_MASKZ,
30939 IX86_BUILTIN_VPERMT2VARQ512,
30940 IX86_BUILTIN_VPERMT2VARQ512_MASKZ,
30941 IX86_BUILTIN_VPERMVARDF512,
30942 IX86_BUILTIN_VPERMVARDI512,
30943 IX86_BUILTIN_VPERMVARSF512,
30944 IX86_BUILTIN_VPERMVARSI512,
30945 IX86_BUILTIN_VTERNLOGD512_MASK,
30946 IX86_BUILTIN_VTERNLOGD512_MASKZ,
30947 IX86_BUILTIN_VTERNLOGQ512_MASK,
30948 IX86_BUILTIN_VTERNLOGQ512_MASKZ,
30950 /* Mask arithmetic operations */
30951 IX86_BUILTIN_KAND16,
30952 IX86_BUILTIN_KANDN16,
30953 IX86_BUILTIN_KNOT16,
30954 IX86_BUILTIN_KOR16,
30955 IX86_BUILTIN_KORTESTC16,
30956 IX86_BUILTIN_KORTESTZ16,
30957 IX86_BUILTIN_KUNPCKBW,
30958 IX86_BUILTIN_KXNOR16,
30959 IX86_BUILTIN_KXOR16,
30960 IX86_BUILTIN_KMOV16,
30963 IX86_BUILTIN_PMOVUSQD256_MEM,
30964 IX86_BUILTIN_PMOVUSQD128_MEM,
30965 IX86_BUILTIN_PMOVSQD256_MEM,
30966 IX86_BUILTIN_PMOVSQD128_MEM,
30967 IX86_BUILTIN_PMOVQD256_MEM,
30968 IX86_BUILTIN_PMOVQD128_MEM,
30969 IX86_BUILTIN_PMOVUSQW256_MEM,
30970 IX86_BUILTIN_PMOVUSQW128_MEM,
30971 IX86_BUILTIN_PMOVSQW256_MEM,
30972 IX86_BUILTIN_PMOVSQW128_MEM,
30973 IX86_BUILTIN_PMOVQW256_MEM,
30974 IX86_BUILTIN_PMOVQW128_MEM,
30975 IX86_BUILTIN_PMOVUSQB256_MEM,
30976 IX86_BUILTIN_PMOVUSQB128_MEM,
30977 IX86_BUILTIN_PMOVSQB256_MEM,
30978 IX86_BUILTIN_PMOVSQB128_MEM,
30979 IX86_BUILTIN_PMOVQB256_MEM,
30980 IX86_BUILTIN_PMOVQB128_MEM,
30981 IX86_BUILTIN_PMOVUSDW256_MEM,
30982 IX86_BUILTIN_PMOVUSDW128_MEM,
30983 IX86_BUILTIN_PMOVSDW256_MEM,
30984 IX86_BUILTIN_PMOVSDW128_MEM,
30985 IX86_BUILTIN_PMOVDW256_MEM,
30986 IX86_BUILTIN_PMOVDW128_MEM,
30987 IX86_BUILTIN_PMOVUSDB256_MEM,
30988 IX86_BUILTIN_PMOVUSDB128_MEM,
30989 IX86_BUILTIN_PMOVSDB256_MEM,
30990 IX86_BUILTIN_PMOVSDB128_MEM,
30991 IX86_BUILTIN_PMOVDB256_MEM,
30992 IX86_BUILTIN_PMOVDB128_MEM,
30993 IX86_BUILTIN_MOVDQA64LOAD256_MASK,
30994 IX86_BUILTIN_MOVDQA64LOAD128_MASK,
30995 IX86_BUILTIN_MOVDQA32LOAD256_MASK,
30996 IX86_BUILTIN_MOVDQA32LOAD128_MASK,
30997 IX86_BUILTIN_MOVDQA64STORE256_MASK,
30998 IX86_BUILTIN_MOVDQA64STORE128_MASK,
30999 IX86_BUILTIN_MOVDQA32STORE256_MASK,
31000 IX86_BUILTIN_MOVDQA32STORE128_MASK,
31001 IX86_BUILTIN_LOADAPD256_MASK,
31002 IX86_BUILTIN_LOADAPD128_MASK,
31003 IX86_BUILTIN_LOADAPS256_MASK,
31004 IX86_BUILTIN_LOADAPS128_MASK,
31005 IX86_BUILTIN_STOREAPD256_MASK,
31006 IX86_BUILTIN_STOREAPD128_MASK,
31007 IX86_BUILTIN_STOREAPS256_MASK,
31008 IX86_BUILTIN_STOREAPS128_MASK,
31009 IX86_BUILTIN_LOADUPD256_MASK,
31010 IX86_BUILTIN_LOADUPD128_MASK,
31011 IX86_BUILTIN_LOADUPS256_MASK,
31012 IX86_BUILTIN_LOADUPS128_MASK,
31013 IX86_BUILTIN_STOREUPD256_MASK,
31014 IX86_BUILTIN_STOREUPD128_MASK,
31015 IX86_BUILTIN_STOREUPS256_MASK,
31016 IX86_BUILTIN_STOREUPS128_MASK,
31017 IX86_BUILTIN_LOADDQUDI256_MASK,
31018 IX86_BUILTIN_LOADDQUDI128_MASK,
31019 IX86_BUILTIN_LOADDQUSI256_MASK,
31020 IX86_BUILTIN_LOADDQUSI128_MASK,
31021 IX86_BUILTIN_LOADDQUHI256_MASK,
31022 IX86_BUILTIN_LOADDQUHI128_MASK,
31023 IX86_BUILTIN_LOADDQUQI256_MASK,
31024 IX86_BUILTIN_LOADDQUQI128_MASK,
31025 IX86_BUILTIN_STOREDQUDI256_MASK,
31026 IX86_BUILTIN_STOREDQUDI128_MASK,
31027 IX86_BUILTIN_STOREDQUSI256_MASK,
31028 IX86_BUILTIN_STOREDQUSI128_MASK,
31029 IX86_BUILTIN_STOREDQUHI256_MASK,
31030 IX86_BUILTIN_STOREDQUHI128_MASK,
31031 IX86_BUILTIN_STOREDQUQI256_MASK,
31032 IX86_BUILTIN_STOREDQUQI128_MASK,
31033 IX86_BUILTIN_COMPRESSPDSTORE256,
31034 IX86_BUILTIN_COMPRESSPDSTORE128,
31035 IX86_BUILTIN_COMPRESSPSSTORE256,
31036 IX86_BUILTIN_COMPRESSPSSTORE128,
31037 IX86_BUILTIN_PCOMPRESSQSTORE256,
31038 IX86_BUILTIN_PCOMPRESSQSTORE128,
31039 IX86_BUILTIN_PCOMPRESSDSTORE256,
31040 IX86_BUILTIN_PCOMPRESSDSTORE128,
31041 IX86_BUILTIN_EXPANDPDLOAD256,
31042 IX86_BUILTIN_EXPANDPDLOAD128,
31043 IX86_BUILTIN_EXPANDPSLOAD256,
31044 IX86_BUILTIN_EXPANDPSLOAD128,
31045 IX86_BUILTIN_PEXPANDQLOAD256,
31046 IX86_BUILTIN_PEXPANDQLOAD128,
31047 IX86_BUILTIN_PEXPANDDLOAD256,
31048 IX86_BUILTIN_PEXPANDDLOAD128,
31049 IX86_BUILTIN_EXPANDPDLOAD256Z,
31050 IX86_BUILTIN_EXPANDPDLOAD128Z,
31051 IX86_BUILTIN_EXPANDPSLOAD256Z,
31052 IX86_BUILTIN_EXPANDPSLOAD128Z,
31053 IX86_BUILTIN_PEXPANDQLOAD256Z,
31054 IX86_BUILTIN_PEXPANDQLOAD128Z,
31055 IX86_BUILTIN_PEXPANDDLOAD256Z,
31056 IX86_BUILTIN_PEXPANDDLOAD128Z,
31057 IX86_BUILTIN_PALIGNR256_MASK,
31058 IX86_BUILTIN_PALIGNR128_MASK,
31059 IX86_BUILTIN_MOVDQA64_256_MASK,
31060 IX86_BUILTIN_MOVDQA64_128_MASK,
31061 IX86_BUILTIN_MOVDQA32_256_MASK,
31062 IX86_BUILTIN_MOVDQA32_128_MASK,
31063 IX86_BUILTIN_MOVAPD256_MASK,
31064 IX86_BUILTIN_MOVAPD128_MASK,
31065 IX86_BUILTIN_MOVAPS256_MASK,
31066 IX86_BUILTIN_MOVAPS128_MASK,
31067 IX86_BUILTIN_MOVDQUHI256_MASK,
31068 IX86_BUILTIN_MOVDQUHI128_MASK,
31069 IX86_BUILTIN_MOVDQUQI256_MASK,
31070 IX86_BUILTIN_MOVDQUQI128_MASK,
31071 IX86_BUILTIN_MINPS128_MASK,
31072 IX86_BUILTIN_MAXPS128_MASK,
31073 IX86_BUILTIN_MINPD128_MASK,
31074 IX86_BUILTIN_MAXPD128_MASK,
31075 IX86_BUILTIN_MAXPD256_MASK,
31076 IX86_BUILTIN_MAXPS256_MASK,
31077 IX86_BUILTIN_MINPD256_MASK,
31078 IX86_BUILTIN_MINPS256_MASK,
31079 IX86_BUILTIN_MULPS128_MASK,
31080 IX86_BUILTIN_DIVPS128_MASK,
31081 IX86_BUILTIN_MULPD128_MASK,
31082 IX86_BUILTIN_DIVPD128_MASK,
31083 IX86_BUILTIN_DIVPD256_MASK,
31084 IX86_BUILTIN_DIVPS256_MASK,
31085 IX86_BUILTIN_MULPD256_MASK,
31086 IX86_BUILTIN_MULPS256_MASK,
31087 IX86_BUILTIN_ADDPD128_MASK,
31088 IX86_BUILTIN_ADDPD256_MASK,
31089 IX86_BUILTIN_ADDPS128_MASK,
31090 IX86_BUILTIN_ADDPS256_MASK,
31091 IX86_BUILTIN_SUBPD128_MASK,
31092 IX86_BUILTIN_SUBPD256_MASK,
31093 IX86_BUILTIN_SUBPS128_MASK,
31094 IX86_BUILTIN_SUBPS256_MASK,
31095 IX86_BUILTIN_XORPD256_MASK,
31096 IX86_BUILTIN_XORPD128_MASK,
31097 IX86_BUILTIN_XORPS256_MASK,
31098 IX86_BUILTIN_XORPS128_MASK,
31099 IX86_BUILTIN_ORPD256_MASK,
31100 IX86_BUILTIN_ORPD128_MASK,
31101 IX86_BUILTIN_ORPS256_MASK,
31102 IX86_BUILTIN_ORPS128_MASK,
31103 IX86_BUILTIN_BROADCASTF32x2_256,
31104 IX86_BUILTIN_BROADCASTI32x2_256,
31105 IX86_BUILTIN_BROADCASTI32x2_128,
31106 IX86_BUILTIN_BROADCASTF64X2_256,
31107 IX86_BUILTIN_BROADCASTI64X2_256,
31108 IX86_BUILTIN_BROADCASTF32X4_256,
31109 IX86_BUILTIN_BROADCASTI32X4_256,
31110 IX86_BUILTIN_EXTRACTF32X4_256,
31111 IX86_BUILTIN_EXTRACTI32X4_256,
31112 IX86_BUILTIN_DBPSADBW256,
31113 IX86_BUILTIN_DBPSADBW128,
31114 IX86_BUILTIN_CVTTPD2QQ256,
31115 IX86_BUILTIN_CVTTPD2QQ128,
31116 IX86_BUILTIN_CVTTPD2UQQ256,
31117 IX86_BUILTIN_CVTTPD2UQQ128,
31118 IX86_BUILTIN_CVTPD2QQ256,
31119 IX86_BUILTIN_CVTPD2QQ128,
31120 IX86_BUILTIN_CVTPD2UQQ256,
31121 IX86_BUILTIN_CVTPD2UQQ128,
31122 IX86_BUILTIN_CVTPD2UDQ256_MASK,
31123 IX86_BUILTIN_CVTPD2UDQ128_MASK,
31124 IX86_BUILTIN_CVTTPS2QQ256,
31125 IX86_BUILTIN_CVTTPS2QQ128,
31126 IX86_BUILTIN_CVTTPS2UQQ256,
31127 IX86_BUILTIN_CVTTPS2UQQ128,
31128 IX86_BUILTIN_CVTTPS2DQ256_MASK,
31129 IX86_BUILTIN_CVTTPS2DQ128_MASK,
31130 IX86_BUILTIN_CVTTPS2UDQ256,
31131 IX86_BUILTIN_CVTTPS2UDQ128,
31132 IX86_BUILTIN_CVTTPD2DQ256_MASK,
31133 IX86_BUILTIN_CVTTPD2DQ128_MASK,
31134 IX86_BUILTIN_CVTTPD2UDQ256_MASK,
31135 IX86_BUILTIN_CVTTPD2UDQ128_MASK,
31136 IX86_BUILTIN_CVTPD2DQ256_MASK,
31137 IX86_BUILTIN_CVTPD2DQ128_MASK,
31138 IX86_BUILTIN_CVTDQ2PD256_MASK,
31139 IX86_BUILTIN_CVTDQ2PD128_MASK,
31140 IX86_BUILTIN_CVTUDQ2PD256_MASK,
31141 IX86_BUILTIN_CVTUDQ2PD128_MASK,
31142 IX86_BUILTIN_CVTDQ2PS256_MASK,
31143 IX86_BUILTIN_CVTDQ2PS128_MASK,
31144 IX86_BUILTIN_CVTUDQ2PS256_MASK,
31145 IX86_BUILTIN_CVTUDQ2PS128_MASK,
31146 IX86_BUILTIN_CVTPS2PD256_MASK,
31147 IX86_BUILTIN_CVTPS2PD128_MASK,
31148 IX86_BUILTIN_PBROADCASTB256_MASK,
31149 IX86_BUILTIN_PBROADCASTB256_GPR_MASK,
31150 IX86_BUILTIN_PBROADCASTB128_MASK,
31151 IX86_BUILTIN_PBROADCASTB128_GPR_MASK,
31152 IX86_BUILTIN_PBROADCASTW256_MASK,
31153 IX86_BUILTIN_PBROADCASTW256_GPR_MASK,
31154 IX86_BUILTIN_PBROADCASTW128_MASK,
31155 IX86_BUILTIN_PBROADCASTW128_GPR_MASK,
31156 IX86_BUILTIN_PBROADCASTD256_MASK,
31157 IX86_BUILTIN_PBROADCASTD256_GPR_MASK,
31158 IX86_BUILTIN_PBROADCASTD128_MASK,
31159 IX86_BUILTIN_PBROADCASTD128_GPR_MASK,
31160 IX86_BUILTIN_PBROADCASTQ256_MASK,
31161 IX86_BUILTIN_PBROADCASTQ256_GPR_MASK,
31162 IX86_BUILTIN_PBROADCASTQ128_MASK,
31163 IX86_BUILTIN_PBROADCASTQ128_GPR_MASK,
31164 IX86_BUILTIN_BROADCASTSS256,
31165 IX86_BUILTIN_BROADCASTSS128,
31166 IX86_BUILTIN_BROADCASTSD256,
31167 IX86_BUILTIN_EXTRACTF64X2_256,
31168 IX86_BUILTIN_EXTRACTI64X2_256,
31169 IX86_BUILTIN_INSERTF32X4_256,
31170 IX86_BUILTIN_INSERTI32X4_256,
31171 IX86_BUILTIN_PMOVSXBW256_MASK,
31172 IX86_BUILTIN_PMOVSXBW128_MASK,
31173 IX86_BUILTIN_PMOVSXBD256_MASK,
31174 IX86_BUILTIN_PMOVSXBD128_MASK,
31175 IX86_BUILTIN_PMOVSXBQ256_MASK,
31176 IX86_BUILTIN_PMOVSXBQ128_MASK,
31177 IX86_BUILTIN_PMOVSXWD256_MASK,
31178 IX86_BUILTIN_PMOVSXWD128_MASK,
31179 IX86_BUILTIN_PMOVSXWQ256_MASK,
31180 IX86_BUILTIN_PMOVSXWQ128_MASK,
31181 IX86_BUILTIN_PMOVSXDQ256_MASK,
31182 IX86_BUILTIN_PMOVSXDQ128_MASK,
31183 IX86_BUILTIN_PMOVZXBW256_MASK,
31184 IX86_BUILTIN_PMOVZXBW128_MASK,
31185 IX86_BUILTIN_PMOVZXBD256_MASK,
31186 IX86_BUILTIN_PMOVZXBD128_MASK,
31187 IX86_BUILTIN_PMOVZXBQ256_MASK,
31188 IX86_BUILTIN_PMOVZXBQ128_MASK,
31189 IX86_BUILTIN_PMOVZXWD256_MASK,
31190 IX86_BUILTIN_PMOVZXWD128_MASK,
31191 IX86_BUILTIN_PMOVZXWQ256_MASK,
31192 IX86_BUILTIN_PMOVZXWQ128_MASK,
31193 IX86_BUILTIN_PMOVZXDQ256_MASK,
31194 IX86_BUILTIN_PMOVZXDQ128_MASK,
31195 IX86_BUILTIN_REDUCEPD256_MASK,
31196 IX86_BUILTIN_REDUCEPD128_MASK,
31197 IX86_BUILTIN_REDUCEPS256_MASK,
31198 IX86_BUILTIN_REDUCEPS128_MASK,
31199 IX86_BUILTIN_REDUCESD_MASK,
31200 IX86_BUILTIN_REDUCESS_MASK,
31201 IX86_BUILTIN_VPERMVARHI256_MASK,
31202 IX86_BUILTIN_VPERMVARHI128_MASK,
31203 IX86_BUILTIN_VPERMT2VARHI256,
31204 IX86_BUILTIN_VPERMT2VARHI256_MASKZ,
31205 IX86_BUILTIN_VPERMT2VARHI128,
31206 IX86_BUILTIN_VPERMT2VARHI128_MASKZ,
31207 IX86_BUILTIN_VPERMI2VARHI256,
31208 IX86_BUILTIN_VPERMI2VARHI128,
31209 IX86_BUILTIN_RCP14PD256,
31210 IX86_BUILTIN_RCP14PD128,
31211 IX86_BUILTIN_RCP14PS256,
31212 IX86_BUILTIN_RCP14PS128,
31213 IX86_BUILTIN_RSQRT14PD256_MASK,
31214 IX86_BUILTIN_RSQRT14PD128_MASK,
31215 IX86_BUILTIN_RSQRT14PS256_MASK,
31216 IX86_BUILTIN_RSQRT14PS128_MASK,
31217 IX86_BUILTIN_SQRTPD256_MASK,
31218 IX86_BUILTIN_SQRTPD128_MASK,
31219 IX86_BUILTIN_SQRTPS256_MASK,
31220 IX86_BUILTIN_SQRTPS128_MASK,
31221 IX86_BUILTIN_PADDB128_MASK,
31222 IX86_BUILTIN_PADDW128_MASK,
31223 IX86_BUILTIN_PADDD128_MASK,
31224 IX86_BUILTIN_PADDQ128_MASK,
31225 IX86_BUILTIN_PSUBB128_MASK,
31226 IX86_BUILTIN_PSUBW128_MASK,
31227 IX86_BUILTIN_PSUBD128_MASK,
31228 IX86_BUILTIN_PSUBQ128_MASK,
31229 IX86_BUILTIN_PADDSB128_MASK,
31230 IX86_BUILTIN_PADDSW128_MASK,
31231 IX86_BUILTIN_PSUBSB128_MASK,
31232 IX86_BUILTIN_PSUBSW128_MASK,
31233 IX86_BUILTIN_PADDUSB128_MASK,
31234 IX86_BUILTIN_PADDUSW128_MASK,
31235 IX86_BUILTIN_PSUBUSB128_MASK,
31236 IX86_BUILTIN_PSUBUSW128_MASK,
31237 IX86_BUILTIN_PADDB256_MASK,
31238 IX86_BUILTIN_PADDW256_MASK,
31239 IX86_BUILTIN_PADDD256_MASK,
31240 IX86_BUILTIN_PADDQ256_MASK,
31241 IX86_BUILTIN_PADDSB256_MASK,
31242 IX86_BUILTIN_PADDSW256_MASK,
31243 IX86_BUILTIN_PADDUSB256_MASK,
31244 IX86_BUILTIN_PADDUSW256_MASK,
31245 IX86_BUILTIN_PSUBB256_MASK,
31246 IX86_BUILTIN_PSUBW256_MASK,
31247 IX86_BUILTIN_PSUBD256_MASK,
31248 IX86_BUILTIN_PSUBQ256_MASK,
31249 IX86_BUILTIN_PSUBSB256_MASK,
31250 IX86_BUILTIN_PSUBSW256_MASK,
31251 IX86_BUILTIN_PSUBUSB256_MASK,
31252 IX86_BUILTIN_PSUBUSW256_MASK,
31253 IX86_BUILTIN_SHUF_F64x2_256,
31254 IX86_BUILTIN_SHUF_I64x2_256,
31255 IX86_BUILTIN_SHUF_I32x4_256,
31256 IX86_BUILTIN_SHUF_F32x4_256,
31257 IX86_BUILTIN_PMOVWB128,
31258 IX86_BUILTIN_PMOVWB256,
31259 IX86_BUILTIN_PMOVSWB128,
31260 IX86_BUILTIN_PMOVSWB256,
31261 IX86_BUILTIN_PMOVUSWB128,
31262 IX86_BUILTIN_PMOVUSWB256,
31263 IX86_BUILTIN_PMOVDB128,
31264 IX86_BUILTIN_PMOVDB256,
31265 IX86_BUILTIN_PMOVSDB128,
31266 IX86_BUILTIN_PMOVSDB256,
31267 IX86_BUILTIN_PMOVUSDB128,
31268 IX86_BUILTIN_PMOVUSDB256,
31269 IX86_BUILTIN_PMOVDW128,
31270 IX86_BUILTIN_PMOVDW256,
31271 IX86_BUILTIN_PMOVSDW128,
31272 IX86_BUILTIN_PMOVSDW256,
31273 IX86_BUILTIN_PMOVUSDW128,
31274 IX86_BUILTIN_PMOVUSDW256,
31275 IX86_BUILTIN_PMOVQB128,
31276 IX86_BUILTIN_PMOVQB256,
31277 IX86_BUILTIN_PMOVSQB128,
31278 IX86_BUILTIN_PMOVSQB256,
31279 IX86_BUILTIN_PMOVUSQB128,
31280 IX86_BUILTIN_PMOVUSQB256,
31281 IX86_BUILTIN_PMOVQW128,
31282 IX86_BUILTIN_PMOVQW256,
31283 IX86_BUILTIN_PMOVSQW128,
31284 IX86_BUILTIN_PMOVSQW256,
31285 IX86_BUILTIN_PMOVUSQW128,
31286 IX86_BUILTIN_PMOVUSQW256,
31287 IX86_BUILTIN_PMOVQD128,
31288 IX86_BUILTIN_PMOVQD256,
31289 IX86_BUILTIN_PMOVSQD128,
31290 IX86_BUILTIN_PMOVSQD256,
31291 IX86_BUILTIN_PMOVUSQD128,
31292 IX86_BUILTIN_PMOVUSQD256,
31293 IX86_BUILTIN_RANGEPD256,
31294 IX86_BUILTIN_RANGEPD128,
31295 IX86_BUILTIN_RANGEPS256,
31296 IX86_BUILTIN_RANGEPS128,
31297 IX86_BUILTIN_GETEXPPS256,
31298 IX86_BUILTIN_GETEXPPD256,
31299 IX86_BUILTIN_GETEXPPS128,
31300 IX86_BUILTIN_GETEXPPD128,
31301 IX86_BUILTIN_FIXUPIMMPD256_MASK,
31302 IX86_BUILTIN_FIXUPIMMPD256_MASKZ,
31303 IX86_BUILTIN_FIXUPIMMPS256_MASK,
31304 IX86_BUILTIN_FIXUPIMMPS256_MASKZ,
31305 IX86_BUILTIN_FIXUPIMMPD128_MASK,
31306 IX86_BUILTIN_FIXUPIMMPD128_MASKZ,
31307 IX86_BUILTIN_FIXUPIMMPS128_MASK,
31308 IX86_BUILTIN_FIXUPIMMPS128_MASKZ,
31309 IX86_BUILTIN_PABSQ256,
31310 IX86_BUILTIN_PABSQ128,
31311 IX86_BUILTIN_PABSD256_MASK,
31312 IX86_BUILTIN_PABSD128_MASK,
31313 IX86_BUILTIN_PMULHRSW256_MASK,
31314 IX86_BUILTIN_PMULHRSW128_MASK,
31315 IX86_BUILTIN_PMULHUW128_MASK,
31316 IX86_BUILTIN_PMULHUW256_MASK,
31317 IX86_BUILTIN_PMULHW256_MASK,
31318 IX86_BUILTIN_PMULHW128_MASK,
31319 IX86_BUILTIN_PMULLW256_MASK,
31320 IX86_BUILTIN_PMULLW128_MASK,
31321 IX86_BUILTIN_PMULLQ256,
31322 IX86_BUILTIN_PMULLQ128,
31323 IX86_BUILTIN_ANDPD256_MASK,
31324 IX86_BUILTIN_ANDPD128_MASK,
31325 IX86_BUILTIN_ANDPS256_MASK,
31326 IX86_BUILTIN_ANDPS128_MASK,
31327 IX86_BUILTIN_ANDNPD256_MASK,
31328 IX86_BUILTIN_ANDNPD128_MASK,
31329 IX86_BUILTIN_ANDNPS256_MASK,
31330 IX86_BUILTIN_ANDNPS128_MASK,
31331 IX86_BUILTIN_PSLLWI128_MASK,
31332 IX86_BUILTIN_PSLLDI128_MASK,
31333 IX86_BUILTIN_PSLLQI128_MASK,
31334 IX86_BUILTIN_PSLLW128_MASK,
31335 IX86_BUILTIN_PSLLD128_MASK,
31336 IX86_BUILTIN_PSLLQ128_MASK,
31337 IX86_BUILTIN_PSLLWI256_MASK ,
31338 IX86_BUILTIN_PSLLW256_MASK,
31339 IX86_BUILTIN_PSLLDI256_MASK,
31340 IX86_BUILTIN_PSLLD256_MASK,
31341 IX86_BUILTIN_PSLLQI256_MASK,
31342 IX86_BUILTIN_PSLLQ256_MASK,
31343 IX86_BUILTIN_PSRADI128_MASK,
31344 IX86_BUILTIN_PSRAD128_MASK,
31345 IX86_BUILTIN_PSRADI256_MASK,
31346 IX86_BUILTIN_PSRAD256_MASK,
31347 IX86_BUILTIN_PSRAQI128_MASK,
31348 IX86_BUILTIN_PSRAQ128_MASK,
31349 IX86_BUILTIN_PSRAQI256_MASK,
31350 IX86_BUILTIN_PSRAQ256_MASK,
31351 IX86_BUILTIN_PANDD256,
31352 IX86_BUILTIN_PANDD128,
31353 IX86_BUILTIN_PSRLDI128_MASK,
31354 IX86_BUILTIN_PSRLD128_MASK,
31355 IX86_BUILTIN_PSRLDI256_MASK,
31356 IX86_BUILTIN_PSRLD256_MASK,
31357 IX86_BUILTIN_PSRLQI128_MASK,
31358 IX86_BUILTIN_PSRLQ128_MASK,
31359 IX86_BUILTIN_PSRLQI256_MASK,
31360 IX86_BUILTIN_PSRLQ256_MASK,
31361 IX86_BUILTIN_PANDQ256,
31362 IX86_BUILTIN_PANDQ128,
31363 IX86_BUILTIN_PANDND256,
31364 IX86_BUILTIN_PANDND128,
31365 IX86_BUILTIN_PANDNQ256,
31366 IX86_BUILTIN_PANDNQ128,
31367 IX86_BUILTIN_PORD256,
31368 IX86_BUILTIN_PORD128,
31369 IX86_BUILTIN_PORQ256,
31370 IX86_BUILTIN_PORQ128,
31371 IX86_BUILTIN_PXORD256,
31372 IX86_BUILTIN_PXORD128,
31373 IX86_BUILTIN_PXORQ256,
31374 IX86_BUILTIN_PXORQ128,
31375 IX86_BUILTIN_PACKSSWB256_MASK,
31376 IX86_BUILTIN_PACKSSWB128_MASK,
31377 IX86_BUILTIN_PACKUSWB256_MASK,
31378 IX86_BUILTIN_PACKUSWB128_MASK,
31379 IX86_BUILTIN_RNDSCALEPS256,
31380 IX86_BUILTIN_RNDSCALEPD256,
31381 IX86_BUILTIN_RNDSCALEPS128,
31382 IX86_BUILTIN_RNDSCALEPD128,
31383 IX86_BUILTIN_VTERNLOGQ256_MASK,
31384 IX86_BUILTIN_VTERNLOGQ256_MASKZ,
31385 IX86_BUILTIN_VTERNLOGD256_MASK,
31386 IX86_BUILTIN_VTERNLOGD256_MASKZ,
31387 IX86_BUILTIN_VTERNLOGQ128_MASK,
31388 IX86_BUILTIN_VTERNLOGQ128_MASKZ,
31389 IX86_BUILTIN_VTERNLOGD128_MASK,
31390 IX86_BUILTIN_VTERNLOGD128_MASKZ,
31391 IX86_BUILTIN_SCALEFPD256,
31392 IX86_BUILTIN_SCALEFPS256,
31393 IX86_BUILTIN_SCALEFPD128,
31394 IX86_BUILTIN_SCALEFPS128,
31395 IX86_BUILTIN_VFMADDPD256_MASK,
31396 IX86_BUILTIN_VFMADDPD256_MASK3,
31397 IX86_BUILTIN_VFMADDPD256_MASKZ,
31398 IX86_BUILTIN_VFMADDPD128_MASK,
31399 IX86_BUILTIN_VFMADDPD128_MASK3,
31400 IX86_BUILTIN_VFMADDPD128_MASKZ,
31401 IX86_BUILTIN_VFMADDPS256_MASK,
31402 IX86_BUILTIN_VFMADDPS256_MASK3,
31403 IX86_BUILTIN_VFMADDPS256_MASKZ,
31404 IX86_BUILTIN_VFMADDPS128_MASK,
31405 IX86_BUILTIN_VFMADDPS128_MASK3,
31406 IX86_BUILTIN_VFMADDPS128_MASKZ,
31407 IX86_BUILTIN_VFMSUBPD256_MASK3,
31408 IX86_BUILTIN_VFMSUBPD128_MASK3,
31409 IX86_BUILTIN_VFMSUBPS256_MASK3,
31410 IX86_BUILTIN_VFMSUBPS128_MASK3,
31411 IX86_BUILTIN_VFNMADDPD256_MASK,
31412 IX86_BUILTIN_VFNMADDPD128_MASK,
31413 IX86_BUILTIN_VFNMADDPS256_MASK,
31414 IX86_BUILTIN_VFNMADDPS128_MASK,
31415 IX86_BUILTIN_VFNMSUBPD256_MASK,
31416 IX86_BUILTIN_VFNMSUBPD256_MASK3,
31417 IX86_BUILTIN_VFNMSUBPD128_MASK,
31418 IX86_BUILTIN_VFNMSUBPD128_MASK3,
31419 IX86_BUILTIN_VFNMSUBPS256_MASK,
31420 IX86_BUILTIN_VFNMSUBPS256_MASK3,
31421 IX86_BUILTIN_VFNMSUBPS128_MASK,
31422 IX86_BUILTIN_VFNMSUBPS128_MASK3,
31423 IX86_BUILTIN_VFMADDSUBPD256_MASK,
31424 IX86_BUILTIN_VFMADDSUBPD256_MASK3,
31425 IX86_BUILTIN_VFMADDSUBPD256_MASKZ,
31426 IX86_BUILTIN_VFMADDSUBPD128_MASK,
31427 IX86_BUILTIN_VFMADDSUBPD128_MASK3,
31428 IX86_BUILTIN_VFMADDSUBPD128_MASKZ,
31429 IX86_BUILTIN_VFMADDSUBPS256_MASK,
31430 IX86_BUILTIN_VFMADDSUBPS256_MASK3,
31431 IX86_BUILTIN_VFMADDSUBPS256_MASKZ,
31432 IX86_BUILTIN_VFMADDSUBPS128_MASK,
31433 IX86_BUILTIN_VFMADDSUBPS128_MASK3,
31434 IX86_BUILTIN_VFMADDSUBPS128_MASKZ,
31435 IX86_BUILTIN_VFMSUBADDPD256_MASK3,
31436 IX86_BUILTIN_VFMSUBADDPD128_MASK3,
31437 IX86_BUILTIN_VFMSUBADDPS256_MASK3,
31438 IX86_BUILTIN_VFMSUBADDPS128_MASK3,
31439 IX86_BUILTIN_INSERTF64X2_256,
31440 IX86_BUILTIN_INSERTI64X2_256,
31441 IX86_BUILTIN_PSRAVV16HI,
31442 IX86_BUILTIN_PSRAVV8HI,
31443 IX86_BUILTIN_PMADDUBSW256_MASK,
31444 IX86_BUILTIN_PMADDUBSW128_MASK,
31445 IX86_BUILTIN_PMADDWD256_MASK,
31446 IX86_BUILTIN_PMADDWD128_MASK,
31447 IX86_BUILTIN_PSRLVV16HI,
31448 IX86_BUILTIN_PSRLVV8HI,
31449 IX86_BUILTIN_CVTPS2DQ256_MASK,
31450 IX86_BUILTIN_CVTPS2DQ128_MASK,
31451 IX86_BUILTIN_CVTPS2UDQ256,
31452 IX86_BUILTIN_CVTPS2UDQ128,
31453 IX86_BUILTIN_CVTPS2QQ256,
31454 IX86_BUILTIN_CVTPS2QQ128,
31455 IX86_BUILTIN_CVTPS2UQQ256,
31456 IX86_BUILTIN_CVTPS2UQQ128,
31457 IX86_BUILTIN_GETMANTPS256,
31458 IX86_BUILTIN_GETMANTPS128,
31459 IX86_BUILTIN_GETMANTPD256,
31460 IX86_BUILTIN_GETMANTPD128,
31461 IX86_BUILTIN_MOVDDUP256_MASK,
31462 IX86_BUILTIN_MOVDDUP128_MASK,
31463 IX86_BUILTIN_MOVSHDUP256_MASK,
31464 IX86_BUILTIN_MOVSHDUP128_MASK,
31465 IX86_BUILTIN_MOVSLDUP256_MASK,
31466 IX86_BUILTIN_MOVSLDUP128_MASK,
31467 IX86_BUILTIN_CVTQQ2PS256,
31468 IX86_BUILTIN_CVTQQ2PS128,
31469 IX86_BUILTIN_CVTUQQ2PS256,
31470 IX86_BUILTIN_CVTUQQ2PS128,
31471 IX86_BUILTIN_CVTQQ2PD256,
31472 IX86_BUILTIN_CVTQQ2PD128,
31473 IX86_BUILTIN_CVTUQQ2PD256,
31474 IX86_BUILTIN_CVTUQQ2PD128,
31475 IX86_BUILTIN_VPERMT2VARQ256,
31476 IX86_BUILTIN_VPERMT2VARQ256_MASKZ,
31477 IX86_BUILTIN_VPERMT2VARD256,
31478 IX86_BUILTIN_VPERMT2VARD256_MASKZ,
31479 IX86_BUILTIN_VPERMI2VARQ256,
31480 IX86_BUILTIN_VPERMI2VARD256,
31481 IX86_BUILTIN_VPERMT2VARPD256,
31482 IX86_BUILTIN_VPERMT2VARPD256_MASKZ,
31483 IX86_BUILTIN_VPERMT2VARPS256,
31484 IX86_BUILTIN_VPERMT2VARPS256_MASKZ,
31485 IX86_BUILTIN_VPERMI2VARPD256,
31486 IX86_BUILTIN_VPERMI2VARPS256,
31487 IX86_BUILTIN_VPERMT2VARQ128,
31488 IX86_BUILTIN_VPERMT2VARQ128_MASKZ,
31489 IX86_BUILTIN_VPERMT2VARD128,
31490 IX86_BUILTIN_VPERMT2VARD128_MASKZ,
31491 IX86_BUILTIN_VPERMI2VARQ128,
31492 IX86_BUILTIN_VPERMI2VARD128,
31493 IX86_BUILTIN_VPERMT2VARPD128,
31494 IX86_BUILTIN_VPERMT2VARPD128_MASKZ,
31495 IX86_BUILTIN_VPERMT2VARPS128,
31496 IX86_BUILTIN_VPERMT2VARPS128_MASKZ,
31497 IX86_BUILTIN_VPERMI2VARPD128,
31498 IX86_BUILTIN_VPERMI2VARPS128,
31499 IX86_BUILTIN_PSHUFB256_MASK,
31500 IX86_BUILTIN_PSHUFB128_MASK,
31501 IX86_BUILTIN_PSHUFHW256_MASK,
31502 IX86_BUILTIN_PSHUFHW128_MASK,
31503 IX86_BUILTIN_PSHUFLW256_MASK,
31504 IX86_BUILTIN_PSHUFLW128_MASK,
31505 IX86_BUILTIN_PSHUFD256_MASK,
31506 IX86_BUILTIN_PSHUFD128_MASK,
31507 IX86_BUILTIN_SHUFPD256_MASK,
31508 IX86_BUILTIN_SHUFPD128_MASK,
31509 IX86_BUILTIN_SHUFPS256_MASK,
31510 IX86_BUILTIN_SHUFPS128_MASK,
31511 IX86_BUILTIN_PROLVQ256,
31512 IX86_BUILTIN_PROLVQ128,
31513 IX86_BUILTIN_PROLQ256,
31514 IX86_BUILTIN_PROLQ128,
31515 IX86_BUILTIN_PRORVQ256,
31516 IX86_BUILTIN_PRORVQ128,
31517 IX86_BUILTIN_PRORQ256,
31518 IX86_BUILTIN_PRORQ128,
31519 IX86_BUILTIN_PSRAVQ128,
31520 IX86_BUILTIN_PSRAVQ256,
31521 IX86_BUILTIN_PSLLVV4DI_MASK,
31522 IX86_BUILTIN_PSLLVV2DI_MASK,
31523 IX86_BUILTIN_PSLLVV8SI_MASK,
31524 IX86_BUILTIN_PSLLVV4SI_MASK,
31525 IX86_BUILTIN_PSRAVV8SI_MASK,
31526 IX86_BUILTIN_PSRAVV4SI_MASK,
31527 IX86_BUILTIN_PSRLVV4DI_MASK,
31528 IX86_BUILTIN_PSRLVV2DI_MASK,
31529 IX86_BUILTIN_PSRLVV8SI_MASK,
31530 IX86_BUILTIN_PSRLVV4SI_MASK,
31531 IX86_BUILTIN_PSRAWI256_MASK,
31532 IX86_BUILTIN_PSRAW256_MASK,
31533 IX86_BUILTIN_PSRAWI128_MASK,
31534 IX86_BUILTIN_PSRAW128_MASK,
31535 IX86_BUILTIN_PSRLWI256_MASK,
31536 IX86_BUILTIN_PSRLW256_MASK,
31537 IX86_BUILTIN_PSRLWI128_MASK,
31538 IX86_BUILTIN_PSRLW128_MASK,
31539 IX86_BUILTIN_PRORVD256,
31540 IX86_BUILTIN_PROLVD256,
31541 IX86_BUILTIN_PRORD256,
31542 IX86_BUILTIN_PROLD256,
31543 IX86_BUILTIN_PRORVD128,
31544 IX86_BUILTIN_PROLVD128,
31545 IX86_BUILTIN_PRORD128,
31546 IX86_BUILTIN_PROLD128,
31547 IX86_BUILTIN_FPCLASSPD256,
31548 IX86_BUILTIN_FPCLASSPD128,
31549 IX86_BUILTIN_FPCLASSSD,
31550 IX86_BUILTIN_FPCLASSPS256,
31551 IX86_BUILTIN_FPCLASSPS128,
31552 IX86_BUILTIN_FPCLASSSS,
31553 IX86_BUILTIN_CVTB2MASK128,
31554 IX86_BUILTIN_CVTB2MASK256,
31555 IX86_BUILTIN_CVTW2MASK128,
31556 IX86_BUILTIN_CVTW2MASK256,
31557 IX86_BUILTIN_CVTD2MASK128,
31558 IX86_BUILTIN_CVTD2MASK256,
31559 IX86_BUILTIN_CVTQ2MASK128,
31560 IX86_BUILTIN_CVTQ2MASK256,
31561 IX86_BUILTIN_CVTMASK2B128,
31562 IX86_BUILTIN_CVTMASK2B256,
31563 IX86_BUILTIN_CVTMASK2W128,
31564 IX86_BUILTIN_CVTMASK2W256,
31565 IX86_BUILTIN_CVTMASK2D128,
31566 IX86_BUILTIN_CVTMASK2D256,
31567 IX86_BUILTIN_CVTMASK2Q128,
31568 IX86_BUILTIN_CVTMASK2Q256,
31569 IX86_BUILTIN_PCMPEQB128_MASK,
31570 IX86_BUILTIN_PCMPEQB256_MASK,
31571 IX86_BUILTIN_PCMPEQW128_MASK,
31572 IX86_BUILTIN_PCMPEQW256_MASK,
31573 IX86_BUILTIN_PCMPEQD128_MASK,
31574 IX86_BUILTIN_PCMPEQD256_MASK,
31575 IX86_BUILTIN_PCMPEQQ128_MASK,
31576 IX86_BUILTIN_PCMPEQQ256_MASK,
31577 IX86_BUILTIN_PCMPGTB128_MASK,
31578 IX86_BUILTIN_PCMPGTB256_MASK,
31579 IX86_BUILTIN_PCMPGTW128_MASK,
31580 IX86_BUILTIN_PCMPGTW256_MASK,
31581 IX86_BUILTIN_PCMPGTD128_MASK,
31582 IX86_BUILTIN_PCMPGTD256_MASK,
31583 IX86_BUILTIN_PCMPGTQ128_MASK,
31584 IX86_BUILTIN_PCMPGTQ256_MASK,
31585 IX86_BUILTIN_PTESTMB128,
31586 IX86_BUILTIN_PTESTMB256,
31587 IX86_BUILTIN_PTESTMW128,
31588 IX86_BUILTIN_PTESTMW256,
31589 IX86_BUILTIN_PTESTMD128,
31590 IX86_BUILTIN_PTESTMD256,
31591 IX86_BUILTIN_PTESTMQ128,
31592 IX86_BUILTIN_PTESTMQ256,
31593 IX86_BUILTIN_PTESTNMB128,
31594 IX86_BUILTIN_PTESTNMB256,
31595 IX86_BUILTIN_PTESTNMW128,
31596 IX86_BUILTIN_PTESTNMW256,
31597 IX86_BUILTIN_PTESTNMD128,
31598 IX86_BUILTIN_PTESTNMD256,
31599 IX86_BUILTIN_PTESTNMQ128,
31600 IX86_BUILTIN_PTESTNMQ256,
31601 IX86_BUILTIN_PBROADCASTMB128,
31602 IX86_BUILTIN_PBROADCASTMB256,
31603 IX86_BUILTIN_PBROADCASTMW128,
31604 IX86_BUILTIN_PBROADCASTMW256,
31605 IX86_BUILTIN_COMPRESSPD256,
31606 IX86_BUILTIN_COMPRESSPD128,
31607 IX86_BUILTIN_COMPRESSPS256,
31608 IX86_BUILTIN_COMPRESSPS128,
31609 IX86_BUILTIN_PCOMPRESSQ256,
31610 IX86_BUILTIN_PCOMPRESSQ128,
31611 IX86_BUILTIN_PCOMPRESSD256,
31612 IX86_BUILTIN_PCOMPRESSD128,
31613 IX86_BUILTIN_EXPANDPD256,
31614 IX86_BUILTIN_EXPANDPD128,
31615 IX86_BUILTIN_EXPANDPS256,
31616 IX86_BUILTIN_EXPANDPS128,
31617 IX86_BUILTIN_PEXPANDQ256,
31618 IX86_BUILTIN_PEXPANDQ128,
31619 IX86_BUILTIN_PEXPANDD256,
31620 IX86_BUILTIN_PEXPANDD128,
31621 IX86_BUILTIN_EXPANDPD256Z,
31622 IX86_BUILTIN_EXPANDPD128Z,
31623 IX86_BUILTIN_EXPANDPS256Z,
31624 IX86_BUILTIN_EXPANDPS128Z,
31625 IX86_BUILTIN_PEXPANDQ256Z,
31626 IX86_BUILTIN_PEXPANDQ128Z,
31627 IX86_BUILTIN_PEXPANDD256Z,
31628 IX86_BUILTIN_PEXPANDD128Z,
31629 IX86_BUILTIN_PMAXSD256_MASK,
31630 IX86_BUILTIN_PMINSD256_MASK,
31631 IX86_BUILTIN_PMAXUD256_MASK,
31632 IX86_BUILTIN_PMINUD256_MASK,
31633 IX86_BUILTIN_PMAXSD128_MASK,
31634 IX86_BUILTIN_PMINSD128_MASK,
31635 IX86_BUILTIN_PMAXUD128_MASK,
31636 IX86_BUILTIN_PMINUD128_MASK,
31637 IX86_BUILTIN_PMAXSQ256_MASK,
31638 IX86_BUILTIN_PMINSQ256_MASK,
31639 IX86_BUILTIN_PMAXUQ256_MASK,
31640 IX86_BUILTIN_PMINUQ256_MASK,
31641 IX86_BUILTIN_PMAXSQ128_MASK,
31642 IX86_BUILTIN_PMINSQ128_MASK,
31643 IX86_BUILTIN_PMAXUQ128_MASK,
31644 IX86_BUILTIN_PMINUQ128_MASK,
31645 IX86_BUILTIN_PMINSB256_MASK,
31646 IX86_BUILTIN_PMINUB256_MASK,
31647 IX86_BUILTIN_PMAXSB256_MASK,
31648 IX86_BUILTIN_PMAXUB256_MASK,
31649 IX86_BUILTIN_PMINSB128_MASK,
31650 IX86_BUILTIN_PMINUB128_MASK,
31651 IX86_BUILTIN_PMAXSB128_MASK,
31652 IX86_BUILTIN_PMAXUB128_MASK,
31653 IX86_BUILTIN_PMINSW256_MASK,
31654 IX86_BUILTIN_PMINUW256_MASK,
31655 IX86_BUILTIN_PMAXSW256_MASK,
31656 IX86_BUILTIN_PMAXUW256_MASK,
31657 IX86_BUILTIN_PMINSW128_MASK,
31658 IX86_BUILTIN_PMINUW128_MASK,
31659 IX86_BUILTIN_PMAXSW128_MASK,
31660 IX86_BUILTIN_PMAXUW128_MASK,
31661 IX86_BUILTIN_VPCONFLICTQ256,
31662 IX86_BUILTIN_VPCONFLICTD256,
31663 IX86_BUILTIN_VPCLZCNTQ256,
31664 IX86_BUILTIN_VPCLZCNTD256,
31665 IX86_BUILTIN_UNPCKHPD256_MASK,
31666 IX86_BUILTIN_UNPCKHPD128_MASK,
31667 IX86_BUILTIN_UNPCKHPS256_MASK,
31668 IX86_BUILTIN_UNPCKHPS128_MASK,
31669 IX86_BUILTIN_UNPCKLPD256_MASK,
31670 IX86_BUILTIN_UNPCKLPD128_MASK,
31671 IX86_BUILTIN_UNPCKLPS256_MASK,
31672 IX86_BUILTIN_VPCONFLICTQ128,
31673 IX86_BUILTIN_VPCONFLICTD128,
31674 IX86_BUILTIN_VPCLZCNTQ128,
31675 IX86_BUILTIN_VPCLZCNTD128,
31676 IX86_BUILTIN_UNPCKLPS128_MASK,
31677 IX86_BUILTIN_ALIGND256,
31678 IX86_BUILTIN_ALIGNQ256,
31679 IX86_BUILTIN_ALIGND128,
31680 IX86_BUILTIN_ALIGNQ128,
31681 IX86_BUILTIN_CVTPS2PH256_MASK,
31682 IX86_BUILTIN_CVTPS2PH_MASK,
31683 IX86_BUILTIN_CVTPH2PS_MASK,
31684 IX86_BUILTIN_CVTPH2PS256_MASK,
31685 IX86_BUILTIN_PUNPCKHDQ128_MASK,
31686 IX86_BUILTIN_PUNPCKHDQ256_MASK,
31687 IX86_BUILTIN_PUNPCKHQDQ128_MASK,
31688 IX86_BUILTIN_PUNPCKHQDQ256_MASK,
31689 IX86_BUILTIN_PUNPCKLDQ128_MASK,
31690 IX86_BUILTIN_PUNPCKLDQ256_MASK,
31691 IX86_BUILTIN_PUNPCKLQDQ128_MASK,
31692 IX86_BUILTIN_PUNPCKLQDQ256_MASK,
31693 IX86_BUILTIN_PUNPCKHBW128_MASK,
31694 IX86_BUILTIN_PUNPCKHBW256_MASK,
31695 IX86_BUILTIN_PUNPCKHWD128_MASK,
31696 IX86_BUILTIN_PUNPCKHWD256_MASK,
31697 IX86_BUILTIN_PUNPCKLBW128_MASK,
31698 IX86_BUILTIN_PUNPCKLBW256_MASK,
31699 IX86_BUILTIN_PUNPCKLWD128_MASK,
31700 IX86_BUILTIN_PUNPCKLWD256_MASK,
31701 IX86_BUILTIN_PSLLVV16HI,
31702 IX86_BUILTIN_PSLLVV8HI,
31703 IX86_BUILTIN_PACKSSDW256_MASK,
31704 IX86_BUILTIN_PACKSSDW128_MASK,
31705 IX86_BUILTIN_PACKUSDW256_MASK,
31706 IX86_BUILTIN_PACKUSDW128_MASK,
31707 IX86_BUILTIN_PAVGB256_MASK,
31708 IX86_BUILTIN_PAVGW256_MASK,
31709 IX86_BUILTIN_PAVGB128_MASK,
31710 IX86_BUILTIN_PAVGW128_MASK,
31711 IX86_BUILTIN_VPERMVARSF256_MASK,
31712 IX86_BUILTIN_VPERMVARDF256_MASK,
31713 IX86_BUILTIN_VPERMDF256_MASK,
31714 IX86_BUILTIN_PABSB256_MASK,
31715 IX86_BUILTIN_PABSB128_MASK,
31716 IX86_BUILTIN_PABSW256_MASK,
31717 IX86_BUILTIN_PABSW128_MASK,
31718 IX86_BUILTIN_VPERMILVARPD_MASK,
31719 IX86_BUILTIN_VPERMILVARPS_MASK,
31720 IX86_BUILTIN_VPERMILVARPD256_MASK,
31721 IX86_BUILTIN_VPERMILVARPS256_MASK,
31722 IX86_BUILTIN_VPERMILPD_MASK,
31723 IX86_BUILTIN_VPERMILPS_MASK,
31724 IX86_BUILTIN_VPERMILPD256_MASK,
31725 IX86_BUILTIN_VPERMILPS256_MASK,
31726 IX86_BUILTIN_BLENDMQ256,
31727 IX86_BUILTIN_BLENDMD256,
31728 IX86_BUILTIN_BLENDMPD256,
31729 IX86_BUILTIN_BLENDMPS256,
31730 IX86_BUILTIN_BLENDMQ128,
31731 IX86_BUILTIN_BLENDMD128,
31732 IX86_BUILTIN_BLENDMPD128,
31733 IX86_BUILTIN_BLENDMPS128,
31734 IX86_BUILTIN_BLENDMW256,
31735 IX86_BUILTIN_BLENDMB256,
31736 IX86_BUILTIN_BLENDMW128,
31737 IX86_BUILTIN_BLENDMB128,
31738 IX86_BUILTIN_PMULLD256_MASK,
31739 IX86_BUILTIN_PMULLD128_MASK,
31740 IX86_BUILTIN_PMULUDQ256_MASK,
31741 IX86_BUILTIN_PMULDQ256_MASK,
31742 IX86_BUILTIN_PMULDQ128_MASK,
31743 IX86_BUILTIN_PMULUDQ128_MASK,
31744 IX86_BUILTIN_CVTPD2PS256_MASK,
31745 IX86_BUILTIN_CVTPD2PS_MASK,
31746 IX86_BUILTIN_VPERMVARSI256_MASK,
31747 IX86_BUILTIN_VPERMVARDI256_MASK,
31748 IX86_BUILTIN_VPERMDI256_MASK,
31749 IX86_BUILTIN_CMPQ256,
31750 IX86_BUILTIN_CMPD256,
31751 IX86_BUILTIN_UCMPQ256,
31752 IX86_BUILTIN_UCMPD256,
31753 IX86_BUILTIN_CMPB256,
31754 IX86_BUILTIN_CMPW256,
31755 IX86_BUILTIN_UCMPB256,
31756 IX86_BUILTIN_UCMPW256,
31757 IX86_BUILTIN_CMPPD256_MASK,
31758 IX86_BUILTIN_CMPPS256_MASK,
31759 IX86_BUILTIN_CMPQ128,
31760 IX86_BUILTIN_CMPD128,
31761 IX86_BUILTIN_UCMPQ128,
31762 IX86_BUILTIN_UCMPD128,
31763 IX86_BUILTIN_CMPB128,
31764 IX86_BUILTIN_CMPW128,
31765 IX86_BUILTIN_UCMPB128,
31766 IX86_BUILTIN_UCMPW128,
31767 IX86_BUILTIN_CMPPD128_MASK,
31768 IX86_BUILTIN_CMPPS128_MASK,
31770 IX86_BUILTIN_GATHER3SIV8SF,
31771 IX86_BUILTIN_GATHER3SIV4SF,
31772 IX86_BUILTIN_GATHER3SIV4DF,
31773 IX86_BUILTIN_GATHER3SIV2DF,
31774 IX86_BUILTIN_GATHER3DIV8SF,
31775 IX86_BUILTIN_GATHER3DIV4SF,
31776 IX86_BUILTIN_GATHER3DIV4DF,
31777 IX86_BUILTIN_GATHER3DIV2DF,
31778 IX86_BUILTIN_GATHER3SIV8SI,
31779 IX86_BUILTIN_GATHER3SIV4SI,
31780 IX86_BUILTIN_GATHER3SIV4DI,
31781 IX86_BUILTIN_GATHER3SIV2DI,
31782 IX86_BUILTIN_GATHER3DIV8SI,
31783 IX86_BUILTIN_GATHER3DIV4SI,
31784 IX86_BUILTIN_GATHER3DIV4DI,
31785 IX86_BUILTIN_GATHER3DIV2DI,
31786 IX86_BUILTIN_SCATTERSIV8SF,
31787 IX86_BUILTIN_SCATTERSIV4SF,
31788 IX86_BUILTIN_SCATTERSIV4DF,
31789 IX86_BUILTIN_SCATTERSIV2DF,
31790 IX86_BUILTIN_SCATTERDIV8SF,
31791 IX86_BUILTIN_SCATTERDIV4SF,
31792 IX86_BUILTIN_SCATTERDIV4DF,
31793 IX86_BUILTIN_SCATTERDIV2DF,
31794 IX86_BUILTIN_SCATTERSIV8SI,
31795 IX86_BUILTIN_SCATTERSIV4SI,
31796 IX86_BUILTIN_SCATTERSIV4DI,
31797 IX86_BUILTIN_SCATTERSIV2DI,
31798 IX86_BUILTIN_SCATTERDIV8SI,
31799 IX86_BUILTIN_SCATTERDIV4SI,
31800 IX86_BUILTIN_SCATTERDIV4DI,
31801 IX86_BUILTIN_SCATTERDIV2DI,
31804 IX86_BUILTIN_RANGESD128,
31805 IX86_BUILTIN_RANGESS128,
31806 IX86_BUILTIN_KUNPCKWD,
31807 IX86_BUILTIN_KUNPCKDQ,
31808 IX86_BUILTIN_BROADCASTF32x2_512,
31809 IX86_BUILTIN_BROADCASTI32x2_512,
31810 IX86_BUILTIN_BROADCASTF64X2_512,
31811 IX86_BUILTIN_BROADCASTI64X2_512,
31812 IX86_BUILTIN_BROADCASTF32X8_512,
31813 IX86_BUILTIN_BROADCASTI32X8_512,
31814 IX86_BUILTIN_EXTRACTF64X2_512,
31815 IX86_BUILTIN_EXTRACTF32X8,
31816 IX86_BUILTIN_EXTRACTI64X2_512,
31817 IX86_BUILTIN_EXTRACTI32X8,
31818 IX86_BUILTIN_REDUCEPD512_MASK,
31819 IX86_BUILTIN_REDUCEPS512_MASK,
31820 IX86_BUILTIN_PMULLQ512,
31821 IX86_BUILTIN_XORPD512,
31822 IX86_BUILTIN_XORPS512,
31823 IX86_BUILTIN_ORPD512,
31824 IX86_BUILTIN_ORPS512,
31825 IX86_BUILTIN_ANDPD512,
31826 IX86_BUILTIN_ANDPS512,
31827 IX86_BUILTIN_ANDNPD512,
31828 IX86_BUILTIN_ANDNPS512,
31829 IX86_BUILTIN_INSERTF32X8,
31830 IX86_BUILTIN_INSERTI32X8,
31831 IX86_BUILTIN_INSERTF64X2_512,
31832 IX86_BUILTIN_INSERTI64X2_512,
31833 IX86_BUILTIN_FPCLASSPD512,
31834 IX86_BUILTIN_FPCLASSPS512,
31835 IX86_BUILTIN_CVTD2MASK512,
31836 IX86_BUILTIN_CVTQ2MASK512,
31837 IX86_BUILTIN_CVTMASK2D512,
31838 IX86_BUILTIN_CVTMASK2Q512,
31839 IX86_BUILTIN_CVTPD2QQ512,
31840 IX86_BUILTIN_CVTPS2QQ512,
31841 IX86_BUILTIN_CVTPD2UQQ512,
31842 IX86_BUILTIN_CVTPS2UQQ512,
31843 IX86_BUILTIN_CVTQQ2PS512,
31844 IX86_BUILTIN_CVTUQQ2PS512,
31845 IX86_BUILTIN_CVTQQ2PD512,
31846 IX86_BUILTIN_CVTUQQ2PD512,
31847 IX86_BUILTIN_CVTTPS2QQ512,
31848 IX86_BUILTIN_CVTTPS2UQQ512,
31849 IX86_BUILTIN_CVTTPD2QQ512,
31850 IX86_BUILTIN_CVTTPD2UQQ512,
31851 IX86_BUILTIN_RANGEPS512,
31852 IX86_BUILTIN_RANGEPD512,
31855 IX86_BUILTIN_PACKUSDW512,
31856 IX86_BUILTIN_PACKSSDW512,
31857 IX86_BUILTIN_LOADDQUHI512_MASK,
31858 IX86_BUILTIN_LOADDQUQI512_MASK,
31859 IX86_BUILTIN_PSLLDQ512,
31860 IX86_BUILTIN_PSRLDQ512,
31861 IX86_BUILTIN_STOREDQUHI512_MASK,
31862 IX86_BUILTIN_STOREDQUQI512_MASK,
31863 IX86_BUILTIN_PALIGNR512,
31864 IX86_BUILTIN_PALIGNR512_MASK,
31865 IX86_BUILTIN_MOVDQUHI512_MASK,
31866 IX86_BUILTIN_MOVDQUQI512_MASK,
31867 IX86_BUILTIN_PSADBW512,
31868 IX86_BUILTIN_DBPSADBW512,
31869 IX86_BUILTIN_PBROADCASTB512,
31870 IX86_BUILTIN_PBROADCASTB512_GPR,
31871 IX86_BUILTIN_PBROADCASTW512,
31872 IX86_BUILTIN_PBROADCASTW512_GPR,
31873 IX86_BUILTIN_PMOVSXBW512_MASK,
31874 IX86_BUILTIN_PMOVZXBW512_MASK,
31875 IX86_BUILTIN_VPERMVARHI512_MASK,
31876 IX86_BUILTIN_VPERMT2VARHI512,
31877 IX86_BUILTIN_VPERMT2VARHI512_MASKZ,
31878 IX86_BUILTIN_VPERMI2VARHI512,
31879 IX86_BUILTIN_PAVGB512,
31880 IX86_BUILTIN_PAVGW512,
31881 IX86_BUILTIN_PADDB512,
31882 IX86_BUILTIN_PSUBB512,
31883 IX86_BUILTIN_PSUBSB512,
31884 IX86_BUILTIN_PADDSB512,
31885 IX86_BUILTIN_PSUBUSB512,
31886 IX86_BUILTIN_PADDUSB512,
31887 IX86_BUILTIN_PSUBW512,
31888 IX86_BUILTIN_PADDW512,
31889 IX86_BUILTIN_PSUBSW512,
31890 IX86_BUILTIN_PADDSW512,
31891 IX86_BUILTIN_PSUBUSW512,
31892 IX86_BUILTIN_PADDUSW512,
31893 IX86_BUILTIN_PMAXUW512,
31894 IX86_BUILTIN_PMAXSW512,
31895 IX86_BUILTIN_PMINUW512,
31896 IX86_BUILTIN_PMINSW512,
31897 IX86_BUILTIN_PMAXUB512,
31898 IX86_BUILTIN_PMAXSB512,
31899 IX86_BUILTIN_PMINUB512,
31900 IX86_BUILTIN_PMINSB512,
31901 IX86_BUILTIN_PMOVWB512,
31902 IX86_BUILTIN_PMOVSWB512,
31903 IX86_BUILTIN_PMOVUSWB512,
31904 IX86_BUILTIN_PMULHRSW512_MASK,
31905 IX86_BUILTIN_PMULHUW512_MASK,
31906 IX86_BUILTIN_PMULHW512_MASK,
31907 IX86_BUILTIN_PMULLW512_MASK,
31908 IX86_BUILTIN_PSLLWI512_MASK,
31909 IX86_BUILTIN_PSLLW512_MASK,
31910 IX86_BUILTIN_PACKSSWB512,
31911 IX86_BUILTIN_PACKUSWB512,
31912 IX86_BUILTIN_PSRAVV32HI,
31913 IX86_BUILTIN_PMADDUBSW512_MASK,
31914 IX86_BUILTIN_PMADDWD512_MASK,
31915 IX86_BUILTIN_PSRLVV32HI,
31916 IX86_BUILTIN_PUNPCKHBW512,
31917 IX86_BUILTIN_PUNPCKHWD512,
31918 IX86_BUILTIN_PUNPCKLBW512,
31919 IX86_BUILTIN_PUNPCKLWD512,
31920 IX86_BUILTIN_PSHUFB512,
31921 IX86_BUILTIN_PSHUFHW512,
31922 IX86_BUILTIN_PSHUFLW512,
31923 IX86_BUILTIN_PSRAWI512,
31924 IX86_BUILTIN_PSRAW512,
31925 IX86_BUILTIN_PSRLWI512,
31926 IX86_BUILTIN_PSRLW512,
31927 IX86_BUILTIN_CVTB2MASK512,
31928 IX86_BUILTIN_CVTW2MASK512,
31929 IX86_BUILTIN_CVTMASK2B512,
31930 IX86_BUILTIN_CVTMASK2W512,
31931 IX86_BUILTIN_PCMPEQB512_MASK,
31932 IX86_BUILTIN_PCMPEQW512_MASK,
31933 IX86_BUILTIN_PCMPGTB512_MASK,
31934 IX86_BUILTIN_PCMPGTW512_MASK,
31935 IX86_BUILTIN_PTESTMB512,
31936 IX86_BUILTIN_PTESTMW512,
31937 IX86_BUILTIN_PTESTNMB512,
31938 IX86_BUILTIN_PTESTNMW512,
31939 IX86_BUILTIN_PSLLVV32HI,
31940 IX86_BUILTIN_PABSB512,
31941 IX86_BUILTIN_PABSW512,
31942 IX86_BUILTIN_BLENDMW512,
31943 IX86_BUILTIN_BLENDMB512,
31944 IX86_BUILTIN_CMPB512,
31945 IX86_BUILTIN_CMPW512,
31946 IX86_BUILTIN_UCMPB512,
31947 IX86_BUILTIN_UCMPW512,
31949 /* Alternate 4 and 8 element gather/scatter for the vectorizer
31950 where all operands are 32-byte or 64-byte wide respectively. */
31951 IX86_BUILTIN_GATHERALTSIV4DF,
31952 IX86_BUILTIN_GATHERALTDIV8SF,
31953 IX86_BUILTIN_GATHERALTSIV4DI,
31954 IX86_BUILTIN_GATHERALTDIV8SI,
31955 IX86_BUILTIN_GATHER3ALTDIV16SF,
31956 IX86_BUILTIN_GATHER3ALTDIV16SI,
31957 IX86_BUILTIN_GATHER3ALTSIV4DF,
31958 IX86_BUILTIN_GATHER3ALTDIV8SF,
31959 IX86_BUILTIN_GATHER3ALTSIV4DI,
31960 IX86_BUILTIN_GATHER3ALTDIV8SI,
31961 IX86_BUILTIN_GATHER3ALTSIV8DF,
31962 IX86_BUILTIN_GATHER3ALTSIV8DI,
31963 IX86_BUILTIN_GATHER3DIV16SF,
31964 IX86_BUILTIN_GATHER3DIV16SI,
31965 IX86_BUILTIN_GATHER3DIV8DF,
31966 IX86_BUILTIN_GATHER3DIV8DI,
31967 IX86_BUILTIN_GATHER3SIV16SF,
31968 IX86_BUILTIN_GATHER3SIV16SI,
31969 IX86_BUILTIN_GATHER3SIV8DF,
31970 IX86_BUILTIN_GATHER3SIV8DI,
31971 IX86_BUILTIN_SCATTERALTSIV8DF,
31972 IX86_BUILTIN_SCATTERALTDIV16SF,
31973 IX86_BUILTIN_SCATTERALTSIV8DI,
31974 IX86_BUILTIN_SCATTERALTDIV16SI,
31975 IX86_BUILTIN_SCATTERDIV16SF,
31976 IX86_BUILTIN_SCATTERDIV16SI,
31977 IX86_BUILTIN_SCATTERDIV8DF,
31978 IX86_BUILTIN_SCATTERDIV8DI,
31979 IX86_BUILTIN_SCATTERSIV16SF,
31980 IX86_BUILTIN_SCATTERSIV16SI,
31981 IX86_BUILTIN_SCATTERSIV8DF,
31982 IX86_BUILTIN_SCATTERSIV8DI,
31985 IX86_BUILTIN_GATHERPFQPD,
31986 IX86_BUILTIN_GATHERPFDPS,
31987 IX86_BUILTIN_GATHERPFDPD,
31988 IX86_BUILTIN_GATHERPFQPS,
31989 IX86_BUILTIN_SCATTERPFDPD,
31990 IX86_BUILTIN_SCATTERPFDPS,
31991 IX86_BUILTIN_SCATTERPFQPD,
31992 IX86_BUILTIN_SCATTERPFQPS,
31995 IX86_BUILTIN_EXP2PD_MASK,
31996 IX86_BUILTIN_EXP2PS_MASK,
31997 IX86_BUILTIN_EXP2PS,
31998 IX86_BUILTIN_RCP28PD,
31999 IX86_BUILTIN_RCP28PS,
32000 IX86_BUILTIN_RCP28SD,
32001 IX86_BUILTIN_RCP28SS,
32002 IX86_BUILTIN_RSQRT28PD,
32003 IX86_BUILTIN_RSQRT28PS,
32004 IX86_BUILTIN_RSQRT28SD,
32005 IX86_BUILTIN_RSQRT28SS,
32008 IX86_BUILTIN_VPMADD52LUQ512,
32009 IX86_BUILTIN_VPMADD52HUQ512,
32010 IX86_BUILTIN_VPMADD52LUQ256,
32011 IX86_BUILTIN_VPMADD52HUQ256,
32012 IX86_BUILTIN_VPMADD52LUQ128,
32013 IX86_BUILTIN_VPMADD52HUQ128,
32014 IX86_BUILTIN_VPMADD52LUQ512_MASKZ,
32015 IX86_BUILTIN_VPMADD52HUQ512_MASKZ,
32016 IX86_BUILTIN_VPMADD52LUQ256_MASKZ,
32017 IX86_BUILTIN_VPMADD52HUQ256_MASKZ,
32018 IX86_BUILTIN_VPMADD52LUQ128_MASKZ,
32019 IX86_BUILTIN_VPMADD52HUQ128_MASKZ,
32022 IX86_BUILTIN_VPMULTISHIFTQB512,
32023 IX86_BUILTIN_VPMULTISHIFTQB256,
32024 IX86_BUILTIN_VPMULTISHIFTQB128,
32025 IX86_BUILTIN_VPERMVARQI512_MASK,
32026 IX86_BUILTIN_VPERMT2VARQI512,
32027 IX86_BUILTIN_VPERMT2VARQI512_MASKZ,
32028 IX86_BUILTIN_VPERMI2VARQI512,
32029 IX86_BUILTIN_VPERMVARQI256_MASK,
32030 IX86_BUILTIN_VPERMVARQI128_MASK,
32031 IX86_BUILTIN_VPERMT2VARQI256,
32032 IX86_BUILTIN_VPERMT2VARQI256_MASKZ,
32033 IX86_BUILTIN_VPERMT2VARQI128,
32034 IX86_BUILTIN_VPERMT2VARQI128_MASKZ,
32035 IX86_BUILTIN_VPERMI2VARQI256,
32036 IX86_BUILTIN_VPERMI2VARQI128,
32038 /* SHA builtins. */
32039 IX86_BUILTIN_SHA1MSG1,
32040 IX86_BUILTIN_SHA1MSG2,
32041 IX86_BUILTIN_SHA1NEXTE,
32042 IX86_BUILTIN_SHA1RNDS4,
32043 IX86_BUILTIN_SHA256MSG1,
32044 IX86_BUILTIN_SHA256MSG2,
32045 IX86_BUILTIN_SHA256RNDS2,
32047 /* CLWB instructions. */
32050 /* PCOMMIT instructions. */
32051 IX86_BUILTIN_PCOMMIT,
32053 /* CLFLUSHOPT instructions. */
32054 IX86_BUILTIN_CLFLUSHOPT,
32056 /* TFmode support builtins. */
32058 IX86_BUILTIN_HUGE_VALQ,
32059 IX86_BUILTIN_FABSQ,
32060 IX86_BUILTIN_COPYSIGNQ,
32062 /* Vectorizer support builtins. */
32063 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512,
32064 IX86_BUILTIN_CPYSGNPS,
32065 IX86_BUILTIN_CPYSGNPD,
32066 IX86_BUILTIN_CPYSGNPS256,
32067 IX86_BUILTIN_CPYSGNPS512,
32068 IX86_BUILTIN_CPYSGNPD256,
32069 IX86_BUILTIN_CPYSGNPD512,
32070 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512,
32071 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512,
32074 /* FMA4 instructions. */
32075 IX86_BUILTIN_VFMADDSS,
32076 IX86_BUILTIN_VFMADDSD,
32077 IX86_BUILTIN_VFMADDPS,
32078 IX86_BUILTIN_VFMADDPD,
32079 IX86_BUILTIN_VFMADDPS256,
32080 IX86_BUILTIN_VFMADDPD256,
32081 IX86_BUILTIN_VFMADDSUBPS,
32082 IX86_BUILTIN_VFMADDSUBPD,
32083 IX86_BUILTIN_VFMADDSUBPS256,
32084 IX86_BUILTIN_VFMADDSUBPD256,
32086 /* FMA3 instructions. */
32087 IX86_BUILTIN_VFMADDSS3,
32088 IX86_BUILTIN_VFMADDSD3,
32090 /* XOP instructions. */
32091 IX86_BUILTIN_VPCMOV,
32092 IX86_BUILTIN_VPCMOV_V2DI,
32093 IX86_BUILTIN_VPCMOV_V4SI,
32094 IX86_BUILTIN_VPCMOV_V8HI,
32095 IX86_BUILTIN_VPCMOV_V16QI,
32096 IX86_BUILTIN_VPCMOV_V4SF,
32097 IX86_BUILTIN_VPCMOV_V2DF,
32098 IX86_BUILTIN_VPCMOV256,
32099 IX86_BUILTIN_VPCMOV_V4DI256,
32100 IX86_BUILTIN_VPCMOV_V8SI256,
32101 IX86_BUILTIN_VPCMOV_V16HI256,
32102 IX86_BUILTIN_VPCMOV_V32QI256,
32103 IX86_BUILTIN_VPCMOV_V8SF256,
32104 IX86_BUILTIN_VPCMOV_V4DF256,
32106 IX86_BUILTIN_VPPERM,
32108 IX86_BUILTIN_VPMACSSWW,
32109 IX86_BUILTIN_VPMACSWW,
32110 IX86_BUILTIN_VPMACSSWD,
32111 IX86_BUILTIN_VPMACSWD,
32112 IX86_BUILTIN_VPMACSSDD,
32113 IX86_BUILTIN_VPMACSDD,
32114 IX86_BUILTIN_VPMACSSDQL,
32115 IX86_BUILTIN_VPMACSSDQH,
32116 IX86_BUILTIN_VPMACSDQL,
32117 IX86_BUILTIN_VPMACSDQH,
32118 IX86_BUILTIN_VPMADCSSWD,
32119 IX86_BUILTIN_VPMADCSWD,
32121 IX86_BUILTIN_VPHADDBW,
32122 IX86_BUILTIN_VPHADDBD,
32123 IX86_BUILTIN_VPHADDBQ,
32124 IX86_BUILTIN_VPHADDWD,
32125 IX86_BUILTIN_VPHADDWQ,
32126 IX86_BUILTIN_VPHADDDQ,
32127 IX86_BUILTIN_VPHADDUBW,
32128 IX86_BUILTIN_VPHADDUBD,
32129 IX86_BUILTIN_VPHADDUBQ,
32130 IX86_BUILTIN_VPHADDUWD,
32131 IX86_BUILTIN_VPHADDUWQ,
32132 IX86_BUILTIN_VPHADDUDQ,
32133 IX86_BUILTIN_VPHSUBBW,
32134 IX86_BUILTIN_VPHSUBWD,
32135 IX86_BUILTIN_VPHSUBDQ,
32137 IX86_BUILTIN_VPROTB,
32138 IX86_BUILTIN_VPROTW,
32139 IX86_BUILTIN_VPROTD,
32140 IX86_BUILTIN_VPROTQ,
32141 IX86_BUILTIN_VPROTB_IMM,
32142 IX86_BUILTIN_VPROTW_IMM,
32143 IX86_BUILTIN_VPROTD_IMM,
32144 IX86_BUILTIN_VPROTQ_IMM,
32146 IX86_BUILTIN_VPSHLB,
32147 IX86_BUILTIN_VPSHLW,
32148 IX86_BUILTIN_VPSHLD,
32149 IX86_BUILTIN_VPSHLQ,
32150 IX86_BUILTIN_VPSHAB,
32151 IX86_BUILTIN_VPSHAW,
32152 IX86_BUILTIN_VPSHAD,
32153 IX86_BUILTIN_VPSHAQ,
32155 IX86_BUILTIN_VFRCZSS,
32156 IX86_BUILTIN_VFRCZSD,
32157 IX86_BUILTIN_VFRCZPS,
32158 IX86_BUILTIN_VFRCZPD,
32159 IX86_BUILTIN_VFRCZPS256,
32160 IX86_BUILTIN_VFRCZPD256,
32162 IX86_BUILTIN_VPCOMEQUB,
32163 IX86_BUILTIN_VPCOMNEUB,
32164 IX86_BUILTIN_VPCOMLTUB,
32165 IX86_BUILTIN_VPCOMLEUB,
32166 IX86_BUILTIN_VPCOMGTUB,
32167 IX86_BUILTIN_VPCOMGEUB,
32168 IX86_BUILTIN_VPCOMFALSEUB,
32169 IX86_BUILTIN_VPCOMTRUEUB,
32171 IX86_BUILTIN_VPCOMEQUW,
32172 IX86_BUILTIN_VPCOMNEUW,
32173 IX86_BUILTIN_VPCOMLTUW,
32174 IX86_BUILTIN_VPCOMLEUW,
32175 IX86_BUILTIN_VPCOMGTUW,
32176 IX86_BUILTIN_VPCOMGEUW,
32177 IX86_BUILTIN_VPCOMFALSEUW,
32178 IX86_BUILTIN_VPCOMTRUEUW,
32180 IX86_BUILTIN_VPCOMEQUD,
32181 IX86_BUILTIN_VPCOMNEUD,
32182 IX86_BUILTIN_VPCOMLTUD,
32183 IX86_BUILTIN_VPCOMLEUD,
32184 IX86_BUILTIN_VPCOMGTUD,
32185 IX86_BUILTIN_VPCOMGEUD,
32186 IX86_BUILTIN_VPCOMFALSEUD,
32187 IX86_BUILTIN_VPCOMTRUEUD,
32189 IX86_BUILTIN_VPCOMEQUQ,
32190 IX86_BUILTIN_VPCOMNEUQ,
32191 IX86_BUILTIN_VPCOMLTUQ,
32192 IX86_BUILTIN_VPCOMLEUQ,
32193 IX86_BUILTIN_VPCOMGTUQ,
32194 IX86_BUILTIN_VPCOMGEUQ,
32195 IX86_BUILTIN_VPCOMFALSEUQ,
32196 IX86_BUILTIN_VPCOMTRUEUQ,
32198 IX86_BUILTIN_VPCOMEQB,
32199 IX86_BUILTIN_VPCOMNEB,
32200 IX86_BUILTIN_VPCOMLTB,
32201 IX86_BUILTIN_VPCOMLEB,
32202 IX86_BUILTIN_VPCOMGTB,
32203 IX86_BUILTIN_VPCOMGEB,
32204 IX86_BUILTIN_VPCOMFALSEB,
32205 IX86_BUILTIN_VPCOMTRUEB,
32207 IX86_BUILTIN_VPCOMEQW,
32208 IX86_BUILTIN_VPCOMNEW,
32209 IX86_BUILTIN_VPCOMLTW,
32210 IX86_BUILTIN_VPCOMLEW,
32211 IX86_BUILTIN_VPCOMGTW,
32212 IX86_BUILTIN_VPCOMGEW,
32213 IX86_BUILTIN_VPCOMFALSEW,
32214 IX86_BUILTIN_VPCOMTRUEW,
32216 IX86_BUILTIN_VPCOMEQD,
32217 IX86_BUILTIN_VPCOMNED,
32218 IX86_BUILTIN_VPCOMLTD,
32219 IX86_BUILTIN_VPCOMLED,
32220 IX86_BUILTIN_VPCOMGTD,
32221 IX86_BUILTIN_VPCOMGED,
32222 IX86_BUILTIN_VPCOMFALSED,
32223 IX86_BUILTIN_VPCOMTRUED,
32225 IX86_BUILTIN_VPCOMEQQ,
32226 IX86_BUILTIN_VPCOMNEQ,
32227 IX86_BUILTIN_VPCOMLTQ,
32228 IX86_BUILTIN_VPCOMLEQ,
32229 IX86_BUILTIN_VPCOMGTQ,
32230 IX86_BUILTIN_VPCOMGEQ,
32231 IX86_BUILTIN_VPCOMFALSEQ,
32232 IX86_BUILTIN_VPCOMTRUEQ,
32234 /* LWP instructions. */
32235 IX86_BUILTIN_LLWPCB,
32236 IX86_BUILTIN_SLWPCB,
32237 IX86_BUILTIN_LWPVAL32,
32238 IX86_BUILTIN_LWPVAL64,
32239 IX86_BUILTIN_LWPINS32,
32240 IX86_BUILTIN_LWPINS64,
32245 IX86_BUILTIN_XBEGIN,
32247 IX86_BUILTIN_XABORT,
32248 IX86_BUILTIN_XTEST,
32251 IX86_BUILTIN_BNDMK,
32252 IX86_BUILTIN_BNDSTX,
32253 IX86_BUILTIN_BNDLDX,
32254 IX86_BUILTIN_BNDCL,
32255 IX86_BUILTIN_BNDCU,
32256 IX86_BUILTIN_BNDRET,
32257 IX86_BUILTIN_BNDNARROW,
32258 IX86_BUILTIN_BNDINT,
32259 IX86_BUILTIN_SIZEOF,
32260 IX86_BUILTIN_BNDLOWER,
32261 IX86_BUILTIN_BNDUPPER,
32263 /* BMI instructions. */
32264 IX86_BUILTIN_BEXTR32,
32265 IX86_BUILTIN_BEXTR64,
32268 /* TBM instructions. */
32269 IX86_BUILTIN_BEXTRI32,
32270 IX86_BUILTIN_BEXTRI64,
32272 /* BMI2 instructions. */
32273 IX86_BUILTIN_BZHI32,
32274 IX86_BUILTIN_BZHI64,
32275 IX86_BUILTIN_PDEP32,
32276 IX86_BUILTIN_PDEP64,
32277 IX86_BUILTIN_PEXT32,
32278 IX86_BUILTIN_PEXT64,
32280 /* ADX instructions. */
32281 IX86_BUILTIN_ADDCARRYX32,
32282 IX86_BUILTIN_ADDCARRYX64,
32284 /* SBB instructions. */
32285 IX86_BUILTIN_SBB32,
32286 IX86_BUILTIN_SBB64,
32288 /* FSGSBASE instructions. */
32289 IX86_BUILTIN_RDFSBASE32,
32290 IX86_BUILTIN_RDFSBASE64,
32291 IX86_BUILTIN_RDGSBASE32,
32292 IX86_BUILTIN_RDGSBASE64,
32293 IX86_BUILTIN_WRFSBASE32,
32294 IX86_BUILTIN_WRFSBASE64,
32295 IX86_BUILTIN_WRGSBASE32,
32296 IX86_BUILTIN_WRGSBASE64,
32298 /* RDRND instructions. */
32299 IX86_BUILTIN_RDRAND16_STEP,
32300 IX86_BUILTIN_RDRAND32_STEP,
32301 IX86_BUILTIN_RDRAND64_STEP,
32303 /* RDSEED instructions. */
32304 IX86_BUILTIN_RDSEED16_STEP,
32305 IX86_BUILTIN_RDSEED32_STEP,
32306 IX86_BUILTIN_RDSEED64_STEP,
32308 /* F16C instructions. */
32309 IX86_BUILTIN_CVTPH2PS,
32310 IX86_BUILTIN_CVTPH2PS256,
32311 IX86_BUILTIN_CVTPS2PH,
32312 IX86_BUILTIN_CVTPS2PH256,
32314 /* MONITORX and MWAITX instructions. */
32315 IX86_BUILTIN_MONITORX,
32316 IX86_BUILTIN_MWAITX,
32318 /* CFString built-in for darwin */
32319 IX86_BUILTIN_CFSTRING,
32321 /* Builtins to get CPU type and supported features. */
32322 IX86_BUILTIN_CPU_INIT,
32323 IX86_BUILTIN_CPU_IS,
32324 IX86_BUILTIN_CPU_SUPPORTS,
32326 /* Read/write FLAGS register built-ins. */
32327 IX86_BUILTIN_READ_FLAGS,
32328 IX86_BUILTIN_WRITE_FLAGS,
32330 /* PKU instructions. */
32331 IX86_BUILTIN_RDPKRU,
32332 IX86_BUILTIN_WRPKRU,
32337 /* Table for the ix86 builtin decls. */
32338 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
32340 /* Table of all of the builtin functions that are possible with different ISA's
32341 but are waiting to be built until a function is declared to use that ISA.  */
32343 struct builtin_isa {
32344 const char *name; /* function name */
32345 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
32346 HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */
32347 bool const_p; /* true if the declaration is constant */
32348 bool leaf_p; /* true if the declaration has leaf attribute */
32349 bool nothrow_p; /* true if the declaration has nothrow attribute */
32350 bool set_and_not_built_p; /* true if recorded here but the decl has not
                                 been built yet (deferred until a function
                                 declares use of the ISA) -- see def_builtin */
32353 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
32355 /* Bits that can still enable any inclusion of a builtin. */
32356 static HOST_WIDE_INT deferred_isa_values = 0;
32358 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
32359 of which isa_flags to use in the ix86_builtins_isa array. Stores the
32360 function decl in the ix86_builtins array. Returns the function decl or
32361 NULL_TREE, if the builtin was not added.
32363 If the front end has a special hook for builtin functions, delay adding
32364 builtin functions that aren't in the current ISA until the ISA is changed
32365 with function specific optimization. Doing so, can save about 300K for the
32366 default compiler. When the builtin is expanded, check at that time whether it is valid.
32369 If the front end doesn't have a special hook, record all builtins, even if
32370 it isn't an instruction set in the current ISA in case the user uses
32371 function specific options for a different ISA, so that we don't get scope
32372 errors if a builtin is added in the middle of a function scope. */
32375 def_builtin (HOST_WIDE_INT mask, const char *name,
32376 enum ix86_builtin_func_type tcode,
32377 enum ix86_builtins code)
32379 tree decl = NULL_TREE;
32381 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
32383 ix86_builtins_isa[(int) code].isa = mask;
32385 mask &= ~OPTION_MASK_ISA_64BIT;
32387 || (mask & ix86_isa_flags) != 0
32388 || (lang_hooks.builtin_function
32389 == lang_hooks.builtin_function_ext_scope))
32392 tree type = ix86_get_builtin_func_type (tcode);
32393 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
32395 ix86_builtins[(int) code] = decl;
32396 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
32400 /* Just a MASK where set_and_not_built_p == true can potentially
32401 include a builtin. */
32402 deferred_isa_values |= mask;
32403 ix86_builtins[(int) code] = NULL_TREE;
32404 ix86_builtins_isa[(int) code].tcode = tcode;
32405 ix86_builtins_isa[(int) code].name = name;
32406 ix86_builtins_isa[(int) code].leaf_p = false;
32407 ix86_builtins_isa[(int) code].nothrow_p = false;
32408 ix86_builtins_isa[(int) code].const_p = false;
32409 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
32416 /* Like def_builtin, but also marks the function decl "const", i.e. the
   builtin reads nothing but its arguments and has no side effects.
   Returns what def_builtin returned (the decl, or NULL_TREE).  */
32419 def_builtin_const (HOST_WIDE_INT mask, const char *name,
32420 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
32422 tree decl = def_builtin (mask, name, tcode, code)
/* If the decl was built immediately, mark it const on the tree node;
   if it was deferred, record const_p so ix86_add_new_builtins applies
   the marking when the decl is finally created.  */;
32424 TREE_READONLY (decl) = 1;
32426 ix86_builtins_isa[(int) code].const_p = true;
32431 /* Add any new builtin functions for a given ISA that may not have been
32432 declared. This saves a bit of space compared to adding all of the
32433 declarations to the tree, even if we didn't use them. */
32436 ix86_add_new_builtins (HOST_WIDE_INT isa)
/* Fast path: nothing deferred for any bit in ISA.  */
32438 if ((isa & deferred_isa_values) == 0)
32441 /* Bits in ISA value can be removed from potential isa values. */
32442 deferred_isa_values &= ~isa;
/* Suppress any pending target pragma while the decls are created, so
   the new decls are not affected by it; restored at the end.  */
32445 tree saved_current_target_pragma = current_target_pragma;
32446 current_target_pragma = NULL_TREE;
/* Scan every recorded builtin and materialize the ones enabled by ISA
   that are still deferred.  */
32448 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
32450 if ((ix86_builtins_isa[i].isa & isa) != 0
32451 && ix86_builtins_isa[i].set_and_not_built_p)
32455 /* Don't define the builtin again. */
32456 ix86_builtins_isa[i].set_and_not_built_p = false;
32458 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
32459 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
32460 type, i, BUILT_IN_MD, NULL,
32463 ix86_builtins[i] = decl;
/* Re-apply the attributes recorded when the builtin was deferred
   (see def_builtin / def_builtin_const).  */
32464 if (ix86_builtins_isa[i].const_p)
32465 TREE_READONLY (decl) = 1;
32466 if (ix86_builtins_isa[i].leaf_p)
32467 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
32469 if (ix86_builtins_isa[i].nothrow_p)
32470 TREE_NOTHROW (decl) = 1;
32474 current_target_pragma = saved_current_target_pragma;
32477 /* Bits for builtin_description.flag. */
32479 /* Set when we don't support the comparison natively, and should
32480 swap_comparison in order to support it. */
32481 #define BUILTIN_DESC_SWAP_OPERANDS 1
/* Descriptor used by the bdesc_* tables below to register one builtin:
   the ISA mask it requires, the insn pattern used to expand it, the
   user-visible __builtin_ia32_* name, its IX86_BUILTIN_* code, and the
   RTL comparison code (UNKNOWN for non-comparison builtins).  The
   trailing per-entry integer in the tables is the "flag" member
   referenced by BUILTIN_DESC_SWAP_OPERANDS above (its declaration is
   not visible in this extract).  */
32483 struct builtin_description
32485 const HOST_WIDE_INT mask;
32486 const enum insn_code icode;
32487 const char *const name;
32488 const enum ix86_builtins code;
32489 const enum rtx_code comparison;
/* comiss/comisd/ucomiss/ucomisd scalar FP compares that read %eflags.
   Column layout: ISA mask, expander pattern, builtin name, builtin code,
   RTL comparison code used when expanding (note eq->UNEQ, lt->UNLT,
   le->UNLE, neq->LTGT — the flag-reading forms of these conditions),
   trailing flag value (0, no operand swapping needed).  */
32493 static const struct builtin_description bdesc_comi[] =
32495 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
32496 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
32497 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
32498 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
32499 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
32500 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
32501 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
32502 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
32503 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
32504 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
32505 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
32506 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
32507 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
32508 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
32509 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
32510 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
32511 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
32512 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
32513 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
32514 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
32515 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
32516 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
32517 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
32518 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
/* SSE4.2 pcmpestri/pcmpestrm (explicit-length string compare) builtins.
   The index- and mask-returning forms use a last column of 0; the
   flag-testing forms encode which CC mode of the flags result they
   read (CCAmode, CCCmode, CCOmode, CCSmode, CCZmode).  */
32521 static const struct builtin_description bdesc_pcmpestr[] =
32524 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
32525 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
32526 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
32527 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
32528 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
32529 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
32530 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
/* SSE4.2 pcmpistri/pcmpistrm (implicit-length string compare) builtins.
   Same column convention as bdesc_pcmpestr above: 0 for the index/mask
   forms, a CC mode for the flag-testing forms.  */
32533 static const struct builtin_description bdesc_pcmpistr[] =
32536 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
32537 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
32538 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
32539 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
32540 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
32541 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
32542 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
32545 /* Special builtins with variable number of arguments. */
32546 static const struct builtin_description bdesc_special_args[] =
32548 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
32549 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
32550 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },
32552 /* 80387 (for use internally for atomic compound assignment). */
32553 { 0, CODE_FOR_fnstenv, "__builtin_ia32_fnstenv", IX86_BUILTIN_FNSTENV, UNKNOWN, (int) VOID_FTYPE_PVOID },
32554 { 0, CODE_FOR_fldenv, "__builtin_ia32_fldenv", IX86_BUILTIN_FLDENV, UNKNOWN, (int) VOID_FTYPE_PCVOID },
32555 { 0, CODE_FOR_fnstsw, "__builtin_ia32_fnstsw", IX86_BUILTIN_FNSTSW, UNKNOWN, (int) USHORT_FTYPE_VOID },
32556 { 0, CODE_FOR_fnclex, "__builtin_ia32_fnclex", IX86_BUILTIN_FNCLEX, UNKNOWN, (int) VOID_FTYPE_VOID },
32559 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
32562 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
32564 /* FXSR, XSAVE, XSAVEOPT, XSAVEC and XSAVES. */
32565 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxsave", IX86_BUILTIN_FXSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID },
32566 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxrstor", IX86_BUILTIN_FXRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID },
32567 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xsave", IX86_BUILTIN_XSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
32568 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xrstor", IX86_BUILTIN_XRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
32569 { OPTION_MASK_ISA_XSAVEOPT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt", IX86_BUILTIN_XSAVEOPT, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
32570 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xsaves", IX86_BUILTIN_XSAVES, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
32571 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xrstors", IX86_BUILTIN_XRSTORS, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
32572 { OPTION_MASK_ISA_XSAVEC, CODE_FOR_nothing, "__builtin_ia32_xsavec", IX86_BUILTIN_XSAVEC, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
32574 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxsave64", IX86_BUILTIN_FXSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID },
32575 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxrstor64", IX86_BUILTIN_FXRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID },
32576 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsave64", IX86_BUILTIN_XSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
32577 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstor64", IX86_BUILTIN_XRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
32578 { OPTION_MASK_ISA_XSAVEOPT | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt64", IX86_BUILTIN_XSAVEOPT64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
32579 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaves64", IX86_BUILTIN_XSAVES64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
32580 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstors64", IX86_BUILTIN_XRSTORS64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
32581 { OPTION_MASK_ISA_XSAVEC | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsavec64", IX86_BUILTIN_XSAVEC64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
32584 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storeups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
32585 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
32586 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
32588 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
32589 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
32590 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
32591 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
32593 /* SSE or 3DNow!A */
32594 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
32595 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntq, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
32598 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
32599 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
32600 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storeupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
32601 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedquv16qi, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
32602 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
32603 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
32604 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
32605 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
32606 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
32607 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddquv16qi, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
32609 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
32610 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
32613 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
32616 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
32619 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
32620 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
32623 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
32624 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
32626 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
32627 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
32628 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
32629 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
32630 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
32632 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
32633 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
32634 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
32635 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
32636 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddquv32qi, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
32637 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedquv32qi, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
32638 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
32640 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
32641 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
32642 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
32644 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
32645 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
32646 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
32647 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
32648 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
32649 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
32650 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
32651 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
32654 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
32655 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
32656 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
32657 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
32658 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
32659 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
32660 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
32661 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
32662 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },
32665 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16sf_mask, "__builtin_ia32_compressstoresf512_mask", IX86_BUILTIN_COMPRESSPSSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_UHI },
32666 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16si_mask, "__builtin_ia32_compressstoresi512_mask", IX86_BUILTIN_PCOMPRESSDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_UHI },
32667 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8df_mask, "__builtin_ia32_compressstoredf512_mask", IX86_BUILTIN_COMPRESSPDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_UQI },
32668 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8di_mask, "__builtin_ia32_compressstoredi512_mask", IX86_BUILTIN_PCOMPRESSQSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_UQI },
32669 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandloadsf512_mask", IX86_BUILTIN_EXPANDPSLOAD512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_UHI },
32670 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandloadsf512_maskz", IX86_BUILTIN_EXPANDPSLOAD512Z, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_UHI },
32671 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandloadsi512_mask", IX86_BUILTIN_PEXPANDDLOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_UHI },
32672 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandloadsi512_maskz", IX86_BUILTIN_PEXPANDDLOAD512Z, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_UHI },
32673 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expandloaddf512_mask", IX86_BUILTIN_EXPANDPDLOAD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_UQI },
32674 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expandloaddf512_maskz", IX86_BUILTIN_EXPANDPDLOAD512Z, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_UQI },
32675 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expandloaddi512_mask", IX86_BUILTIN_PEXPANDQLOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_UQI },
32676 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expandloaddi512_maskz", IX86_BUILTIN_PEXPANDQLOAD512Z, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_UQI },
32677 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv16si_mask, "__builtin_ia32_loaddqusi512_mask", IX86_BUILTIN_LOADDQUSI512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_UHI },
32678 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv8di_mask, "__builtin_ia32_loaddqudi512_mask", IX86_BUILTIN_LOADDQUDI512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_UQI },
32679 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadupd512_mask, "__builtin_ia32_loadupd512_mask", IX86_BUILTIN_LOADUPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_UQI },
32680 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadups512_mask, "__builtin_ia32_loadups512_mask", IX86_BUILTIN_LOADUPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_UHI },
32681 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadaps512_mask", IX86_BUILTIN_LOADAPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_UHI },
32682 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32load512_mask", IX86_BUILTIN_MOVDQA32LOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_UHI },
32683 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadapd512_mask", IX86_BUILTIN_LOADAPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_UQI },
32684 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64load512_mask", IX86_BUILTIN_MOVDQA64LOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_UQI },
32685 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv16sf, "__builtin_ia32_movntps512", IX86_BUILTIN_MOVNTPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF },
32686 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8df, "__builtin_ia32_movntpd512", IX86_BUILTIN_MOVNTPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF },
32687 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8di, "__builtin_ia32_movntdq512", IX86_BUILTIN_MOVNTDQ512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI },
32688 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntdqa, "__builtin_ia32_movntdqa512", IX86_BUILTIN_MOVNTDQA512, UNKNOWN, (int) V8DI_FTYPE_PV8DI },
32689 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv16si_mask, "__builtin_ia32_storedqusi512_mask", IX86_BUILTIN_STOREDQUSI512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_UHI },
32690 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv8di_mask, "__builtin_ia32_storedqudi512_mask", IX86_BUILTIN_STOREDQUDI512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_UQI },
32691 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeupd512_mask, "__builtin_ia32_storeupd512_mask", IX86_BUILTIN_STOREUPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_UQI },
32692 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask_store, "__builtin_ia32_pmovusqd512mem_mask", IX86_BUILTIN_PMOVUSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_UQI },
32693 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask_store, "__builtin_ia32_pmovsqd512mem_mask", IX86_BUILTIN_PMOVSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_UQI },
32694 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask_store, "__builtin_ia32_pmovqd512mem_mask", IX86_BUILTIN_PMOVQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_UQI },
32695 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovusqw512mem_mask", IX86_BUILTIN_PMOVUSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_UQI },
32696 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovsqw512mem_mask", IX86_BUILTIN_PMOVSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_UQI },
32697 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovqw512mem_mask", IX86_BUILTIN_PMOVQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_UQI },
32698 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovusdw512mem_mask", IX86_BUILTIN_PMOVUSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_UHI },
32699 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovsdw512mem_mask", IX86_BUILTIN_PMOVSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_UHI },
32700 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovdw512mem_mask", IX86_BUILTIN_PMOVDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_UHI },
32701 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovqb512mem_mask", IX86_BUILTIN_PMOVQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_UQI },
32702 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovusqb512mem_mask", IX86_BUILTIN_PMOVUSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_UQI },
32703 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovsqb512mem_mask", IX86_BUILTIN_PMOVSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_UQI },
32704 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovusdb512mem_mask", IX86_BUILTIN_PMOVUSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_UHI },
32705 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovsdb512mem_mask", IX86_BUILTIN_PMOVSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_UHI },
32706 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovdb512mem_mask", IX86_BUILTIN_PMOVDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_UHI },
32707 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeups512_mask, "__builtin_ia32_storeups512_mask", IX86_BUILTIN_STOREUPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_UHI },
32708 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeaps512_mask", IX86_BUILTIN_STOREAPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_UHI },
32709 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_movdqa32store512_mask", IX86_BUILTIN_MOVDQA32STORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_UHI },
32710 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeapd512_mask", IX86_BUILTIN_STOREAPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_UQI },
32711 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_movdqa64store512_mask", IX86_BUILTIN_MOVDQA64STORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_UQI },
32713 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
32714 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
32715 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
32716 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
32717 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
32718 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
32721 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
32722 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
32723 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
32724 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
32725 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
32726 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
32727 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
32728 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
32731 { OPTION_MASK_ISA_RTM, CODE_FOR_xbegin, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
32732 { OPTION_MASK_ISA_RTM, CODE_FOR_xend, "__builtin_ia32_xend", IX86_BUILTIN_XEND, UNKNOWN, (int) VOID_FTYPE_VOID },
32733 { OPTION_MASK_ISA_RTM, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID },
32736 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loaddquv32hi_mask, "__builtin_ia32_loaddquhi512_mask", IX86_BUILTIN_LOADDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_PCV32HI_V32HI_USI },
32737 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_loaddquv64qi_mask, "__builtin_ia32_loaddquqi512_mask", IX86_BUILTIN_LOADDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_PCV64QI_V64QI_UDI },
32738 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storedquv32hi_mask, "__builtin_ia32_storedquhi512_mask", IX86_BUILTIN_STOREDQUHI512_MASK, UNKNOWN, (int) VOID_FTYPE_PV32HI_V32HI_USI },
32739 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storedquv64qi_mask, "__builtin_ia32_storedquqi512_mask", IX86_BUILTIN_STOREDQUQI512_MASK, UNKNOWN, (int) VOID_FTYPE_PV64QI_V64QI_UDI },
/* AVX512VL (with AVX512BW where noted): 128/256-bit masked loads/stores.  */
32742 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv16hi_mask, "__builtin_ia32_loaddquhi256_mask", IX86_BUILTIN_LOADDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_PCV16HI_V16HI_UHI },
32743 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv8hi_mask, "__builtin_ia32_loaddquhi128_mask", IX86_BUILTIN_LOADDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_PCV8HI_V8HI_UQI },
32744 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv32qi_mask, "__builtin_ia32_loaddquqi256_mask", IX86_BUILTIN_LOADDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_PCV32QI_V32QI_USI },
32745 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv16qi_mask, "__builtin_ia32_loaddquqi128_mask", IX86_BUILTIN_LOADDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_PCV16QI_V16QI_UHI },
32746 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4di_mask, "__builtin_ia32_movdqa64load256_mask", IX86_BUILTIN_MOVDQA64LOAD256_MASK, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_UQI },
32747 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2di_mask, "__builtin_ia32_movdqa64load128_mask", IX86_BUILTIN_MOVDQA64LOAD128_MASK, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_UQI },
32748 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8si_mask, "__builtin_ia32_movdqa32load256_mask", IX86_BUILTIN_MOVDQA32LOAD256_MASK, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_UQI },
32749 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4si_mask, "__builtin_ia32_movdqa32load128_mask", IX86_BUILTIN_MOVDQA32LOAD128_MASK, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_UQI },
32750 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4di_mask, "__builtin_ia32_movdqa64store256_mask", IX86_BUILTIN_MOVDQA64STORE256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_UQI },
32751 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2di_mask, "__builtin_ia32_movdqa64store128_mask", IX86_BUILTIN_MOVDQA64STORE128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_UQI },
32752 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8si_mask, "__builtin_ia32_movdqa32store256_mask", IX86_BUILTIN_MOVDQA32STORE256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_UQI },
32753 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4si_mask, "__builtin_ia32_movdqa32store128_mask", IX86_BUILTIN_MOVDQA32STORE128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_UQI },
32754 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4df_mask, "__builtin_ia32_loadapd256_mask", IX86_BUILTIN_LOADAPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_UQI },
32755 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2df_mask, "__builtin_ia32_loadapd128_mask", IX86_BUILTIN_LOADAPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_UQI },
32756 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8sf_mask, "__builtin_ia32_loadaps256_mask", IX86_BUILTIN_LOADAPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_UQI },
32757 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4sf_mask, "__builtin_ia32_loadaps128_mask", IX86_BUILTIN_LOADAPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_UQI },
32758 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4df_mask, "__builtin_ia32_storeapd256_mask", IX86_BUILTIN_STOREAPD256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_UQI },
32759 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2df_mask, "__builtin_ia32_storeapd128_mask", IX86_BUILTIN_STOREAPD128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_UQI },
32760 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8sf_mask, "__builtin_ia32_storeaps256_mask", IX86_BUILTIN_STOREAPS256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_UQI },
32761 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4sf_mask, "__builtin_ia32_storeaps128_mask", IX86_BUILTIN_STOREAPS128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_UQI },
32762 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loadupd256_mask, "__builtin_ia32_loadupd256_mask", IX86_BUILTIN_LOADUPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_UQI },
32763 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loadupd_mask, "__builtin_ia32_loadupd128_mask", IX86_BUILTIN_LOADUPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_UQI },
32764 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loadups256_mask, "__builtin_ia32_loadups256_mask", IX86_BUILTIN_LOADUPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_UQI },
32765 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_loadups_mask, "__builtin_ia32_loadups128_mask", IX86_BUILTIN_LOADUPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_UQI },
32766 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeupd256_mask, "__builtin_ia32_storeupd256_mask", IX86_BUILTIN_STOREUPD256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_UQI },
32767 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeupd_mask, "__builtin_ia32_storeupd128_mask", IX86_BUILTIN_STOREUPD128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_UQI },
32768 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeups256_mask, "__builtin_ia32_storeups256_mask", IX86_BUILTIN_STOREUPS256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_UQI },
32769 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeups_mask, "__builtin_ia32_storeups128_mask", IX86_BUILTIN_STOREUPS128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_UQI },
32770 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv4di_mask, "__builtin_ia32_loaddqudi256_mask", IX86_BUILTIN_LOADDQUDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_UQI },
32771 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv2di_mask, "__builtin_ia32_loaddqudi128_mask", IX86_BUILTIN_LOADDQUDI128_MASK, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_UQI },
32772 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv8si_mask, "__builtin_ia32_loaddqusi256_mask", IX86_BUILTIN_LOADDQUSI256_MASK, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_UQI },
32773 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv4si_mask, "__builtin_ia32_loaddqusi128_mask", IX86_BUILTIN_LOADDQUSI128_MASK, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_UQI },
32774 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv4di_mask, "__builtin_ia32_storedqudi256_mask", IX86_BUILTIN_STOREDQUDI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_UQI },
32775 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv2di_mask, "__builtin_ia32_storedqudi128_mask", IX86_BUILTIN_STOREDQUDI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_UQI },
32776 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv8si_mask, "__builtin_ia32_storedqusi256_mask", IX86_BUILTIN_STOREDQUSI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_UQI },
32777 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv4si_mask, "__builtin_ia32_storedqusi128_mask", IX86_BUILTIN_STOREDQUSI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_UQI },
32778 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv16hi_mask, "__builtin_ia32_storedquhi256_mask", IX86_BUILTIN_STOREDQUHI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16HI_UHI },
32779 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv8hi_mask, "__builtin_ia32_storedquhi128_mask", IX86_BUILTIN_STOREDQUHI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8HI_UQI },
32780 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv32qi_mask, "__builtin_ia32_storedquqi256_mask", IX86_BUILTIN_STOREDQUQI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32QI_USI },
32781 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv16qi_mask, "__builtin_ia32_storedquqi128_mask", IX86_BUILTIN_STOREDQUQI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16QI_UHI },
/* AVX512VL masked compress-to-memory stores.  */
32782 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4df_mask, "__builtin_ia32_compressstoredf256_mask", IX86_BUILTIN_COMPRESSPDSTORE256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_UQI },
32783 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev2df_mask, "__builtin_ia32_compressstoredf128_mask", IX86_BUILTIN_COMPRESSPDSTORE128, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_UQI },
32784 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev8sf_mask, "__builtin_ia32_compressstoresf256_mask", IX86_BUILTIN_COMPRESSPSSTORE256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_UQI },
32785 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4sf_mask, "__builtin_ia32_compressstoresf128_mask", IX86_BUILTIN_COMPRESSPSSTORE128, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_UQI },
32786 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4di_mask, "__builtin_ia32_compressstoredi256_mask", IX86_BUILTIN_PCOMPRESSQSTORE256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_UQI },
32787 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev2di_mask, "__builtin_ia32_compressstoredi128_mask", IX86_BUILTIN_PCOMPRESSQSTORE128, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_UQI },
32788 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev8si_mask, "__builtin_ia32_compressstoresi256_mask", IX86_BUILTIN_PCOMPRESSDSTORE256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_UQI },
32789 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4si_mask, "__builtin_ia32_compressstoresi128_mask", IX86_BUILTIN_PCOMPRESSDSTORE128, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_UQI },
/* AVX512VL masked expand-from-memory loads (_mask and _maskz forms).  */
32790 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_mask, "__builtin_ia32_expandloaddf256_mask", IX86_BUILTIN_EXPANDPDLOAD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_UQI },
32791 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_mask, "__builtin_ia32_expandloaddf128_mask", IX86_BUILTIN_EXPANDPDLOAD128, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_UQI },
32792 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_mask, "__builtin_ia32_expandloadsf256_mask", IX86_BUILTIN_EXPANDPSLOAD256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_UQI },
32793 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_mask, "__builtin_ia32_expandloadsf128_mask", IX86_BUILTIN_EXPANDPSLOAD128, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_UQI },
32794 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_mask, "__builtin_ia32_expandloaddi256_mask", IX86_BUILTIN_PEXPANDQLOAD256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_UQI },
32795 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_mask, "__builtin_ia32_expandloaddi128_mask", IX86_BUILTIN_PEXPANDQLOAD128, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_UQI },
32796 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_mask, "__builtin_ia32_expandloadsi256_mask", IX86_BUILTIN_PEXPANDDLOAD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_UQI },
32797 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_mask, "__builtin_ia32_expandloadsi128_mask", IX86_BUILTIN_PEXPANDDLOAD128, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_UQI },
32798 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_maskz, "__builtin_ia32_expandloaddf256_maskz", IX86_BUILTIN_EXPANDPDLOAD256Z, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_UQI },
32799 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_maskz, "__builtin_ia32_expandloaddf128_maskz", IX86_BUILTIN_EXPANDPDLOAD128Z, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_UQI },
32800 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_maskz, "__builtin_ia32_expandloadsf256_maskz", IX86_BUILTIN_EXPANDPSLOAD256Z, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_UQI },
32801 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_maskz, "__builtin_ia32_expandloadsf128_maskz", IX86_BUILTIN_EXPANDPSLOAD128Z, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_UQI },
32802 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_maskz, "__builtin_ia32_expandloaddi256_maskz", IX86_BUILTIN_PEXPANDQLOAD256Z, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_UQI },
32803 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_maskz, "__builtin_ia32_expandloaddi128_maskz", IX86_BUILTIN_PEXPANDQLOAD128Z, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_UQI },
32804 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_maskz, "__builtin_ia32_expandloadsi256_maskz", IX86_BUILTIN_PEXPANDDLOAD256Z, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_UQI },
32805 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_maskz, "__builtin_ia32_expandloadsi128_maskz", IX86_BUILTIN_PEXPANDDLOAD128Z, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_UQI },
/* AVX512VL PMOV* masked truncating stores (plain, signed- and
   unsigned-saturating variants).  */
32806 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4si2_mask_store, "__builtin_ia32_pmovqd256mem_mask", IX86_BUILTIN_PMOVQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_UQI },
32807 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2si2_mask_store, "__builtin_ia32_pmovqd128mem_mask", IX86_BUILTIN_PMOVQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_UQI },
32808 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4si2_mask_store, "__builtin_ia32_pmovsqd256mem_mask", IX86_BUILTIN_PMOVSQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_UQI },
32809 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2si2_mask_store, "__builtin_ia32_pmovsqd128mem_mask", IX86_BUILTIN_PMOVSQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_UQI },
32810 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4si2_mask_store, "__builtin_ia32_pmovusqd256mem_mask", IX86_BUILTIN_PMOVUSQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_UQI },
32811 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2si2_mask_store, "__builtin_ia32_pmovusqd128mem_mask", IX86_BUILTIN_PMOVUSQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_UQI },
32812 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovqw256mem_mask", IX86_BUILTIN_PMOVQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_UQI },
32813 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovqw128mem_mask", IX86_BUILTIN_PMOVQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_UQI },
32814 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovsqw256mem_mask", IX86_BUILTIN_PMOVSQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_UQI },
32815 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovsqw128mem_mask", IX86_BUILTIN_PMOVSQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_UQI },
32816 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovusqw256mem_mask", IX86_BUILTIN_PMOVUSQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_UQI },
32817 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovusqw128mem_mask", IX86_BUILTIN_PMOVUSQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_UQI },
32818 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovqb256mem_mask", IX86_BUILTIN_PMOVQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_UQI },
32819 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovqb128mem_mask", IX86_BUILTIN_PMOVQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_UQI },
32820 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovsqb256mem_mask", IX86_BUILTIN_PMOVSQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_UQI },
32821 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovsqb128mem_mask", IX86_BUILTIN_PMOVSQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_UQI },
32822 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovusqb256mem_mask", IX86_BUILTIN_PMOVUSQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_UQI },
32823 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovusqb128mem_mask", IX86_BUILTIN_PMOVUSQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_UQI },
32824 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovdb256mem_mask", IX86_BUILTIN_PMOVDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_UQI },
32825 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovdb128mem_mask", IX86_BUILTIN_PMOVDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_UQI },
32826 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovsdb256mem_mask", IX86_BUILTIN_PMOVSDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_UQI },
32827 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovsdb128mem_mask", IX86_BUILTIN_PMOVSDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_UQI },
32828 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovusdb256mem_mask", IX86_BUILTIN_PMOVUSDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_UQI },
32829 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovusdb128mem_mask", IX86_BUILTIN_PMOVUSDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_UQI },
32830 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovdw256mem_mask", IX86_BUILTIN_PMOVDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_UQI },
32831 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovdw128mem_mask", IX86_BUILTIN_PMOVDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_UQI },
32832 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovsdw256mem_mask", IX86_BUILTIN_PMOVSDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_UQI },
32833 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovsdw128mem_mask", IX86_BUILTIN_PMOVSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_UQI },
32834 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovusdw256mem_mask", IX86_BUILTIN_PMOVUSDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_UQI },
32835 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovusdw128mem_mask", IX86_BUILTIN_PMOVUSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_UQI },
/* PCOMMIT.  */
32838 { OPTION_MASK_ISA_PCOMMIT, CODE_FOR_pcommit, "__builtin_ia32_pcommit", IX86_BUILTIN_PCOMMIT, UNKNOWN, (int) VOID_FTYPE_VOID },
32840 /* RDPKRU and WRPKRU. */
32841 { OPTION_MASK_ISA_PKU, CODE_FOR_rdpkru, "__builtin_ia32_rdpkru", IX86_BUILTIN_RDPKRU, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
32842 { OPTION_MASK_ISA_PKU, CODE_FOR_wrpkru, "__builtin_ia32_wrpkru", IX86_BUILTIN_WRPKRU, UNKNOWN, (int) VOID_FTYPE_UNSIGNED }
32845 /* Builtins with variable number of arguments. */
32846 static const struct builtin_description bdesc_args[] =
/* Scalar builtins: bit-scan (bsr), rdpmc, byte/word rotates.  A mask of
   ~OPTION_MASK_ISA_64BIT marks entries not restricted to 64-bit mode.  */
32848 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
32849 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
32850 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
32851 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
32852 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
32853 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
32854 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
/* MMX.  */
32857 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
32858 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32859 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
32860 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
32861 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32862 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
32864 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
32865 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32866 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
32867 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32868 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
32869 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32870 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
32871 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32873 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32874 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32876 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
32877 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
32878 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
32879 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
32881 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
32882 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32883 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
32884 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
32885 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32886 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
32888 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
32889 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32890 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
32891 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
32892 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI},
32893 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI},
32895 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
32896 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
32897 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
32899 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
32901 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
32902 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
32903 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
32904 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
32905 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
32906 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
32908 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
32909 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
32910 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
32911 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
32912 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
32913 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
32915 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
32916 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
32917 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
32918 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
/* 3DNow!  */
32921 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
32922 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
32923 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
32924 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
32926 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
32927 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
32928 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
32929 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
32930 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
32931 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
32932 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
32933 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
32934 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
32935 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
32936 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
32937 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
32938 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
32939 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
32940 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
/* 3DNow! extensions (3DNow!A).  */
32943 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
32944 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
32945 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
32946 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
32947 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
32948 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
/* SSE.  */
32951 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
32952 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
32953 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
32954 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
32955 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
32956 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
32957 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
32958 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
32959 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
32960 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
32961 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
32962 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
32964 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32966 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32967 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32968 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32969 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32970 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32971 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32972 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32973 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32975 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
32976 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
32977 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
32978 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
32979 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
32980 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
32981 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
32982 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
32983 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
32984 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
32985 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP},
32986 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
32987 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
32988 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
32989 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
32990 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
32991 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
32992 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
32993 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
32994 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
32996 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32997 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32998 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32999 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33001 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33002 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33003 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33004 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33006 { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33008 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33009 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33010 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33011 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33012 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33014 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
33015 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
32960 /* 64-bit only: cvtsi642ss converts DImode integer to scalar float.
   Cast the ftype enum to int like every other entry in this table.  */
32960 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
33018 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
33020 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
33021 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
33022 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
33024 { OPTION_MASK_ISA_SSE, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
33025 { OPTION_MASK_ISA_SSE, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
33027 /* SSE MMX or 3Dnow!A */
33028 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
33029 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
33030 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
33032 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
33033 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
33034 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
33035 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
33037 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
33038 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
33040 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
33043 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33045 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
33046 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
33047 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
33048 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
33049 { OPTION_MASK_ISA_SSE2, CODE_FOR_floatv4siv4sf2, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
33051 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
33052 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
33053 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
33054 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
33055 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
33057 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
33059 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
33060 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
33061 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
33062 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
33064 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_fix_notruncv4sfv4si, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
33065 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
33066 { OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
33068 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33069 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33070 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33071 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33072 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33073 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33074 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33075 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33077 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
33078 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
33079 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
33080 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
33081 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP},
33082 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
33083 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
33084 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
33085 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
33086 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
33087 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
33088 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
33089 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
33090 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
33091 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
33092 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
33093 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
33094 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
33095 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
33096 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
33098 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33099 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33100 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33101 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33103 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33104 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33105 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33106 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33108 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33110 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33111 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33112 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33114 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
33116 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
33117 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33118 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33119 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
33120 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
33121 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33122 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33123 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
33125 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
33126 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33127 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
33128 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33129 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
33130 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33131 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
33132 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33134 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33135 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33137 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
33138 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
33139 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
33140 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
33142 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
33143 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33145 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
33146 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33147 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33148 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
33149 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33150 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33152 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
33153 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33154 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
33155 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33157 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
33158 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33159 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33160 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
33161 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
33162 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33163 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33164 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
33166 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
33167 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
33168 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
33170 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33171 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
33173 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
33174 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_umult_even_v4si, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
33176 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
33178 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
33179 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
33180 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
33181 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
33183 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
33184 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
33185 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
33186 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
33187 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
33188 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
33189 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
33191 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
33192 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
33193 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
33194 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
33195 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
33196 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
33197 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
33199 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
33200 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
33201 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
33202 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
33204 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
33205 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
33206 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
33208 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
33210 /* MOVQ (xmm, xmm) is an SSE2 instruction (pattern sse2_movq128,
   operates on V2DImode), so it must be gated on SSE2, not SSE.  */
33210 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
33213 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
33214 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
33217 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF},
33218 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
33220 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33221 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33222 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33223 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33224 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33225 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33228 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
33229 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
33230 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
33231 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
33232 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
33233 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
33235 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33236 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
33237 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33238 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
33239 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33240 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
33241 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33242 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
33243 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33244 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
33245 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33246 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
33247 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
33248 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
33249 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33250 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
33251 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
33252 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
33253 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
33254 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
33255 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33256 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
33257 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33258 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
33261 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
33262 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
33265 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33266 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33267 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
33268 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
33269 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33270 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33271 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33272 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
33273 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
33274 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
33276 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
33277 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
33278 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
33279 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
33280 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
33281 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
33282 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
33283 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
33284 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
33285 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
33286 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
33287 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
33288 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
33290 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
33291 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
33292 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
33293 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33294 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33295 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33296 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
33297 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33298 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33299 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33300 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
33301 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33304 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
33305 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
33306 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33307 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33309 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
33310 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
33311 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
33312 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },
33314 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
33315 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
33317 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
33318 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
33320 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
33321 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
33322 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
33323 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
33325 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND },
33326 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND },
33328 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
33329 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF },
33331 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptestv2di, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
33332 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptestv2di, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
33333 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptestv2di, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
33336 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
33337 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
33338 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
33339 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
33340 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
33343 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
33344 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
33345 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
33346 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
33349 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
33350 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
33352 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
33353 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
33354 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
33355 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
33358 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
33361 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33362 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33363 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33364 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33365 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33366 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33367 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33368 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33369 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33370 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33371 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33372 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33373 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33374 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33375 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33376 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33377 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33378 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33379 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33380 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33381 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33382 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33383 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33384 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33385 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33386 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33388 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
33389 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
33390 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
33391 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
33393 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
33394 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
33395 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
33396 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
33397 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
33398 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
33399 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
33400 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33401 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33402 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33403 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33404 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
33405 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
33406 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
33407 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
33408 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
33409 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
33410 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
33411 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
33412 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_fix_notruncv8sfv8si, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
33413 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
33414 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
33415 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
33416 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
33417 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
33418 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
33419 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
33420 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
33421 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
33422 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
33423 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
33424 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
33425 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
33426 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
33428 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
33429 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
33430 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
33432 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
33433 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
33434 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
33435 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
33436 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
33438 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
33440 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
33441 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
33443 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
33444 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
33445 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
33446 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },
33448 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
33449 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
33451 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
33452 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
33454 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
33455 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
33456 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
33457 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },
33459 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND },
33460 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND },
33462 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
33463 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
33465 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33466 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33467 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33468 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33470 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
33471 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
33472 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
33473 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
33474 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
33475 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
33477 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
33478 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
33479 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
33480 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  /* AVX vtest/ptest builtins.  Entry layout: { ISA mask, insn code,
     builtin name, builtin enum, comparison code, prototype index }.
     The EQ/LTU/GTU comparison codes select the z/c/nzc flavor of the
     test, matching the ...z/...c/...nzc suffixes in the builtin
     names.  */
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptestv4di, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptestv4di, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptestv4di, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  /* 256-bit move-mask: vector in, scalar int out.  */
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
  /* 256-bit copysign.  */
  { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33499 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256 ", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
33502 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
33503 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
33504 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
33505 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
33506 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
33507 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
33508 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
33509 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
33510 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33511 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33512 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33513 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
33514 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33515 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33516 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33517 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33518 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv2ti, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
33519 { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
33520 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
33521 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33522 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33523 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
33524 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
33525 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33526 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33527 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33528 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
33529 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33530 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33531 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33532 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
33533 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33534 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33535 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33536 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33537 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33538 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33539 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
33540 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
33541 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33542 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33543 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3 , "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33544 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33545 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33546 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3 , "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33547 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33548 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33549 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3 , "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33550 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33551 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33552 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3 , "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33553 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
33554 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
33555 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2 , "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
33556 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2 , "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
33557 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2 , "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
33558 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2 , "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
33559 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2 , "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
33560 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
33561 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2 , "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
33562 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2 , "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
33563 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2 , "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
33564 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2 , "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
33565 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2 , "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
33566 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_smult_even_v8si, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
33567 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmulhrswv16hi3 , "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33568 { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256" , IX86_BUILTIN_PMULHUW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33569 { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256" , IX86_BUILTIN_PMULHW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33570 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256" , IX86_BUILTIN_PMULLW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33571 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256" , IX86_BUILTIN_PMULLD256 , UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33572 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_umult_even_v8si, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
33573 { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
33574 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
33575 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33576 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
33577 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
33578 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
33579 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33580 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33581 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3 , "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33582 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
33583 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
33584 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
33585 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
33586 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
33587 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
33588 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
33589 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
33590 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
33591 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
33592 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
33593 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
33594 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
33595 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
33596 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
33597 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
33598 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
33599 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
33600 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33601 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33602 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33603 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
33604 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33605 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33606 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33607 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33608 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33609 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33610 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33611 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
33612 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33613 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33614 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33615 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
33616 { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
33617 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
33618 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
33619 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
33620 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
33621 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
33622 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
33623 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
33624 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
33625 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
33626 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
33627 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
33628 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
33629 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
33630 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
33631 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33632 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
33633 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
33634 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
33635 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
33636 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vextractf128v4di, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
33637 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vinsertf128v4di, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
33638 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
33639 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
33640 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33641 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33642 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33643 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33644 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
33645 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
33646 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33647 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  /* LZCNT: 16-bit leading-zero count.  */
  { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
  /* BMI: bit-field extract (register form) and 16-bit trailing-zero
     count.  */
  { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
  { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
  /* TBM: bextri (immediate-form bit-field extract).  */
  { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
  /* F16C: half-precision <-> single-precision conversions.  */
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
  /* BMI2: bzhi (zero high bits), pdep/pext (parallel bit
     deposit/extract).  */
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
33675 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_256si, "__builtin_ia32_si512_256si", IX86_BUILTIN_SI512_SI256, UNKNOWN, (int) V16SI_FTYPE_V8SI },
33676 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_256ps, "__builtin_ia32_ps512_256ps", IX86_BUILTIN_PS512_PS256, UNKNOWN, (int) V16SF_FTYPE_V8SF },
33677 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_256pd, "__builtin_ia32_pd512_256pd", IX86_BUILTIN_PD512_PD256, UNKNOWN, (int) V8DF_FTYPE_V4DF },
33678 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_si, "__builtin_ia32_si512_si", IX86_BUILTIN_SI512_SI, UNKNOWN, (int) V16SI_FTYPE_V4SI },
33679 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_ps, "__builtin_ia32_ps512_ps", IX86_BUILTIN_PS512_PS, UNKNOWN, (int) V16SF_FTYPE_V4SF },
33680 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_pd, "__builtin_ia32_pd512_pd", IX86_BUILTIN_PD512_PD, UNKNOWN, (int) V8DF_FTYPE_V2DF },
33681 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv16si_mask, "__builtin_ia32_alignd512_mask", IX86_BUILTIN_ALIGND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_UHI },
33682 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv8di_mask, "__builtin_ia32_alignq512_mask", IX86_BUILTIN_ALIGNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UQI },
33683 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16si, "__builtin_ia32_blendmd_512_mask", IX86_BUILTIN_BLENDMD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI },
33684 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8df, "__builtin_ia32_blendmpd_512_mask", IX86_BUILTIN_BLENDMPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI },
33685 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16sf, "__builtin_ia32_blendmps_512_mask", IX86_BUILTIN_BLENDMPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI },
33686 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8di, "__builtin_ia32_blendmq_512_mask", IX86_BUILTIN_BLENDMQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI },
33687 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x4_512", IX86_BUILTIN_BROADCASTF32X4_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_UHI },
33688 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8df_mask, "__builtin_ia32_broadcastf64x4_512", IX86_BUILTIN_BROADCASTF64X4_512, UNKNOWN, (int) V8DF_FTYPE_V4DF_V8DF_UQI },
33689 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16si_mask, "__builtin_ia32_broadcasti32x4_512", IX86_BUILTIN_BROADCASTI32X4_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_UHI },
33690 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8di_mask, "__builtin_ia32_broadcasti64x4_512", IX86_BUILTIN_BROADCASTI64X4_512, UNKNOWN, (int) V8DI_FTYPE_V4DI_V8DI_UQI },
33691 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8df_mask, "__builtin_ia32_broadcastsd512", IX86_BUILTIN_BROADCASTSD512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_UQI },
33692 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16sf_mask, "__builtin_ia32_broadcastss512", IX86_BUILTIN_BROADCASTSS512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_UHI },
33693 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16si3_mask, "__builtin_ia32_cmpd512_mask", IX86_BUILTIN_CMPD512, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_INT_UHI },
33694 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8di3_mask, "__builtin_ia32_cmpq512_mask", IX86_BUILTIN_CMPQ512, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_INT_UQI },
33695 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8df_mask, "__builtin_ia32_compressdf512_mask", IX86_BUILTIN_COMPRESSPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI },
33696 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16sf_mask, "__builtin_ia32_compresssf512_mask", IX86_BUILTIN_COMPRESSPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI },
33697 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv8siv8df2_mask, "__builtin_ia32_cvtdq2pd512_mask", IX86_BUILTIN_CVTDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_UQI },
33698 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtps2ph512_mask, "__builtin_ia32_vcvtps2ph512_mask", IX86_BUILTIN_CVTPS2PH512, UNKNOWN, (int) V16HI_FTYPE_V16SF_INT_V16HI_UHI },
33699 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv8siv8df2_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_UQI },
33700 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2sd32, "__builtin_ia32_cvtusi2sd32", IX86_BUILTIN_CVTUSI2SD32, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT },
33701 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expanddf512_mask", IX86_BUILTIN_EXPANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI },
33702 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expanddf512_maskz", IX86_BUILTIN_EXPANDPD512Z, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI },
33703 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandsf512_mask", IX86_BUILTIN_EXPANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI },
33704 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandsf512_maskz", IX86_BUILTIN_EXPANDPS512Z, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI },
33705 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf32x4_mask, "__builtin_ia32_extractf32x4_mask", IX86_BUILTIN_EXTRACTF32X4, UNKNOWN, (int) V4SF_FTYPE_V16SF_INT_V4SF_UQI },
33706 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf64x4_mask, "__builtin_ia32_extractf64x4_mask", IX86_BUILTIN_EXTRACTF64X4, UNKNOWN, (int) V4DF_FTYPE_V8DF_INT_V4DF_UQI },
/* AVX-512F / AVX-512CD / AVX-512ER 512-bit vector builtins.
   Each row is one builtin_description initializer:
   { ISA option mask, insn_code of the named expander, builtin name
     string, IX86_BUILTIN_* enum value, sub-code (UNKNOWN unless a
     comparison/rounding code is smuggled through this field), and the
     function-prototype enum cast to int }.
   The _mask expanders take a merge source and a write-mask (the
   trailing V.._U.I operands in the prototype); _maskz variants
   zero-mask instead.  */
33707 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti32x4_mask, "__builtin_ia32_extracti32x4_mask", IX86_BUILTIN_EXTRACTI32X4, UNKNOWN, (int) V4SI_FTYPE_V16SI_INT_V4SI_UQI },
33708 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti64x4_mask, "__builtin_ia32_extracti64x4_mask", IX86_BUILTIN_EXTRACTI64X4, UNKNOWN, (int) V4DI_FTYPE_V8DI_INT_V4DI_UQI },
33709 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf32x4_mask, "__builtin_ia32_insertf32x4_mask", IX86_BUILTIN_INSERTF32X4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V4SF_INT_V16SF_UHI },
33710 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf64x4_mask, "__builtin_ia32_insertf64x4_mask", IX86_BUILTIN_INSERTF64X4, UNKNOWN, (int) V8DF_FTYPE_V8DF_V4DF_INT_V8DF_UQI },
33711 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti32x4_mask, "__builtin_ia32_inserti32x4_mask", IX86_BUILTIN_INSERTI32X4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_INT_V16SI_UHI },
33712 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti64x4_mask, "__builtin_ia32_inserti64x4_mask", IX86_BUILTIN_INSERTI64X4, UNKNOWN, (int) V8DI_FTYPE_V8DI_V4DI_INT_V8DI_UQI },
33713 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_movapd512_mask", IX86_BUILTIN_MOVAPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI },
33714 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_movaps512_mask", IX86_BUILTIN_MOVAPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI },
33715 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movddup512_mask, "__builtin_ia32_movddup512_mask", IX86_BUILTIN_MOVDDUP512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI },
33716 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32_512_mask", IX86_BUILTIN_MOVDQA32_512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI },
33717 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64_512_mask", IX86_BUILTIN_MOVDQA64_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI },
33718 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movshdup512_mask, "__builtin_ia32_movshdup512_mask", IX86_BUILTIN_MOVSHDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI },
33719 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movsldup512_mask, "__builtin_ia32_movsldup512_mask", IX86_BUILTIN_MOVSLDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI },
/* Integer arithmetic, logic and compare on 512-bit vectors.  */
33720 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv16si2_mask, "__builtin_ia32_pabsd512_mask", IX86_BUILTIN_PABSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI },
33721 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv8di2_mask, "__builtin_ia32_pabsq512_mask", IX86_BUILTIN_PABSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI },
33722 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16si3_mask, "__builtin_ia32_paddd512_mask", IX86_BUILTIN_PADDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33723 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8di3_mask, "__builtin_ia32_paddq512_mask", IX86_BUILTIN_PADDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33724 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv16si3_mask, "__builtin_ia32_pandd512_mask", IX86_BUILTIN_PANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33725 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv16si3_mask, "__builtin_ia32_pandnd512_mask", IX86_BUILTIN_PANDND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33726 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv8di3_mask, "__builtin_ia32_pandnq512_mask", IX86_BUILTIN_PANDNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33727 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv8di3_mask, "__builtin_ia32_pandq512_mask", IX86_BUILTIN_PANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33728 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16si_mask, "__builtin_ia32_pbroadcastd512", IX86_BUILTIN_PBROADCASTD512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_UHI },
33729 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv16si_mask, "__builtin_ia32_pbroadcastd512_gpr_mask", IX86_BUILTIN_PBROADCASTD512_GPR, UNKNOWN, (int) V16SI_FTYPE_SI_V16SI_UHI },
/* AVX-512CD-only entries: broadcast of a mask register into a vector.  */
33730 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_broadcastmb512", IX86_BUILTIN_PBROADCASTMB512, UNKNOWN, (int) V8DI_FTYPE_UQI },
33731 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv16si, "__builtin_ia32_broadcastmw512", IX86_BUILTIN_PBROADCASTMW512, UNKNOWN, (int) V16SI_FTYPE_UHI },
33732 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8di_mask, "__builtin_ia32_pbroadcastq512", IX86_BUILTIN_PBROADCASTQ512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_UQI },
33733 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_UQI },
33734 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv16si3_mask, "__builtin_ia32_pcmpeqd512_mask", IX86_BUILTIN_PCMPEQD512_MASK, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_UHI },
33735 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv8di3_mask, "__builtin_ia32_pcmpeqq512_mask", IX86_BUILTIN_PCMPEQQ512_MASK, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_UQI },
33736 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv16si3_mask, "__builtin_ia32_pcmpgtd512_mask", IX86_BUILTIN_PCMPGTD512_MASK, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_UHI },
33737 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv8di3_mask, "__builtin_ia32_pcmpgtq512_mask", IX86_BUILTIN_PCMPGTQ512_MASK, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_UQI },
33738 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16si_mask, "__builtin_ia32_compresssi512_mask", IX86_BUILTIN_PCOMPRESSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI },
33739 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8di_mask, "__builtin_ia32_compressdi512_mask", IX86_BUILTIN_PCOMPRESSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI },
33740 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandsi512_mask", IX86_BUILTIN_PEXPANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI },
33741 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandsi512_maskz", IX86_BUILTIN_PEXPANDD512Z, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI },
33742 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expanddi512_mask", IX86_BUILTIN_PEXPANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI },
33743 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expanddi512_maskz", IX86_BUILTIN_PEXPANDQ512Z, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI },
33744 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16si3_mask, "__builtin_ia32_pmaxsd512_mask", IX86_BUILTIN_PMAXSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33745 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8di3_mask, "__builtin_ia32_pmaxsq512_mask", IX86_BUILTIN_PMAXSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33746 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv16si3_mask, "__builtin_ia32_pmaxud512_mask", IX86_BUILTIN_PMAXUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33747 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv8di3_mask, "__builtin_ia32_pmaxuq512_mask", IX86_BUILTIN_PMAXUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33748 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16si3_mask, "__builtin_ia32_pminsd512_mask", IX86_BUILTIN_PMINSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33749 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8di3_mask, "__builtin_ia32_pminsq512_mask", IX86_BUILTIN_PMINSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33750 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv16si3_mask, "__builtin_ia32_pminud512_mask", IX86_BUILTIN_PMINUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33751 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv8di3_mask, "__builtin_ia32_pminuq512_mask", IX86_BUILTIN_PMINUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
/* Down-converting (truncating) and up-converting (extending) moves.
   pmov* = truncate, pmovs*/pmovus* = signed/unsigned saturating
   truncate, pmovsx*/pmovzx* = sign/zero extend.  */
33752 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask, "__builtin_ia32_pmovdb512_mask", IX86_BUILTIN_PMOVDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_UHI },
33753 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask, "__builtin_ia32_pmovdw512_mask", IX86_BUILTIN_PMOVDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_UHI },
33754 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask, "__builtin_ia32_pmovqb512_mask", IX86_BUILTIN_PMOVQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_UQI },
33755 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask, "__builtin_ia32_pmovqd512_mask", IX86_BUILTIN_PMOVQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_UQI },
33756 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask, "__builtin_ia32_pmovqw512_mask", IX86_BUILTIN_PMOVQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_UQI },
33757 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask, "__builtin_ia32_pmovsdb512_mask", IX86_BUILTIN_PMOVSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_UHI },
33758 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask, "__builtin_ia32_pmovsdw512_mask", IX86_BUILTIN_PMOVSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_UHI },
33759 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask, "__builtin_ia32_pmovsqb512_mask", IX86_BUILTIN_PMOVSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_UQI },
33760 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask, "__builtin_ia32_pmovsqd512_mask", IX86_BUILTIN_PMOVSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_UQI },
33761 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask, "__builtin_ia32_pmovsqw512_mask", IX86_BUILTIN_PMOVSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_UQI },
33762 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16qiv16si2_mask, "__builtin_ia32_pmovsxbd512_mask", IX86_BUILTIN_PMOVSXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_UHI },
33763 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8qiv8di2_mask, "__builtin_ia32_pmovsxbq512_mask", IX86_BUILTIN_PMOVSXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_UQI },
33764 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8siv8di2_mask, "__builtin_ia32_pmovsxdq512_mask", IX86_BUILTIN_PMOVSXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_UQI },
33765 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16hiv16si2_mask, "__builtin_ia32_pmovsxwd512_mask", IX86_BUILTIN_PMOVSXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_UHI },
33766 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8hiv8di2_mask, "__builtin_ia32_pmovsxwq512_mask", IX86_BUILTIN_PMOVSXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_UQI },
33767 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask, "__builtin_ia32_pmovusdb512_mask", IX86_BUILTIN_PMOVUSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_UHI },
33768 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask, "__builtin_ia32_pmovusdw512_mask", IX86_BUILTIN_PMOVUSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_UHI },
33769 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask, "__builtin_ia32_pmovusqb512_mask", IX86_BUILTIN_PMOVUSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_UQI },
33770 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask, "__builtin_ia32_pmovusqd512_mask", IX86_BUILTIN_PMOVUSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_UQI },
33771 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask, "__builtin_ia32_pmovusqw512_mask", IX86_BUILTIN_PMOVUSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_UQI },
33772 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16qiv16si2_mask, "__builtin_ia32_pmovzxbd512_mask", IX86_BUILTIN_PMOVZXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_UHI },
33773 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8qiv8di2_mask, "__builtin_ia32_pmovzxbq512_mask", IX86_BUILTIN_PMOVZXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_UQI },
33774 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8siv8di2_mask, "__builtin_ia32_pmovzxdq512_mask", IX86_BUILTIN_PMOVZXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_UQI },
33775 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16hiv16si2_mask, "__builtin_ia32_pmovzxwd512_mask", IX86_BUILTIN_PMOVZXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_UHI },
33776 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8hiv8di2_mask, "__builtin_ia32_pmovzxwq512_mask", IX86_BUILTIN_PMOVZXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_UQI },
33777 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_smult_even_v16si_mask, "__builtin_ia32_pmuldq512_mask", IX86_BUILTIN_PMULDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_UQI },
33778 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16si3_mask, "__builtin_ia32_pmulld512_mask" , IX86_BUILTIN_PMULLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33779 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_umult_even_v16si_mask, "__builtin_ia32_pmuludq512_mask", IX86_BUILTIN_PMULUDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_UQI },
33780 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv16si3_mask, "__builtin_ia32_pord512_mask", IX86_BUILTIN_PORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33781 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv8di3_mask, "__builtin_ia32_porq512_mask", IX86_BUILTIN_PORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
/* Rotates and shifts: *i variants take an immediate count (INT in the
   prototype), *v variants take a per-element count vector.  */
33782 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv16si_mask, "__builtin_ia32_prold512_mask", IX86_BUILTIN_PROLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI },
33783 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv8di_mask, "__builtin_ia32_prolq512_mask", IX86_BUILTIN_PROLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI },
33784 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv16si_mask, "__builtin_ia32_prolvd512_mask", IX86_BUILTIN_PROLVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33785 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv8di_mask, "__builtin_ia32_prolvq512_mask", IX86_BUILTIN_PROLVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33786 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv16si_mask, "__builtin_ia32_prord512_mask", IX86_BUILTIN_PRORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI },
33787 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv8di_mask, "__builtin_ia32_prorq512_mask", IX86_BUILTIN_PRORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI },
33788 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv16si_mask, "__builtin_ia32_prorvd512_mask", IX86_BUILTIN_PRORVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33789 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv8di_mask, "__builtin_ia32_prorvq512_mask", IX86_BUILTIN_PRORVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33790 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pshufdv3_mask, "__builtin_ia32_pshufd512_mask", IX86_BUILTIN_PSHUFD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI },
33791 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslld512_mask", IX86_BUILTIN_PSLLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_UHI },
33792 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslldi512_mask", IX86_BUILTIN_PSLLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI },
33793 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllq512_mask", IX86_BUILTIN_PSLLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_UQI },
33794 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllqi512_mask", IX86_BUILTIN_PSLLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI },
33795 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv16si_mask, "__builtin_ia32_psllv16si_mask", IX86_BUILTIN_PSLLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33796 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv8di_mask, "__builtin_ia32_psllv8di_mask", IX86_BUILTIN_PSLLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33797 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psrad512_mask", IX86_BUILTIN_PSRAD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_UHI },
33798 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psradi512_mask", IX86_BUILTIN_PSRADI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI },
33799 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraq512_mask", IX86_BUILTIN_PSRAQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_UQI },
33800 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraqi512_mask", IX86_BUILTIN_PSRAQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI },
33801 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv16si_mask, "__builtin_ia32_psrav16si_mask", IX86_BUILTIN_PSRAVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33802 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv8di_mask, "__builtin_ia32_psrav8di_mask", IX86_BUILTIN_PSRAVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33803 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrld512_mask", IX86_BUILTIN_PSRLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_UHI },
33804 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrldi512_mask", IX86_BUILTIN_PSRLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI },
33805 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlq512_mask", IX86_BUILTIN_PSRLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_UQI },
33806 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlqi512_mask", IX86_BUILTIN_PSRLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI },
33807 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv16si_mask, "__builtin_ia32_psrlv16si_mask", IX86_BUILTIN_PSRLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33808 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv8di_mask, "__builtin_ia32_psrlv8di_mask", IX86_BUILTIN_PSRLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33809 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16si3_mask, "__builtin_ia32_psubd512_mask", IX86_BUILTIN_PSUBD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33810 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8di3_mask, "__builtin_ia32_psubq512_mask", IX86_BUILTIN_PSUBQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33811 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv16si3_mask, "__builtin_ia32_ptestmd512", IX86_BUILTIN_PTESTMD512, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_UHI },
33812 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv8di3_mask, "__builtin_ia32_ptestmq512", IX86_BUILTIN_PTESTMQ512, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_UQI },
33813 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv16si3_mask, "__builtin_ia32_ptestnmd512", IX86_BUILTIN_PTESTNMD512, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_UHI },
33814 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv8di3_mask, "__builtin_ia32_ptestnmq512", IX86_BUILTIN_PTESTNMQ512, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_UQI },
33815 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv16si_mask, "__builtin_ia32_punpckhdq512_mask", IX86_BUILTIN_PUNPCKHDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33816 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv8di_mask, "__builtin_ia32_punpckhqdq512_mask", IX86_BUILTIN_PUNPCKHQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33817 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv16si_mask, "__builtin_ia32_punpckldq512_mask", IX86_BUILTIN_PUNPCKLDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33818 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv8di_mask, "__builtin_ia32_punpcklqdq512_mask", IX86_BUILTIN_PUNPCKLQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33819 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv16si3_mask, "__builtin_ia32_pxord512_mask", IX86_BUILTIN_PXORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33820 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv8di3_mask, "__builtin_ia32_pxorq512_mask", IX86_BUILTIN_PXORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
/* 14-bit-precision reciprocal / reciprocal-sqrt approximations.  */
33821 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v8df_mask, "__builtin_ia32_rcp14pd512_mask", IX86_BUILTIN_RCP14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI },
33822 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v16sf_mask, "__builtin_ia32_rcp14ps512_mask", IX86_BUILTIN_RCP14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI },
33823 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v2df, "__builtin_ia32_rcp14sd", IX86_BUILTIN_RCP14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33824 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v4sf, "__builtin_ia32_rcp14ss", IX86_BUILTIN_RCP14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33825 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v8df_mask, "__builtin_ia32_rsqrt14pd512_mask", IX86_BUILTIN_RSQRT14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI },
33826 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v16sf_mask, "__builtin_ia32_rsqrt14ps512_mask", IX86_BUILTIN_RSQRT14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI },
33827 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v2df, "__builtin_ia32_rsqrt14sd", IX86_BUILTIN_RSQRT14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33828 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v4sf, "__builtin_ia32_rsqrt14ss", IX86_BUILTIN_RSQRT14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33829 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufpd512_mask, "__builtin_ia32_shufpd512_mask", IX86_BUILTIN_SHUFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_UQI },
33830 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufps512_mask, "__builtin_ia32_shufps512_mask", IX86_BUILTIN_SHUFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_UHI },
33831 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_mask", IX86_BUILTIN_SHUF_F32x4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_UHI },
33832 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_mask", IX86_BUILTIN_SHUF_F64x2, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_UQI },
33833 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_mask", IX86_BUILTIN_SHUF_I32x4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_UHI },
33834 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_mask", IX86_BUILTIN_SHUF_I64x2, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UQI },
33835 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv16si3_mask, "__builtin_ia32_ucmpd512_mask", IX86_BUILTIN_UCMPD512, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_INT_UHI },
33836 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv8di3_mask, "__builtin_ia32_ucmpq512_mask", IX86_BUILTIN_UCMPQ512, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_INT_UQI },
33837 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhpd512_mask, "__builtin_ia32_unpckhpd512_mask", IX86_BUILTIN_UNPCKHPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI },
33838 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhps512_mask, "__builtin_ia32_unpckhps512_mask", IX86_BUILTIN_UNPCKHPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI },
33839 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklpd512_mask, "__builtin_ia32_unpcklpd512_mask", IX86_BUILTIN_UNPCKLPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI },
33840 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklps512_mask, "__builtin_ia32_unpcklps512_mask", IX86_BUILTIN_UNPCKLPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI },
/* AVX-512CD leading-zero-count and conflict-detection builtins.  */
33841 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv16si2_mask, "__builtin_ia32_vplzcntd_512_mask", IX86_BUILTIN_VPCLZCNTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI },
33842 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv8di2_mask, "__builtin_ia32_vplzcntq_512_mask", IX86_BUILTIN_VPCLZCNTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI },
33843 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv16si_mask, "__builtin_ia32_vpconflictsi_512_mask", IX86_BUILTIN_VPCONFLICTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI },
33844 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv8di_mask, "__builtin_ia32_vpconflictdi_512_mask", IX86_BUILTIN_VPCONFLICTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI },
/* Permutations: vpermi2var* keeps the index operand, vpermt2var*
   keeps a table operand; _maskz variants zero-mask the result.  */
33845 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8df_mask, "__builtin_ia32_permdf512_mask", IX86_BUILTIN_VPERMDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_UQI },
33846 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8di_mask, "__builtin_ia32_permdi512_mask", IX86_BUILTIN_VPERMDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI },
33847 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16si3_mask, "__builtin_ia32_vpermi2vard512_mask", IX86_BUILTIN_VPERMI2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33848 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8df3_mask, "__builtin_ia32_vpermi2varpd512_mask", IX86_BUILTIN_VPERMI2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_UQI },
33849 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16sf3_mask, "__builtin_ia32_vpermi2varps512_mask", IX86_BUILTIN_VPERMI2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_UHI },
33850 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8di3_mask, "__builtin_ia32_vpermi2varq512_mask", IX86_BUILTIN_VPERMI2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33851 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv8df_mask, "__builtin_ia32_vpermilpd512_mask", IX86_BUILTIN_VPERMILPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_UQI },
33852 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv16sf_mask, "__builtin_ia32_vpermilps512_mask", IX86_BUILTIN_VPERMILPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_UHI },
33853 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv8df3_mask, "__builtin_ia32_vpermilvarpd512_mask", IX86_BUILTIN_VPERMILVARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_UQI },
33854 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv16sf3_mask, "__builtin_ia32_vpermilvarps512_mask", IX86_BUILTIN_VPERMILVARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_UHI },
33855 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_mask, "__builtin_ia32_vpermt2vard512_mask", IX86_BUILTIN_VPERMT2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33856 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_maskz, "__builtin_ia32_vpermt2vard512_maskz", IX86_BUILTIN_VPERMT2VARD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33857 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_mask, "__builtin_ia32_vpermt2varpd512_mask", IX86_BUILTIN_VPERMT2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_UQI },
33858 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_maskz, "__builtin_ia32_vpermt2varpd512_maskz", IX86_BUILTIN_VPERMT2VARPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_UQI },
33859 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_mask, "__builtin_ia32_vpermt2varps512_mask", IX86_BUILTIN_VPERMT2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_UHI },
33860 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_maskz, "__builtin_ia32_vpermt2varps512_maskz", IX86_BUILTIN_VPERMT2VARPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_UHI },
33861 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_mask, "__builtin_ia32_vpermt2varq512_mask", IX86_BUILTIN_VPERMT2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33862 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_maskz, "__builtin_ia32_vpermt2varq512_maskz", IX86_BUILTIN_VPERMT2VARQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33863 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8df_mask, "__builtin_ia32_permvardf512_mask", IX86_BUILTIN_VPERMVARDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_UQI },
33864 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8di_mask, "__builtin_ia32_permvardi512_mask", IX86_BUILTIN_VPERMVARDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33865 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16sf_mask, "__builtin_ia32_permvarsf512_mask", IX86_BUILTIN_VPERMVARSF512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_UHI },
33866 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16si_mask, "__builtin_ia32_permvarsi512_mask", IX86_BUILTIN_VPERMVARSI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33867 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_mask, "__builtin_ia32_pternlogd512_mask", IX86_BUILTIN_VTERNLOGD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_UHI },
33868 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_maskz, "__builtin_ia32_pternlogd512_maskz", IX86_BUILTIN_VTERNLOGD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_UHI },
33869 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_mask, "__builtin_ia32_pternlogq512_mask", IX86_BUILTIN_VTERNLOGQ512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_UQI },
33870 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_maskz, "__builtin_ia32_pternlogq512_maskz", IX86_BUILTIN_VTERNLOGQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_UQI },
/* Unmasked float helpers; AVX-512ER exp2 approximation.  */
33872 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv16sf3, "__builtin_ia32_copysignps512", IX86_BUILTIN_CPYSGNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF },
33873 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv8df3, "__builtin_ia32_copysignpd512", IX86_BUILTIN_CPYSGNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF },
33874 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2, "__builtin_ia32_sqrtpd512", IX86_BUILTIN_SQRTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF },
33875 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sqrtv16sf2, "__builtin_ia32_sqrtps512", IX86_BUILTIN_SQRTPS_NR512, UNKNOWN, (int) V16SF_FTYPE_V16SF },
33876 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf, "__builtin_ia32_exp2ps", IX86_BUILTIN_EXP2PS, UNKNOWN, (int) V16SF_FTYPE_V16SF },
/* Rounding variants share one expander; the normally-UNKNOWN sub-code
   field carries ROUND_FLOOR / ROUND_CEIL cast to rtx_code.  */
33877 { OPTION_MASK_ISA_AVX512F, CODE_FOR_roundv8df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix512", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF },
33878 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_floorpd_vec_pack_sfix512", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
33879 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_ceilpd_vec_pack_sfix512", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
33881 /* Mask arithmetic operations */
/* AVX-512F 16-bit mask (k0-k7) register operations: logic, the
   kortest condition readers, byte unpack and register move.  All take
   and return UHI (16-bit unsigned) mask values.  */
33882 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andhi3, "__builtin_ia32_kandhi", IX86_BUILTIN_KAND16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI },
33883 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kandnhi, "__builtin_ia32_kandnhi", IX86_BUILTIN_KANDN16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI },
33884 { OPTION_MASK_ISA_AVX512F, CODE_FOR_one_cmplhi2, "__builtin_ia32_knothi", IX86_BUILTIN_KNOT16, UNKNOWN, (int) UHI_FTYPE_UHI },
33885 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorhi3, "__builtin_ia32_korhi", IX86_BUILTIN_KOR16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI },
33886 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestchi, "__builtin_ia32_kortestchi", IX86_BUILTIN_KORTESTC16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI },
33887 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestzhi, "__builtin_ia32_kortestzhi", IX86_BUILTIN_KORTESTZ16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI },
33888 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kunpckhi, "__builtin_ia32_kunpckhi", IX86_BUILTIN_KUNPCKBW, UNKNOWN, (int) UHI_FTYPE_UHI_UHI },
33889 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kxnorhi, "__builtin_ia32_kxnorhi", IX86_BUILTIN_KXNOR16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI },
33890 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorhi3, "__builtin_ia32_kxorhi", IX86_BUILTIN_KXOR16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI },
33891 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kmovw, "__builtin_ia32_kmov16", IX86_BUILTIN_KMOV16, UNKNOWN, (int) UHI_FTYPE_UHI },
/* SHA-NI (SHA1/SHA256) builtins.  Unlike the rows above, the name
   field is 0; presumably the __builtin_ia32_* name is attached
   elsewhere when these descriptors are registered — confirm against
   the def_builtin loop that consumes this table.  NOTE(review): the
   ISA gate here is the SSE2 mask, not a SHA-specific one — verify the
   SHA target check is applied at expansion/registration time.  */
33894 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg1, 0, IX86_BUILTIN_SHA1MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33895 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg2, 0, IX86_BUILTIN_SHA1MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33896 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1nexte, 0, IX86_BUILTIN_SHA1NEXTE, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33897 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1rnds4, 0, IX86_BUILTIN_SHA1RNDS4, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
33898 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg1, 0, IX86_BUILTIN_SHA256MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33899 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg2, 0, IX86_BUILTIN_SHA256MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33900 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256rnds2, 0, IX86_BUILTIN_SHA256RNDS2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
/* AVX-512VL 128/256-bit masked builtins.  Rows whose insn also needs
   byte/word element masking OR both the AVX512BW and AVX512VL option
   masks together.  */
33903 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_palignrv32qi_mask, "__builtin_ia32_palignr256_mask", IX86_BUILTIN_PALIGNR256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_USI_CONVERT },
33904 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_palignrv16qi_mask, "__builtin_ia32_palignr128_mask", IX86_BUILTIN_PALIGNR128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_UHI_CONVERT },
33905 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4di_mask, "__builtin_ia32_movdqa64_256_mask", IX86_BUILTIN_MOVDQA64_256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI },
33906 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2di_mask, "__builtin_ia32_movdqa64_128_mask", IX86_BUILTIN_MOVDQA64_128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI },
33907 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8si_mask, "__builtin_ia32_movdqa32_256_mask", IX86_BUILTIN_MOVDQA32_256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UQI },
33908 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4si_mask, "__builtin_ia32_movdqa32_128_mask", IX86_BUILTIN_MOVDQA32_128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
33909 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4df_mask, "__builtin_ia32_movapd256_mask", IX86_BUILTIN_MOVAPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
33910 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2df_mask, "__builtin_ia32_movapd128_mask", IX86_BUILTIN_MOVAPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
33911 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8sf_mask, "__builtin_ia32_movaps256_mask", IX86_BUILTIN_MOVAPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
33912 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4sf_mask, "__builtin_ia32_movaps128_mask", IX86_BUILTIN_MOVAPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
33913 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv16hi_mask, "__builtin_ia32_movdquhi256_mask", IX86_BUILTIN_MOVDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_UHI },
33914 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv8hi_mask, "__builtin_ia32_movdquhi128_mask", IX86_BUILTIN_MOVDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_UQI },
33915 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv32qi_mask, "__builtin_ia32_movdquqi256_mask", IX86_BUILTIN_MOVDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_USI },
33916 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv16qi_mask, "__builtin_ia32_movdquqi128_mask", IX86_BUILTIN_MOVDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_UHI },
33917 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4sf3_mask, "__builtin_ia32_minps_mask", IX86_BUILTIN_MINPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
33918 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4sf3_mask, "__builtin_ia32_maxps_mask", IX86_BUILTIN_MAXPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
33919 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv2df3_mask, "__builtin_ia32_minpd_mask", IX86_BUILTIN_MINPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
33920 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv2df3_mask, "__builtin_ia32_maxpd_mask", IX86_BUILTIN_MAXPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
33921 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4df3_mask, "__builtin_ia32_maxpd256_mask", IX86_BUILTIN_MAXPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
33922 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8sf3_mask, "__builtin_ia32_maxps256_mask", IX86_BUILTIN_MAXPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
33923 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4df3_mask, "__builtin_ia32_minpd256_mask", IX86_BUILTIN_MINPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
33924 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8sf3_mask, "__builtin_ia32_minps256_mask", IX86_BUILTIN_MINPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
33925 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4sf3_mask, "__builtin_ia32_mulps_mask", IX86_BUILTIN_MULPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
33926 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_divv4sf3_mask, "__builtin_ia32_divps_mask", IX86_BUILTIN_DIVPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
33927 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv2df3_mask, "__builtin_ia32_mulpd_mask", IX86_BUILTIN_MULPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
33928 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_divv2df3_mask, "__builtin_ia32_divpd_mask", IX86_BUILTIN_DIVPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
33929 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_divv4df3_mask, "__builtin_ia32_divpd256_mask", IX86_BUILTIN_DIVPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
33930 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_divv8sf3_mask, "__builtin_ia32_divps256_mask", IX86_BUILTIN_DIVPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
33931 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4df3_mask, "__builtin_ia32_mulpd256_mask", IX86_BUILTIN_MULPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
33932 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8sf3_mask, "__builtin_ia32_mulps256_mask", IX86_BUILTIN_MULPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
33933 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv2df3_mask, "__builtin_ia32_addpd128_mask", IX86_BUILTIN_ADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
33934 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4df3_mask, "__builtin_ia32_addpd256_mask", IX86_BUILTIN_ADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
33935 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4sf3_mask, "__builtin_ia32_addps128_mask", IX86_BUILTIN_ADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
33936 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8sf3_mask, "__builtin_ia32_addps256_mask", IX86_BUILTIN_ADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
33937 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv2df3_mask, "__builtin_ia32_subpd128_mask", IX86_BUILTIN_SUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
33938 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4df3_mask, "__builtin_ia32_subpd256_mask", IX86_BUILTIN_SUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
33939 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4sf3_mask, "__builtin_ia32_subps128_mask", IX86_BUILTIN_SUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
33940 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8sf3_mask, "__builtin_ia32_subps256_mask", IX86_BUILTIN_SUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
33941 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4df3_mask, "__builtin_ia32_xorpd256_mask", IX86_BUILTIN_XORPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
33942 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv2df3_mask, "__builtin_ia32_xorpd128_mask", IX86_BUILTIN_XORPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
33943 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv8sf3_mask, "__builtin_ia32_xorps256_mask", IX86_BUILTIN_XORPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
33944 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4sf3_mask, "__builtin_ia32_xorps128_mask", IX86_BUILTIN_XORPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
33945 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4df3_mask, "__builtin_ia32_orpd256_mask", IX86_BUILTIN_ORPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
33946 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv2df3_mask, "__builtin_ia32_orpd128_mask", IX86_BUILTIN_ORPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
33947 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv8sf3_mask, "__builtin_ia32_orps256_mask", IX86_BUILTIN_ORPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
33948 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4sf3_mask, "__builtin_ia32_orps128_mask", IX86_BUILTIN_ORPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
33949 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv8sf_mask, "__builtin_ia32_broadcastf32x2_256_mask", IX86_BUILTIN_BROADCASTF32x2_256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_UQI },
33950 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv8si_mask, "__builtin_ia32_broadcasti32x2_256_mask", IX86_BUILTIN_BROADCASTI32x2_256, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_UQI },
33951 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4si_mask, "__builtin_ia32_broadcasti32x2_128_mask", IX86_BUILTIN_BROADCASTI32x2_128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
33952 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4df_mask_1, "__builtin_ia32_broadcastf64x2_256_mask", IX86_BUILTIN_BROADCASTF64X2_256, UNKNOWN, (int) V4DF_FTYPE_V2DF_V4DF_UQI },
33953 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4di_mask_1, "__builtin_ia32_broadcasti64x2_256_mask", IX86_BUILTIN_BROADCASTI64X2_256, UNKNOWN, (int) V4DI_FTYPE_V2DI_V4DI_UQI },
33954 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_broadcastv8sf_mask_1, "__builtin_ia32_broadcastf32x4_256_mask", IX86_BUILTIN_BROADCASTF32X4_256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_UQI },
33955 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_broadcastv8si_mask_1, "__builtin_ia32_broadcasti32x4_256_mask", IX86_BUILTIN_BROADCASTI32X4_256, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_UQI },
33956 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v8sf, "__builtin_ia32_extractf32x4_256_mask", IX86_BUILTIN_EXTRACTF32X4_256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT_V4SF_UQI },
33957 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v8si, "__builtin_ia32_extracti32x4_256_mask", IX86_BUILTIN_EXTRACTI32X4_256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT_V4SI_UQI },
33958 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_dbpsadbwv16hi_mask, "__builtin_ia32_dbpsadbw256_mask", IX86_BUILTIN_DBPSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI_INT_V16HI_UHI },
33959 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_dbpsadbwv8hi_mask, "__builtin_ia32_dbpsadbw128_mask", IX86_BUILTIN_DBPSADBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI_INT_V8HI_UQI },
33960 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4dfv4di2_mask, "__builtin_ia32_cvttpd2qq256_mask", IX86_BUILTIN_CVTTPD2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_UQI },
33961 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv2dfv2di2_mask, "__builtin_ia32_cvttpd2qq128_mask", IX86_BUILTIN_CVTTPD2QQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_UQI },
33962 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4dfv4di2_mask, "__builtin_ia32_cvttpd2uqq256_mask", IX86_BUILTIN_CVTTPD2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_UQI },
33963 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2dfv2di2_mask, "__builtin_ia32_cvttpd2uqq128_mask", IX86_BUILTIN_CVTTPD2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_UQI },
33964 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_notruncv4dfv4di2_mask, "__builtin_ia32_cvtpd2qq256_mask", IX86_BUILTIN_CVTPD2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_UQI },
33965 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_notruncv2dfv2di2_mask, "__builtin_ia32_cvtpd2qq128_mask", IX86_BUILTIN_CVTPD2QQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_UQI },
33966 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv4dfv4di2_mask, "__builtin_ia32_cvtpd2uqq256_mask", IX86_BUILTIN_CVTPD2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_UQI },
33967 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv2dfv2di2_mask, "__builtin_ia32_cvtpd2uqq128_mask", IX86_BUILTIN_CVTPD2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_UQI },
33968 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv4dfv4si2_mask, "__builtin_ia32_cvtpd2udq256_mask", IX86_BUILTIN_CVTPD2UDQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_UQI },
33969 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv2dfv2si2_mask, "__builtin_ia32_cvtpd2udq128_mask", IX86_BUILTIN_CVTPD2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_UQI },
33970 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4sfv4di2_mask, "__builtin_ia32_cvttps2qq256_mask", IX86_BUILTIN_CVTTPS2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_UQI },
33971 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2qq128_mask", IX86_BUILTIN_CVTTPS2QQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_UQI },
33972 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4sfv4di2_mask, "__builtin_ia32_cvttps2uqq256_mask", IX86_BUILTIN_CVTTPS2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_UQI },
33973 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2uqq128_mask", IX86_BUILTIN_CVTTPS2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_UQI },
33974 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv8sfv8si2_mask, "__builtin_ia32_cvttps2dq256_mask", IX86_BUILTIN_CVTTPS2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_UQI },
33975 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4sfv4si2_mask, "__builtin_ia32_cvttps2dq128_mask", IX86_BUILTIN_CVTTPS2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_UQI },
33976 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv8sfv8si2_mask, "__builtin_ia32_cvttps2udq256_mask", IX86_BUILTIN_CVTTPS2UDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_UQI },
33977 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4sfv4si2_mask, "__builtin_ia32_cvttps2udq128_mask", IX86_BUILTIN_CVTTPS2UDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_UQI },
33978 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4dfv4si2_mask, "__builtin_ia32_cvttpd2dq256_mask", IX86_BUILTIN_CVTTPD2DQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_UQI },
33979 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvttpd2dq_mask, "__builtin_ia32_cvttpd2dq128_mask", IX86_BUILTIN_CVTTPD2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_UQI },
33980 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4dfv4si2_mask, "__builtin_ia32_cvttpd2udq256_mask", IX86_BUILTIN_CVTTPD2UDQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_UQI },
33981 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2dfv2si2_mask, "__builtin_ia32_cvttpd2udq128_mask", IX86_BUILTIN_CVTTPD2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_UQI },
33982 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtpd2dq256_mask, "__builtin_ia32_cvtpd2dq256_mask", IX86_BUILTIN_CVTPD2DQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_UQI },
33983 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtpd2dq_mask, "__builtin_ia32_cvtpd2dq128_mask", IX86_BUILTIN_CVTPD2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_UQI },
33984 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4siv4df2_mask, "__builtin_ia32_cvtdq2pd256_mask", IX86_BUILTIN_CVTDQ2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SI_V4DF_UQI },
33985 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtdq2pd_mask, "__builtin_ia32_cvtdq2pd128_mask", IX86_BUILTIN_CVTDQ2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SI_V2DF_UQI },
33986 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4siv4df2_mask, "__builtin_ia32_cvtudq2pd256_mask", IX86_BUILTIN_CVTUDQ2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SI_V4DF_UQI },
33987 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2siv2df2_mask, "__builtin_ia32_cvtudq2pd128_mask", IX86_BUILTIN_CVTUDQ2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SI_V2DF_UQI },
33988 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv8siv8sf2_mask, "__builtin_ia32_cvtdq2ps256_mask", IX86_BUILTIN_CVTDQ2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_UQI },
33989 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4siv4sf2_mask, "__builtin_ia32_cvtdq2ps128_mask", IX86_BUILTIN_CVTDQ2PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_UQI },
33990 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv8siv8sf2_mask, "__builtin_ia32_cvtudq2ps256_mask", IX86_BUILTIN_CVTUDQ2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_UQI },
33991 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4siv4sf2_mask, "__builtin_ia32_cvtudq2ps128_mask", IX86_BUILTIN_CVTUDQ2PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_UQI },
33992 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtps2pd256_mask, "__builtin_ia32_cvtps2pd256_mask", IX86_BUILTIN_CVTPS2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SF_V4DF_UQI },
33993 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtps2pd_mask, "__builtin_ia32_cvtps2pd128_mask", IX86_BUILTIN_CVTPS2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SF_V2DF_UQI },
33994 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv32qi_mask, "__builtin_ia32_pbroadcastb256_mask", IX86_BUILTIN_PBROADCASTB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16QI_V32QI_USI },
33995 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv32qi_mask, "__builtin_ia32_pbroadcastb256_gpr_mask", IX86_BUILTIN_PBROADCASTB256_GPR_MASK, UNKNOWN, (int) V32QI_FTYPE_QI_V32QI_USI },
33996 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv16qi_mask, "__builtin_ia32_pbroadcastb128_mask", IX86_BUILTIN_PBROADCASTB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_UHI },
33997 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv16qi_mask, "__builtin_ia32_pbroadcastb128_gpr_mask", IX86_BUILTIN_PBROADCASTB128_GPR_MASK, UNKNOWN, (int) V16QI_FTYPE_QI_V16QI_UHI },
33998 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv16hi_mask, "__builtin_ia32_pbroadcastw256_mask", IX86_BUILTIN_PBROADCASTW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8HI_V16HI_UHI },
33999 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv16hi_mask, "__builtin_ia32_pbroadcastw256_gpr_mask", IX86_BUILTIN_PBROADCASTW256_GPR_MASK, UNKNOWN, (int) V16HI_FTYPE_HI_V16HI_UHI },
34000 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8hi_mask, "__builtin_ia32_pbroadcastw128_mask", IX86_BUILTIN_PBROADCASTW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_UQI },
34001 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv8hi_mask, "__builtin_ia32_pbroadcastw128_gpr_mask", IX86_BUILTIN_PBROADCASTW128_GPR_MASK, UNKNOWN, (int) V8HI_FTYPE_HI_V8HI_UQI },
34002 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8si_mask, "__builtin_ia32_pbroadcastd256_mask", IX86_BUILTIN_PBROADCASTD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_UQI },
34003 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv8si_mask, "__builtin_ia32_pbroadcastd256_gpr_mask", IX86_BUILTIN_PBROADCASTD256_GPR_MASK, UNKNOWN, (int) V8SI_FTYPE_SI_V8SI_UQI },
34004 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4si_mask, "__builtin_ia32_pbroadcastd128_mask", IX86_BUILTIN_PBROADCASTD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
34005 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv4si_mask, "__builtin_ia32_pbroadcastd128_gpr_mask", IX86_BUILTIN_PBROADCASTD128_GPR_MASK, UNKNOWN, (int) V4SI_FTYPE_SI_V4SI_UQI },
34006 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4di_mask, "__builtin_ia32_pbroadcastq256_mask", IX86_BUILTIN_PBROADCASTQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V2DI_V4DI_UQI },
34007 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv4di_mask, "__builtin_ia32_pbroadcastq256_gpr_mask", IX86_BUILTIN_PBROADCASTQ256_GPR_MASK, UNKNOWN, (int) V4DI_FTYPE_DI_V4DI_UQI },
34008 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv2di_mask, "__builtin_ia32_pbroadcastq128_mask", IX86_BUILTIN_PBROADCASTQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI },
34009 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv2di_mask, "__builtin_ia32_pbroadcastq128_gpr_mask", IX86_BUILTIN_PBROADCASTQ128_GPR_MASK, UNKNOWN, (int) V2DI_FTYPE_DI_V2DI_UQI },
34010 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8sf_mask, "__builtin_ia32_broadcastss256_mask", IX86_BUILTIN_BROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_UQI },
34011 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4sf_mask, "__builtin_ia32_broadcastss128_mask", IX86_BUILTIN_BROADCASTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
34012 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4df_mask, "__builtin_ia32_broadcastsd256_mask", IX86_BUILTIN_BROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_V2DF_V4DF_UQI },
34013 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v4df, "__builtin_ia32_extractf64x2_256_mask", IX86_BUILTIN_EXTRACTF64X2_256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT_V2DF_UQI },
34014 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v4di, "__builtin_ia32_extracti64x2_256_mask", IX86_BUILTIN_EXTRACTI64X2_256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT_V2DI_UQI },
34015 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv8sf, "__builtin_ia32_insertf32x4_256_mask", IX86_BUILTIN_INSERTF32X4_256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT_V8SF_UQI },
34016 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv8si, "__builtin_ia32_inserti32x4_256_mask", IX86_BUILTIN_INSERTI32X4_256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT_V8SI_UQI },
34017 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv16qiv16hi2_mask, "__builtin_ia32_pmovsxbw256_mask", IX86_BUILTIN_PMOVSXBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16QI_V16HI_UHI },
34018 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv8qiv8hi2_mask, "__builtin_ia32_pmovsxbw128_mask", IX86_BUILTIN_PMOVSXBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V8HI_UQI },
34019 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv8qiv8si2_mask, "__builtin_ia32_pmovsxbd256_mask", IX86_BUILTIN_PMOVSXBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16QI_V8SI_UQI },
34020 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv4qiv4si2_mask, "__builtin_ia32_pmovsxbd128_mask", IX86_BUILTIN_PMOVSXBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V16QI_V4SI_UQI },
34021 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4qiv4di2_mask, "__builtin_ia32_pmovsxbq256_mask", IX86_BUILTIN_PMOVSXBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V16QI_V4DI_UQI },
34022 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2qiv2di2_mask, "__builtin_ia32_pmovsxbq128_mask", IX86_BUILTIN_PMOVSXBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V16QI_V2DI_UQI },
34023 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv8hiv8si2_mask, "__builtin_ia32_pmovsxwd256_mask", IX86_BUILTIN_PMOVSXWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HI_V8SI_UQI },
34024 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv4hiv4si2_mask, "__builtin_ia32_pmovsxwd128_mask", IX86_BUILTIN_PMOVSXWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V4SI_UQI },
34025 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4hiv4di2_mask, "__builtin_ia32_pmovsxwq256_mask", IX86_BUILTIN_PMOVSXWQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HI_V4DI_UQI },
34026 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2hiv2di2_mask, "__builtin_ia32_pmovsxwq128_mask", IX86_BUILTIN_PMOVSXWQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HI_V2DI_UQI },
34027 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4siv4di2_mask, "__builtin_ia32_pmovsxdq256_mask", IX86_BUILTIN_PMOVSXDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4SI_V4DI_UQI },
34028 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2siv2di2_mask, "__builtin_ia32_pmovsxdq128_mask", IX86_BUILTIN_PMOVSXDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V2DI_UQI },
34029 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv16qiv16hi2_mask, "__builtin_ia32_pmovzxbw256_mask", IX86_BUILTIN_PMOVZXBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16QI_V16HI_UHI },
34030 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv8qiv8hi2_mask, "__builtin_ia32_pmovzxbw128_mask", IX86_BUILTIN_PMOVZXBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V8HI_UQI },
34031 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv8qiv8si2_mask, "__builtin_ia32_pmovzxbd256_mask", IX86_BUILTIN_PMOVZXBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16QI_V8SI_UQI },
34032 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv4qiv4si2_mask, "__builtin_ia32_pmovzxbd128_mask", IX86_BUILTIN_PMOVZXBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V16QI_V4SI_UQI },
34033 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4qiv4di2_mask, "__builtin_ia32_pmovzxbq256_mask", IX86_BUILTIN_PMOVZXBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V16QI_V4DI_UQI },
34034 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2qiv2di2_mask, "__builtin_ia32_pmovzxbq128_mask", IX86_BUILTIN_PMOVZXBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V16QI_V2DI_UQI },
34035 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv8hiv8si2_mask, "__builtin_ia32_pmovzxwd256_mask", IX86_BUILTIN_PMOVZXWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HI_V8SI_UQI },
34036 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv4hiv4si2_mask, "__builtin_ia32_pmovzxwd128_mask", IX86_BUILTIN_PMOVZXWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V4SI_UQI },
34037 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4hiv4di2_mask, "__builtin_ia32_pmovzxwq256_mask", IX86_BUILTIN_PMOVZXWQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HI_V4DI_UQI },
34038 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2hiv2di2_mask, "__builtin_ia32_pmovzxwq128_mask", IX86_BUILTIN_PMOVZXWQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HI_V2DI_UQI },
34039 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4siv4di2_mask, "__builtin_ia32_pmovzxdq256_mask", IX86_BUILTIN_PMOVZXDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4SI_V4DI_UQI },
34040 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2siv2di2_mask, "__builtin_ia32_pmovzxdq128_mask", IX86_BUILTIN_PMOVZXDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V2DI_UQI },
34041 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv4df_mask, "__builtin_ia32_reducepd256_mask", IX86_BUILTIN_REDUCEPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_UQI },
34042 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv2df_mask, "__builtin_ia32_reducepd128_mask", IX86_BUILTIN_REDUCEPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_UQI },
34043 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv8sf_mask, "__builtin_ia32_reduceps256_mask", IX86_BUILTIN_REDUCEPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_UQI },
34044 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv4sf_mask, "__builtin_ia32_reduceps128_mask", IX86_BUILTIN_REDUCEPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_UQI },
34045 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducesv2df, "__builtin_ia32_reducesd", IX86_BUILTIN_REDUCESD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
34046 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducesv4sf, "__builtin_ia32_reducess", IX86_BUILTIN_REDUCESS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
34047 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv16hi_mask, "__builtin_ia32_permvarhi256_mask", IX86_BUILTIN_VPERMVARHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34048 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv8hi_mask, "__builtin_ia32_permvarhi128_mask", IX86_BUILTIN_VPERMVARHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34049 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16hi3_mask, "__builtin_ia32_vpermt2varhi256_mask", IX86_BUILTIN_VPERMT2VARHI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34050 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16hi3_maskz, "__builtin_ia32_vpermt2varhi256_maskz", IX86_BUILTIN_VPERMT2VARHI256_MASKZ, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34051 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8hi3_mask, "__builtin_ia32_vpermt2varhi128_mask", IX86_BUILTIN_VPERMT2VARHI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34052 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8hi3_maskz, "__builtin_ia32_vpermt2varhi128_maskz", IX86_BUILTIN_VPERMT2VARHI128_MASKZ, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34053 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv16hi3_mask, "__builtin_ia32_vpermi2varhi256_mask", IX86_BUILTIN_VPERMI2VARHI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34054 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8hi3_mask, "__builtin_ia32_vpermi2varhi128_mask", IX86_BUILTIN_VPERMI2VARHI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34055 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v4df_mask, "__builtin_ia32_rcp14pd256_mask", IX86_BUILTIN_RCP14PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
34056 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v2df_mask, "__builtin_ia32_rcp14pd128_mask", IX86_BUILTIN_RCP14PD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
34057 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v8sf_mask, "__builtin_ia32_rcp14ps256_mask", IX86_BUILTIN_RCP14PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
34058 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v4sf_mask, "__builtin_ia32_rcp14ps128_mask", IX86_BUILTIN_RCP14PS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
34059 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v4df_mask, "__builtin_ia32_rsqrt14pd256_mask", IX86_BUILTIN_RSQRT14PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
34060 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v2df_mask, "__builtin_ia32_rsqrt14pd128_mask", IX86_BUILTIN_RSQRT14PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
34061 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v8sf_mask, "__builtin_ia32_rsqrt14ps256_mask", IX86_BUILTIN_RSQRT14PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
34062 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v4sf_mask, "__builtin_ia32_rsqrt14ps128_mask", IX86_BUILTIN_RSQRT14PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
34063 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_sqrtv4df2_mask, "__builtin_ia32_sqrtpd256_mask", IX86_BUILTIN_SQRTPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
34064 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sqrtv2df2_mask, "__builtin_ia32_sqrtpd128_mask", IX86_BUILTIN_SQRTPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
34065 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_sqrtv8sf2_mask, "__builtin_ia32_sqrtps256_mask", IX86_BUILTIN_SQRTPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
34066 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_sqrtv4sf2_mask, "__builtin_ia32_sqrtps128_mask", IX86_BUILTIN_SQRTPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
34067 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv16qi3_mask, "__builtin_ia32_paddb128_mask", IX86_BUILTIN_PADDB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34068 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8hi3_mask, "__builtin_ia32_paddw128_mask", IX86_BUILTIN_PADDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34069 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4si3_mask, "__builtin_ia32_paddd128_mask", IX86_BUILTIN_PADDD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34070 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv2di3_mask, "__builtin_ia32_paddq128_mask", IX86_BUILTIN_PADDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34071 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv16qi3_mask, "__builtin_ia32_psubb128_mask", IX86_BUILTIN_PSUBB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34072 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8hi3_mask, "__builtin_ia32_psubw128_mask", IX86_BUILTIN_PSUBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34073 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4si3_mask, "__builtin_ia32_psubd128_mask", IX86_BUILTIN_PSUBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34074 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv2di3_mask, "__builtin_ia32_psubq128_mask", IX86_BUILTIN_PSUBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34075 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ssaddv16qi3_mask, "__builtin_ia32_paddsb128_mask", IX86_BUILTIN_PADDSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34076 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ssaddv8hi3_mask, "__builtin_ia32_paddsw128_mask", IX86_BUILTIN_PADDSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34077 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sssubv16qi3_mask, "__builtin_ia32_psubsb128_mask", IX86_BUILTIN_PSUBSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34078 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sssubv8hi3_mask, "__builtin_ia32_psubsw128_mask", IX86_BUILTIN_PSUBSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34079 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_usaddv16qi3_mask, "__builtin_ia32_paddusb128_mask", IX86_BUILTIN_PADDUSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34080 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_usaddv8hi3_mask, "__builtin_ia32_paddusw128_mask", IX86_BUILTIN_PADDUSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34081 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ussubv16qi3_mask, "__builtin_ia32_psubusb128_mask", IX86_BUILTIN_PSUBUSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34082 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ussubv8hi3_mask, "__builtin_ia32_psubusw128_mask", IX86_BUILTIN_PSUBUSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34083 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv32qi3_mask, "__builtin_ia32_paddb256_mask", IX86_BUILTIN_PADDB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34084 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv16hi3_mask, "__builtin_ia32_paddw256_mask", IX86_BUILTIN_PADDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34085 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8si3_mask, "__builtin_ia32_paddd256_mask", IX86_BUILTIN_PADDD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34086 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4di3_mask, "__builtin_ia32_paddq256_mask", IX86_BUILTIN_PADDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34087 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ssaddv32qi3_mask, "__builtin_ia32_paddsb256_mask", IX86_BUILTIN_PADDSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34088 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ssaddv16hi3_mask, "__builtin_ia32_paddsw256_mask", IX86_BUILTIN_PADDSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34089 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_usaddv32qi3_mask, "__builtin_ia32_paddusb256_mask", IX86_BUILTIN_PADDUSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34090 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_usaddv16hi3_mask, "__builtin_ia32_paddusw256_mask", IX86_BUILTIN_PADDUSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34091 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv32qi3_mask, "__builtin_ia32_psubb256_mask", IX86_BUILTIN_PSUBB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34092 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv16hi3_mask, "__builtin_ia32_psubw256_mask", IX86_BUILTIN_PSUBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34093 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8si3_mask, "__builtin_ia32_psubd256_mask", IX86_BUILTIN_PSUBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34094 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4di3_mask, "__builtin_ia32_psubq256_mask", IX86_BUILTIN_PSUBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34095 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sssubv32qi3_mask, "__builtin_ia32_psubsb256_mask", IX86_BUILTIN_PSUBSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34096 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sssubv16hi3_mask, "__builtin_ia32_psubsw256_mask", IX86_BUILTIN_PSUBSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34097 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ussubv32qi3_mask, "__builtin_ia32_psubusb256_mask", IX86_BUILTIN_PSUBUSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34098 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ussubv16hi3_mask, "__builtin_ia32_psubusw256_mask", IX86_BUILTIN_PSUBUSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34099 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_256_mask", IX86_BUILTIN_SHUF_F64x2_256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_UQI },
34100 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_256_mask", IX86_BUILTIN_SHUF_I64x2_256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_UQI },
34101 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_256_mask", IX86_BUILTIN_SHUF_I32x4_256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_UQI },
34102 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_256_mask", IX86_BUILTIN_SHUF_F32x4_256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_UQI },
34103 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovwb128_mask", IX86_BUILTIN_PMOVWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_UQI },
34104 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovwb256_mask", IX86_BUILTIN_PMOVWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_UHI },
34105 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovswb128_mask", IX86_BUILTIN_PMOVSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_UQI },
34106 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovswb256_mask", IX86_BUILTIN_PMOVSWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_UHI },
34107 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovuswb128_mask", IX86_BUILTIN_PMOVUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_UQI },
34108 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovuswb256_mask", IX86_BUILTIN_PMOVUSWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_UHI },
34109 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4qi2_mask, "__builtin_ia32_pmovdb128_mask", IX86_BUILTIN_PMOVDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_UQI },
34110 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8qi2_mask, "__builtin_ia32_pmovdb256_mask", IX86_BUILTIN_PMOVDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_UQI },
34111 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4qi2_mask, "__builtin_ia32_pmovsdb128_mask", IX86_BUILTIN_PMOVSDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_UQI },
34112 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8qi2_mask, "__builtin_ia32_pmovsdb256_mask", IX86_BUILTIN_PMOVSDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_UQI },
34113 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4qi2_mask, "__builtin_ia32_pmovusdb128_mask", IX86_BUILTIN_PMOVUSDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_UQI },
34114 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8qi2_mask, "__builtin_ia32_pmovusdb256_mask", IX86_BUILTIN_PMOVUSDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_UQI },
34115 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4hi2_mask, "__builtin_ia32_pmovdw128_mask", IX86_BUILTIN_PMOVDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_UQI },
34116 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8hi2_mask, "__builtin_ia32_pmovdw256_mask", IX86_BUILTIN_PMOVDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_UQI },
34117 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4hi2_mask, "__builtin_ia32_pmovsdw128_mask", IX86_BUILTIN_PMOVSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_UQI },
34118 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8hi2_mask, "__builtin_ia32_pmovsdw256_mask", IX86_BUILTIN_PMOVSDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_UQI },
34119 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4hi2_mask, "__builtin_ia32_pmovusdw128_mask", IX86_BUILTIN_PMOVUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_UQI },
34120 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8hi2_mask, "__builtin_ia32_pmovusdw256_mask", IX86_BUILTIN_PMOVUSDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_UQI },
34121 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2qi2_mask, "__builtin_ia32_pmovqb128_mask", IX86_BUILTIN_PMOVQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_UQI },
34122 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4qi2_mask, "__builtin_ia32_pmovqb256_mask", IX86_BUILTIN_PMOVQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_UQI },
34123 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2qi2_mask, "__builtin_ia32_pmovsqb128_mask", IX86_BUILTIN_PMOVSQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_UQI },
34124 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4qi2_mask, "__builtin_ia32_pmovsqb256_mask", IX86_BUILTIN_PMOVSQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_UQI },
34125 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2qi2_mask, "__builtin_ia32_pmovusqb128_mask", IX86_BUILTIN_PMOVUSQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_UQI },
34126 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4qi2_mask, "__builtin_ia32_pmovusqb256_mask", IX86_BUILTIN_PMOVUSQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_UQI },
34127 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2hi2_mask, "__builtin_ia32_pmovqw128_mask", IX86_BUILTIN_PMOVQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_UQI },
34128 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4hi2_mask, "__builtin_ia32_pmovqw256_mask", IX86_BUILTIN_PMOVQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_UQI },
34129 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2hi2_mask, "__builtin_ia32_pmovsqw128_mask", IX86_BUILTIN_PMOVSQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_UQI },
34130 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4hi2_mask, "__builtin_ia32_pmovsqw256_mask", IX86_BUILTIN_PMOVSQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_UQI },
34131 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2hi2_mask, "__builtin_ia32_pmovusqw128_mask", IX86_BUILTIN_PMOVUSQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_UQI },
34132 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4hi2_mask, "__builtin_ia32_pmovusqw256_mask", IX86_BUILTIN_PMOVUSQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_UQI },
34133 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2si2_mask, "__builtin_ia32_pmovqd128_mask", IX86_BUILTIN_PMOVQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_UQI },
34134 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4si2_mask, "__builtin_ia32_pmovqd256_mask", IX86_BUILTIN_PMOVQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_UQI },
34135 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2si2_mask, "__builtin_ia32_pmovsqd128_mask", IX86_BUILTIN_PMOVSQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_UQI },
34136 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4si2_mask, "__builtin_ia32_pmovsqd256_mask", IX86_BUILTIN_PMOVSQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_UQI },
34137 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2si2_mask, "__builtin_ia32_pmovusqd128_mask", IX86_BUILTIN_PMOVUSQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_UQI },
34138 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4si2_mask, "__builtin_ia32_pmovusqd256_mask", IX86_BUILTIN_PMOVUSQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_UQI },
34139 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv4df_mask, "__builtin_ia32_rangepd256_mask", IX86_BUILTIN_RANGEPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_UQI },
34140 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv2df_mask, "__builtin_ia32_rangepd128_mask", IX86_BUILTIN_RANGEPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI },
34141 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv8sf_mask, "__builtin_ia32_rangeps256_mask", IX86_BUILTIN_RANGEPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_UQI },
34142 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv4sf_mask, "__builtin_ia32_rangeps128_mask", IX86_BUILTIN_RANGEPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI },
34143 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv8sf_mask, "__builtin_ia32_getexpps256_mask", IX86_BUILTIN_GETEXPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
34144 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv4df_mask, "__builtin_ia32_getexppd256_mask", IX86_BUILTIN_GETEXPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
34145 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv4sf_mask, "__builtin_ia32_getexpps128_mask", IX86_BUILTIN_GETEXPPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
34146 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv2df_mask, "__builtin_ia32_getexppd128_mask", IX86_BUILTIN_GETEXPPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
34147 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4df_mask, "__builtin_ia32_fixupimmpd256_mask", IX86_BUILTIN_FIXUPIMMPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT_UQI },
34148 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4df_maskz, "__builtin_ia32_fixupimmpd256_maskz", IX86_BUILTIN_FIXUPIMMPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT_UQI },
34149 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv8sf_mask, "__builtin_ia32_fixupimmps256_mask", IX86_BUILTIN_FIXUPIMMPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT_UQI },
34150 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv8sf_maskz, "__builtin_ia32_fixupimmps256_maskz", IX86_BUILTIN_FIXUPIMMPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT_UQI },
34151 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv2df_mask, "__builtin_ia32_fixupimmpd128_mask", IX86_BUILTIN_FIXUPIMMPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_UQI },
34152 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv2df_maskz, "__builtin_ia32_fixupimmpd128_maskz", IX86_BUILTIN_FIXUPIMMPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_UQI },
34153 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4sf_mask, "__builtin_ia32_fixupimmps128_mask", IX86_BUILTIN_FIXUPIMMPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_UQI },
34154 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4sf_maskz, "__builtin_ia32_fixupimmps128_maskz", IX86_BUILTIN_FIXUPIMMPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_UQI },
34155 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv4di2_mask, "__builtin_ia32_pabsq256_mask", IX86_BUILTIN_PABSQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI },
34156 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv2di2_mask, "__builtin_ia32_pabsq128_mask", IX86_BUILTIN_PABSQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI },
34157 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv8si2_mask, "__builtin_ia32_pabsd256_mask", IX86_BUILTIN_PABSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UQI },
34158 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv4si2_mask, "__builtin_ia32_pabsd128_mask", IX86_BUILTIN_PABSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
34159 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_pmulhrswv16hi3_mask , "__builtin_ia32_pmulhrsw256_mask", IX86_BUILTIN_PMULHRSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34160 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_pmulhrswv8hi3_mask, "__builtin_ia32_pmulhrsw128_mask", IX86_BUILTIN_PMULHRSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34161 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umulv8hi3_highpart_mask, "__builtin_ia32_pmulhuw128_mask", IX86_BUILTIN_PMULHUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34162 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umulv16hi3_highpart_mask, "__builtin_ia32_pmulhuw256_mask" , IX86_BUILTIN_PMULHUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34163 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smulv16hi3_highpart_mask, "__builtin_ia32_pmulhw256_mask" , IX86_BUILTIN_PMULHW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34164 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smulv8hi3_highpart_mask, "__builtin_ia32_pmulhw128_mask", IX86_BUILTIN_PMULHW128_MASK, UNKNOWN,(int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34165 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv16hi3_mask, "__builtin_ia32_pmullw256_mask" , IX86_BUILTIN_PMULLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34166 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8hi3_mask, "__builtin_ia32_pmullw128_mask", IX86_BUILTIN_PMULLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34167 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_mulv4di3_mask, "__builtin_ia32_pmullq256_mask", IX86_BUILTIN_PMULLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34168 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_mulv2di3_mask, "__builtin_ia32_pmullq128_mask", IX86_BUILTIN_PMULLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34169 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4df3_mask, "__builtin_ia32_andpd256_mask", IX86_BUILTIN_ANDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34170 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv2df3_mask, "__builtin_ia32_andpd128_mask", IX86_BUILTIN_ANDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34171 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv8sf3_mask, "__builtin_ia32_andps256_mask", IX86_BUILTIN_ANDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34172 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4sf3_mask, "__builtin_ia32_andps128_mask", IX86_BUILTIN_ANDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34173 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_andnotv4df3_mask, "__builtin_ia32_andnpd256_mask", IX86_BUILTIN_ANDNPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34174 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv2df3_mask, "__builtin_ia32_andnpd128_mask", IX86_BUILTIN_ANDNPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34175 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_andnotv8sf3_mask, "__builtin_ia32_andnps256_mask", IX86_BUILTIN_ANDNPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34176 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_andnotv4sf3_mask, "__builtin_ia32_andnps128_mask", IX86_BUILTIN_ANDNPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34177 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8hi3_mask, "__builtin_ia32_psllwi128_mask", IX86_BUILTIN_PSLLWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_UQI },
34178 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4si3_mask, "__builtin_ia32_pslldi128_mask", IX86_BUILTIN_PSLLDI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_UQI },
34179 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv2di3_mask, "__builtin_ia32_psllqi128_mask", IX86_BUILTIN_PSLLQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_UQI },
34180 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8hi3_mask, "__builtin_ia32_psllw128_mask", IX86_BUILTIN_PSLLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34181 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4si3_mask, "__builtin_ia32_pslld128_mask", IX86_BUILTIN_PSLLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34182 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv2di3_mask, "__builtin_ia32_psllq128_mask", IX86_BUILTIN_PSLLQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34183 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv16hi3_mask, "__builtin_ia32_psllwi256_mask", IX86_BUILTIN_PSLLWI256_MASK , UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_UHI },
34184 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv16hi3_mask, "__builtin_ia32_psllw256_mask", IX86_BUILTIN_PSLLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_UHI },
34185 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8si3_mask, "__builtin_ia32_pslldi256_mask", IX86_BUILTIN_PSLLDI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_UQI },
34186 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8si3_mask, "__builtin_ia32_pslld256_mask", IX86_BUILTIN_PSLLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_UQI },
34187 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4di3_mask, "__builtin_ia32_psllqi256_mask", IX86_BUILTIN_PSLLQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_UQI },
34188 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4di3_mask, "__builtin_ia32_psllq256_mask", IX86_BUILTIN_PSLLQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_UQI },
34189 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4si3_mask, "__builtin_ia32_psradi128_mask", IX86_BUILTIN_PSRADI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_UQI },
34190 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4si3_mask, "__builtin_ia32_psrad128_mask", IX86_BUILTIN_PSRAD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34191 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8si3_mask, "__builtin_ia32_psradi256_mask", IX86_BUILTIN_PSRADI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_UQI },
34192 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8si3_mask, "__builtin_ia32_psrad256_mask", IX86_BUILTIN_PSRAD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_UQI },
34193 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv2di3_mask, "__builtin_ia32_psraqi128_mask", IX86_BUILTIN_PSRAQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_UQI },
34194 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv2di3_mask, "__builtin_ia32_psraq128_mask", IX86_BUILTIN_PSRAQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34195 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4di3_mask, "__builtin_ia32_psraqi256_mask", IX86_BUILTIN_PSRAQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_UQI },
34196 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4di3_mask, "__builtin_ia32_psraq256_mask", IX86_BUILTIN_PSRAQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_UQI },
34197 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv8si3_mask, "__builtin_ia32_pandd256_mask", IX86_BUILTIN_PANDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34198 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4si3_mask, "__builtin_ia32_pandd128_mask", IX86_BUILTIN_PANDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34199 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4si3_mask, "__builtin_ia32_psrldi128_mask", IX86_BUILTIN_PSRLDI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_UQI },
34200 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4si3_mask, "__builtin_ia32_psrld128_mask", IX86_BUILTIN_PSRLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34201 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8si3_mask, "__builtin_ia32_psrldi256_mask", IX86_BUILTIN_PSRLDI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_UQI },
34202 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8si3_mask, "__builtin_ia32_psrld256_mask", IX86_BUILTIN_PSRLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_UQI },
34203 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv2di3_mask, "__builtin_ia32_psrlqi128_mask", IX86_BUILTIN_PSRLQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_UQI },
34204 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv2di3_mask, "__builtin_ia32_psrlq128_mask", IX86_BUILTIN_PSRLQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34205 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4di3_mask, "__builtin_ia32_psrlqi256_mask", IX86_BUILTIN_PSRLQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_UQI },
34206 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4di3_mask, "__builtin_ia32_psrlq256_mask", IX86_BUILTIN_PSRLQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_UQI },
34207 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4di3_mask, "__builtin_ia32_pandq256_mask", IX86_BUILTIN_PANDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34208 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv2di3_mask, "__builtin_ia32_pandq128_mask", IX86_BUILTIN_PANDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34209 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_andnotv8si3_mask, "__builtin_ia32_pandnd256_mask", IX86_BUILTIN_PANDND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34210 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv4si3_mask, "__builtin_ia32_pandnd128_mask", IX86_BUILTIN_PANDND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34211 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_andnotv4di3_mask, "__builtin_ia32_pandnq256_mask", IX86_BUILTIN_PANDNQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34212 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv2di3_mask, "__builtin_ia32_pandnq128_mask", IX86_BUILTIN_PANDNQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34213 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv8si3_mask, "__builtin_ia32_pord256_mask", IX86_BUILTIN_PORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34214 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4si3_mask, "__builtin_ia32_pord128_mask", IX86_BUILTIN_PORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34215 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4di3_mask, "__builtin_ia32_porq256_mask", IX86_BUILTIN_PORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34216 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv2di3_mask, "__builtin_ia32_porq128_mask", IX86_BUILTIN_PORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34217 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv8si3_mask, "__builtin_ia32_pxord256_mask", IX86_BUILTIN_PXORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34218 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4si3_mask, "__builtin_ia32_pxord128_mask", IX86_BUILTIN_PXORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34219 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4di3_mask, "__builtin_ia32_pxorq256_mask", IX86_BUILTIN_PXORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34220 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv2di3_mask, "__builtin_ia32_pxorq128_mask", IX86_BUILTIN_PXORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34221 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packsswb_mask, "__builtin_ia32_packsswb256_mask", IX86_BUILTIN_PACKSSWB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI_V32QI_USI },
34222 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packsswb_mask, "__builtin_ia32_packsswb128_mask", IX86_BUILTIN_PACKSSWB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI_V16QI_UHI },
34223 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packuswb_mask, "__builtin_ia32_packuswb256_mask", IX86_BUILTIN_PACKUSWB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI_V32QI_USI },
34224 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packuswb_mask, "__builtin_ia32_packuswb128_mask", IX86_BUILTIN_PACKUSWB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI_V16QI_UHI },
34225 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev8sf_mask, "__builtin_ia32_rndscaleps_256_mask", IX86_BUILTIN_RNDSCALEPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_UQI },
34226 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev4df_mask, "__builtin_ia32_rndscalepd_256_mask", IX86_BUILTIN_RNDSCALEPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_UQI },
34227 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev4sf_mask, "__builtin_ia32_rndscaleps_128_mask", IX86_BUILTIN_RNDSCALEPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_UQI },
34228 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev2df_mask, "__builtin_ia32_rndscalepd_128_mask", IX86_BUILTIN_RNDSCALEPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_UQI },
34229 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4di_mask, "__builtin_ia32_pternlogq256_mask", IX86_BUILTIN_VTERNLOGQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT_UQI },
34230 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4di_maskz, "__builtin_ia32_pternlogq256_maskz", IX86_BUILTIN_VTERNLOGQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT_UQI },
34231 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv8si_mask, "__builtin_ia32_pternlogd256_mask", IX86_BUILTIN_VTERNLOGD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT_UQI },
34232 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv8si_maskz, "__builtin_ia32_pternlogd256_maskz", IX86_BUILTIN_VTERNLOGD256_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT_UQI },
34233 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv2di_mask, "__builtin_ia32_pternlogq128_mask", IX86_BUILTIN_VTERNLOGQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT_UQI },
34234 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv2di_maskz, "__builtin_ia32_pternlogq128_maskz", IX86_BUILTIN_VTERNLOGQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT_UQI },
34235 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4si_mask, "__builtin_ia32_pternlogd128_mask", IX86_BUILTIN_VTERNLOGD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT_UQI },
34236 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4si_maskz, "__builtin_ia32_pternlogd128_maskz", IX86_BUILTIN_VTERNLOGD128_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT_UQI },
34237 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv4df_mask, "__builtin_ia32_scalefpd256_mask", IX86_BUILTIN_SCALEFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34238 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv8sf_mask, "__builtin_ia32_scalefps256_mask", IX86_BUILTIN_SCALEFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34239 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv2df_mask, "__builtin_ia32_scalefpd128_mask", IX86_BUILTIN_SCALEFPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34240 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv4sf_mask, "__builtin_ia32_scalefps128_mask", IX86_BUILTIN_SCALEFPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34241 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_mask, "__builtin_ia32_vfmaddpd256_mask", IX86_BUILTIN_VFMADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34242 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_mask3, "__builtin_ia32_vfmaddpd256_mask3", IX86_BUILTIN_VFMADDPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34243 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_maskz, "__builtin_ia32_vfmaddpd256_maskz", IX86_BUILTIN_VFMADDPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34244 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_mask, "__builtin_ia32_vfmaddpd128_mask", IX86_BUILTIN_VFMADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34245 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_mask3, "__builtin_ia32_vfmaddpd128_mask3", IX86_BUILTIN_VFMADDPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34246 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_maskz, "__builtin_ia32_vfmaddpd128_maskz", IX86_BUILTIN_VFMADDPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34247 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_mask, "__builtin_ia32_vfmaddps256_mask", IX86_BUILTIN_VFMADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34248 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_mask3, "__builtin_ia32_vfmaddps256_mask3", IX86_BUILTIN_VFMADDPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34249 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_maskz, "__builtin_ia32_vfmaddps256_maskz", IX86_BUILTIN_VFMADDPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34250 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_mask, "__builtin_ia32_vfmaddps128_mask", IX86_BUILTIN_VFMADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34251 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_mask3, "__builtin_ia32_vfmaddps128_mask3", IX86_BUILTIN_VFMADDPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34252 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_maskz, "__builtin_ia32_vfmaddps128_maskz", IX86_BUILTIN_VFMADDPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34253 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4df_mask3, "__builtin_ia32_vfmsubpd256_mask3", IX86_BUILTIN_VFMSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34254 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v2df_mask3, "__builtin_ia32_vfmsubpd128_mask3", IX86_BUILTIN_VFMSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34255 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v8sf_mask3, "__builtin_ia32_vfmsubps256_mask3", IX86_BUILTIN_VFMSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34256 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4sf_mask3, "__builtin_ia32_vfmsubps128_mask3", IX86_BUILTIN_VFMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34257 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4df_mask, "__builtin_ia32_vfnmaddpd256_mask", IX86_BUILTIN_VFNMADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34258 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v2df_mask, "__builtin_ia32_vfnmaddpd128_mask", IX86_BUILTIN_VFNMADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34259 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v8sf_mask, "__builtin_ia32_vfnmaddps256_mask", IX86_BUILTIN_VFNMADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34260 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4sf_mask, "__builtin_ia32_vfnmaddps128_mask", IX86_BUILTIN_VFNMADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34261 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask, "__builtin_ia32_vfnmsubpd256_mask", IX86_BUILTIN_VFNMSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34262 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask3, "__builtin_ia32_vfnmsubpd256_mask3", IX86_BUILTIN_VFNMSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34263 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask, "__builtin_ia32_vfnmsubpd128_mask", IX86_BUILTIN_VFNMSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34264 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask3, "__builtin_ia32_vfnmsubpd128_mask3", IX86_BUILTIN_VFNMSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34265 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_mask, "__builtin_ia32_vfnmsubps256_mask", IX86_BUILTIN_VFNMSUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34266 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_mask3, "__builtin_ia32_vfnmsubps256_mask3", IX86_BUILTIN_VFNMSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34267 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_mask, "__builtin_ia32_vfnmsubps128_mask", IX86_BUILTIN_VFNMSUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34268 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_mask3, "__builtin_ia32_vfnmsubps128_mask3", IX86_BUILTIN_VFNMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34269 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_mask, "__builtin_ia32_vfmaddsubpd256_mask", IX86_BUILTIN_VFMADDSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34270 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_mask3, "__builtin_ia32_vfmaddsubpd256_mask3", IX86_BUILTIN_VFMADDSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34271 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_maskz, "__builtin_ia32_vfmaddsubpd256_maskz", IX86_BUILTIN_VFMADDSUBPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34272 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_mask, "__builtin_ia32_vfmaddsubpd128_mask", IX86_BUILTIN_VFMADDSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34273 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_mask3, "__builtin_ia32_vfmaddsubpd128_mask3", IX86_BUILTIN_VFMADDSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34274 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_maskz, "__builtin_ia32_vfmaddsubpd128_maskz", IX86_BUILTIN_VFMADDSUBPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34275 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_mask, "__builtin_ia32_vfmaddsubps256_mask", IX86_BUILTIN_VFMADDSUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34276 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_mask3, "__builtin_ia32_vfmaddsubps256_mask3", IX86_BUILTIN_VFMADDSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34277 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_maskz, "__builtin_ia32_vfmaddsubps256_maskz", IX86_BUILTIN_VFMADDSUBPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34278 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_mask, "__builtin_ia32_vfmaddsubps128_mask", IX86_BUILTIN_VFMADDSUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34279 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_mask3, "__builtin_ia32_vfmaddsubps128_mask3", IX86_BUILTIN_VFMADDSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34280 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_maskz, "__builtin_ia32_vfmaddsubps128_maskz", IX86_BUILTIN_VFMADDSUBPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34281 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v4df_mask3, "__builtin_ia32_vfmsubaddpd256_mask3", IX86_BUILTIN_VFMSUBADDPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34282 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v2df_mask3, "__builtin_ia32_vfmsubaddpd128_mask3", IX86_BUILTIN_VFMSUBADDPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34283 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v8sf_mask3, "__builtin_ia32_vfmsubaddps256_mask3", IX86_BUILTIN_VFMSUBADDPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34284 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v4sf_mask3, "__builtin_ia32_vfmsubaddps128_mask3", IX86_BUILTIN_VFMSUBADDPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34285 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv4df, "__builtin_ia32_insertf64x2_256_mask", IX86_BUILTIN_INSERTF64X2_256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT_V4DF_UQI },
34286 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv4di, "__builtin_ia32_inserti64x2_256_mask", IX86_BUILTIN_INSERTI64X2_256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT_V4DI_UQI },
34287 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashrvv16hi_mask, "__builtin_ia32_psrav16hi_mask", IX86_BUILTIN_PSRAVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34288 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashrvv8hi_mask, "__builtin_ia32_psrav8hi_mask", IX86_BUILTIN_PSRAVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34289 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddubsw512v16hi_mask, "__builtin_ia32_pmaddubsw256_mask", IX86_BUILTIN_PMADDUBSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI_V16HI_UHI },
34290 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddubsw512v8hi_mask, "__builtin_ia32_pmaddubsw128_mask", IX86_BUILTIN_PMADDUBSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI_V8HI_UQI },
34291 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddwd512v16hi_mask, "__builtin_ia32_pmaddwd256_mask", IX86_BUILTIN_PMADDWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI_V8SI_UQI },
34292 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddwd512v8hi_mask, "__builtin_ia32_pmaddwd128_mask", IX86_BUILTIN_PMADDWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI_V4SI_UQI },
34293 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_lshrvv16hi_mask, "__builtin_ia32_psrlv16hi_mask", IX86_BUILTIN_PSRLVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34294 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_lshrvv8hi_mask, "__builtin_ia32_psrlv8hi_mask", IX86_BUILTIN_PSRLVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34295 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_fix_notruncv8sfv8si_mask, "__builtin_ia32_cvtps2dq256_mask", IX86_BUILTIN_CVTPS2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_UQI },
34296 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_fix_notruncv4sfv4si_mask, "__builtin_ia32_cvtps2dq128_mask", IX86_BUILTIN_CVTPS2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_UQI },
34297 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ufix_notruncv8sfv8si_mask, "__builtin_ia32_cvtps2udq256_mask", IX86_BUILTIN_CVTPS2UDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_UQI },
34298 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ufix_notruncv4sfv4si_mask, "__builtin_ia32_cvtps2udq128_mask", IX86_BUILTIN_CVTPS2UDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_UQI },
34299 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2qqv4di_mask, "__builtin_ia32_cvtps2qq256_mask", IX86_BUILTIN_CVTPS2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_UQI },
34300 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2qqv2di_mask, "__builtin_ia32_cvtps2qq128_mask", IX86_BUILTIN_CVTPS2QQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_UQI },
34301 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2uqqv4di_mask, "__builtin_ia32_cvtps2uqq256_mask", IX86_BUILTIN_CVTPS2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_UQI },
34302 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2uqqv2di_mask, "__builtin_ia32_cvtps2uqq128_mask", IX86_BUILTIN_CVTPS2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_UQI },
34303 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv8sf_mask, "__builtin_ia32_getmantps256_mask", IX86_BUILTIN_GETMANTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_UQI },
34304 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv4sf_mask, "__builtin_ia32_getmantps128_mask", IX86_BUILTIN_GETMANTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_UQI },
34305 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv4df_mask, "__builtin_ia32_getmantpd256_mask", IX86_BUILTIN_GETMANTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_UQI },
34306 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv2df_mask, "__builtin_ia32_getmantpd128_mask", IX86_BUILTIN_GETMANTPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_UQI },
34307 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movddup256_mask, "__builtin_ia32_movddup256_mask", IX86_BUILTIN_MOVDDUP256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
34308 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_dupv2df_mask, "__builtin_ia32_movddup128_mask", IX86_BUILTIN_MOVDDUP128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
34309 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movshdup256_mask, "__builtin_ia32_movshdup256_mask", IX86_BUILTIN_MOVSHDUP256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
34310 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse3_movshdup_mask, "__builtin_ia32_movshdup128_mask", IX86_BUILTIN_MOVSHDUP128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
34311 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movsldup256_mask, "__builtin_ia32_movsldup256_mask", IX86_BUILTIN_MOVSLDUP256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
34312 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse3_movsldup_mask, "__builtin_ia32_movsldup128_mask", IX86_BUILTIN_MOVSLDUP128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
34313 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4div4sf2_mask, "__builtin_ia32_cvtqq2ps256_mask", IX86_BUILTIN_CVTQQ2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DI_V4SF_UQI },
34314 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv2div2sf2_mask, "__builtin_ia32_cvtqq2ps128_mask", IX86_BUILTIN_CVTQQ2PS128, UNKNOWN, (int) V4SF_FTYPE_V2DI_V4SF_UQI },
34315 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4div4sf2_mask, "__builtin_ia32_cvtuqq2ps256_mask", IX86_BUILTIN_CVTUQQ2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DI_V4SF_UQI },
34316 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2div2sf2_mask, "__builtin_ia32_cvtuqq2ps128_mask", IX86_BUILTIN_CVTUQQ2PS128, UNKNOWN, (int) V4SF_FTYPE_V2DI_V4SF_UQI },
34317 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4div4df2_mask, "__builtin_ia32_cvtqq2pd256_mask", IX86_BUILTIN_CVTQQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_UQI },
34318 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv2div2df2_mask, "__builtin_ia32_cvtqq2pd128_mask", IX86_BUILTIN_CVTQQ2PD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_UQI },
34319 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4div4df2_mask, "__builtin_ia32_cvtuqq2pd256_mask", IX86_BUILTIN_CVTUQQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_UQI },
34320 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2div2df2_mask, "__builtin_ia32_cvtuqq2pd128_mask", IX86_BUILTIN_CVTUQQ2PD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_UQI },
34321 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4di3_mask, "__builtin_ia32_vpermt2varq256_mask", IX86_BUILTIN_VPERMT2VARQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34322 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4di3_maskz, "__builtin_ia32_vpermt2varq256_maskz", IX86_BUILTIN_VPERMT2VARQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34323 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8si3_mask, "__builtin_ia32_vpermt2vard256_mask", IX86_BUILTIN_VPERMT2VARD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34324 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8si3_maskz, "__builtin_ia32_vpermt2vard256_maskz", IX86_BUILTIN_VPERMT2VARD256_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34325 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4di3_mask, "__builtin_ia32_vpermi2varq256_mask", IX86_BUILTIN_VPERMI2VARQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34326 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8si3_mask, "__builtin_ia32_vpermi2vard256_mask", IX86_BUILTIN_VPERMI2VARD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34327 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4df3_mask, "__builtin_ia32_vpermt2varpd256_mask", IX86_BUILTIN_VPERMT2VARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_V4DF_UQI },
34328 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4df3_maskz, "__builtin_ia32_vpermt2varpd256_maskz", IX86_BUILTIN_VPERMT2VARPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_V4DF_UQI },
34329 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8sf3_mask, "__builtin_ia32_vpermt2varps256_mask", IX86_BUILTIN_VPERMT2VARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_V8SF_UQI },
34330 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8sf3_maskz, "__builtin_ia32_vpermt2varps256_maskz", IX86_BUILTIN_VPERMT2VARPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_V8SF_UQI },
34331 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4df3_mask, "__builtin_ia32_vpermi2varpd256_mask", IX86_BUILTIN_VPERMI2VARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_UQI },
34332 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8sf3_mask, "__builtin_ia32_vpermi2varps256_mask", IX86_BUILTIN_VPERMI2VARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_UQI },
34333 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2di3_mask, "__builtin_ia32_vpermt2varq128_mask", IX86_BUILTIN_VPERMT2VARQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34334 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2di3_maskz, "__builtin_ia32_vpermt2varq128_maskz", IX86_BUILTIN_VPERMT2VARQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34335 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4si3_mask, "__builtin_ia32_vpermt2vard128_mask", IX86_BUILTIN_VPERMT2VARD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34336 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4si3_maskz, "__builtin_ia32_vpermt2vard128_maskz", IX86_BUILTIN_VPERMT2VARD128_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34337 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv2di3_mask, "__builtin_ia32_vpermi2varq128_mask", IX86_BUILTIN_VPERMI2VARQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34338 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4si3_mask, "__builtin_ia32_vpermi2vard128_mask", IX86_BUILTIN_VPERMI2VARD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34339 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2df3_mask, "__builtin_ia32_vpermt2varpd128_mask", IX86_BUILTIN_VPERMT2VARPD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_V2DF_UQI },
34340 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2df3_maskz, "__builtin_ia32_vpermt2varpd128_maskz", IX86_BUILTIN_VPERMT2VARPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_V2DF_UQI },
34341 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4sf3_mask, "__builtin_ia32_vpermt2varps128_mask", IX86_BUILTIN_VPERMT2VARPS128, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_V4SF_UQI },
34342 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4sf3_maskz, "__builtin_ia32_vpermt2varps128_maskz", IX86_BUILTIN_VPERMT2VARPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_V4SF_UQI },
34343 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv2df3_mask, "__builtin_ia32_vpermi2varpd128_mask", IX86_BUILTIN_VPERMI2VARPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI_V2DF_UQI },
34344 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4sf3_mask, "__builtin_ia32_vpermi2varps128_mask", IX86_BUILTIN_VPERMI2VARPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI_V4SF_UQI },
34345 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_pshufbv32qi3_mask, "__builtin_ia32_pshufb256_mask", IX86_BUILTIN_PSHUFB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34346 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_pshufbv16qi3_mask, "__builtin_ia32_pshufb128_mask", IX86_BUILTIN_PSHUFB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34347 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufhwv3_mask, "__builtin_ia32_pshufhw256_mask", IX86_BUILTIN_PSHUFHW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_UHI },
34348 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufhw_mask, "__builtin_ia32_pshufhw128_mask", IX86_BUILTIN_PSHUFHW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_UQI },
34349 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshuflwv3_mask, "__builtin_ia32_pshuflw256_mask", IX86_BUILTIN_PSHUFLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_UHI },
34350 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshuflw_mask, "__builtin_ia32_pshuflw128_mask", IX86_BUILTIN_PSHUFLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_UQI },
34351 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufdv3_mask, "__builtin_ia32_pshufd256_mask", IX86_BUILTIN_PSHUFD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_UQI },
34352 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufd_mask, "__builtin_ia32_pshufd128_mask", IX86_BUILTIN_PSHUFD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_UQI },
34353 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_shufpd256_mask, "__builtin_ia32_shufpd256_mask", IX86_BUILTIN_SHUFPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_UQI },
34354 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_shufpd_mask, "__builtin_ia32_shufpd128_mask", IX86_BUILTIN_SHUFPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI },
34355 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_shufps256_mask, "__builtin_ia32_shufps256_mask", IX86_BUILTIN_SHUFPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_UQI },
34356 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_shufps_mask, "__builtin_ia32_shufps128_mask", IX86_BUILTIN_SHUFPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI },
34357 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv4di_mask, "__builtin_ia32_prolvq256_mask", IX86_BUILTIN_PROLVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34358 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv2di_mask, "__builtin_ia32_prolvq128_mask", IX86_BUILTIN_PROLVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34359 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv4di_mask, "__builtin_ia32_prolq256_mask", IX86_BUILTIN_PROLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_UQI },
34360 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv2di_mask, "__builtin_ia32_prolq128_mask", IX86_BUILTIN_PROLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_UQI },
34361 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv4di_mask, "__builtin_ia32_prorvq256_mask", IX86_BUILTIN_PRORVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34362 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv2di_mask, "__builtin_ia32_prorvq128_mask", IX86_BUILTIN_PRORVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34363 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv4di_mask, "__builtin_ia32_prorq256_mask", IX86_BUILTIN_PRORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_UQI },
34364 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv2di_mask, "__builtin_ia32_prorq128_mask", IX86_BUILTIN_PRORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_UQI },
34365 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv2di_mask, "__builtin_ia32_psravq128_mask", IX86_BUILTIN_PSRAVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34366 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv4di_mask, "__builtin_ia32_psravq256_mask", IX86_BUILTIN_PSRAVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34367 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv4di_mask, "__builtin_ia32_psllv4di_mask", IX86_BUILTIN_PSLLVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34368 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv2di_mask, "__builtin_ia32_psllv2di_mask", IX86_BUILTIN_PSLLVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34369 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv8si_mask, "__builtin_ia32_psllv8si_mask", IX86_BUILTIN_PSLLVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34370 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv4si_mask, "__builtin_ia32_psllv4si_mask", IX86_BUILTIN_PSLLVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34371 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv8si_mask, "__builtin_ia32_psrav8si_mask", IX86_BUILTIN_PSRAVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34372 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv4si_mask, "__builtin_ia32_psrav4si_mask", IX86_BUILTIN_PSRAVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34373 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv4di_mask, "__builtin_ia32_psrlv4di_mask", IX86_BUILTIN_PSRLVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34374 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv2di_mask, "__builtin_ia32_psrlv2di_mask", IX86_BUILTIN_PSRLVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34375 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv8si_mask, "__builtin_ia32_psrlv8si_mask", IX86_BUILTIN_PSRLVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34376 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv4si_mask, "__builtin_ia32_psrlv4si_mask", IX86_BUILTIN_PSRLVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34377 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv16hi3_mask, "__builtin_ia32_psrawi256_mask", IX86_BUILTIN_PSRAWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_UHI },
34378 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv16hi3_mask, "__builtin_ia32_psraw256_mask", IX86_BUILTIN_PSRAW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_UHI },
34379 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8hi3_mask, "__builtin_ia32_psrawi128_mask", IX86_BUILTIN_PSRAWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_UQI },
34380 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8hi3_mask, "__builtin_ia32_psraw128_mask", IX86_BUILTIN_PSRAW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34381 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv16hi3_mask, "__builtin_ia32_psrlwi256_mask", IX86_BUILTIN_PSRLWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_UHI },
34382 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv16hi3_mask, "__builtin_ia32_psrlw256_mask", IX86_BUILTIN_PSRLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_UHI },
34383 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8hi3_mask, "__builtin_ia32_psrlwi128_mask", IX86_BUILTIN_PSRLWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_UQI },
34384 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8hi3_mask, "__builtin_ia32_psrlw128_mask", IX86_BUILTIN_PSRLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34385 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv8si_mask, "__builtin_ia32_prorvd256_mask", IX86_BUILTIN_PRORVD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34386 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv8si_mask, "__builtin_ia32_prolvd256_mask", IX86_BUILTIN_PROLVD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34387 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv8si_mask, "__builtin_ia32_prord256_mask", IX86_BUILTIN_PRORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_UQI },
34388 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv8si_mask, "__builtin_ia32_prold256_mask", IX86_BUILTIN_PROLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_UQI },
34389 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv4si_mask, "__builtin_ia32_prorvd128_mask", IX86_BUILTIN_PRORVD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34390 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv4si_mask, "__builtin_ia32_prolvd128_mask", IX86_BUILTIN_PROLVD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34391 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv4si_mask, "__builtin_ia32_prord128_mask", IX86_BUILTIN_PRORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_UQI },
34392 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv4si_mask, "__builtin_ia32_prold128_mask", IX86_BUILTIN_PROLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_UQI },
34393 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv4df_mask, "__builtin_ia32_fpclasspd256_mask", IX86_BUILTIN_FPCLASSPD256, UNKNOWN, (int) QI_FTYPE_V4DF_INT_UQI },
34394 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv2df_mask, "__builtin_ia32_fpclasspd128_mask", IX86_BUILTIN_FPCLASSPD128, UNKNOWN, (int) QI_FTYPE_V2DF_INT_UQI },
34395 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vmfpclassv2df, "__builtin_ia32_fpclasssd", IX86_BUILTIN_FPCLASSSD, UNKNOWN, (int) QI_FTYPE_V2DF_INT },
34396 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv8sf_mask, "__builtin_ia32_fpclassps256_mask", IX86_BUILTIN_FPCLASSPS256, UNKNOWN, (int) QI_FTYPE_V8SF_INT_UQI },
34397 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv4sf_mask, "__builtin_ia32_fpclassps128_mask", IX86_BUILTIN_FPCLASSPS128, UNKNOWN, (int) QI_FTYPE_V4SF_INT_UQI },
34398 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vmfpclassv4sf, "__builtin_ia32_fpclassss", IX86_BUILTIN_FPCLASSSS, UNKNOWN, (int) QI_FTYPE_V4SF_INT },
34399 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtb2maskv16qi, "__builtin_ia32_cvtb2mask128", IX86_BUILTIN_CVTB2MASK128, UNKNOWN, (int) UHI_FTYPE_V16QI },
34400 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtb2maskv32qi, "__builtin_ia32_cvtb2mask256", IX86_BUILTIN_CVTB2MASK256, UNKNOWN, (int) USI_FTYPE_V32QI },
34401 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtw2maskv8hi, "__builtin_ia32_cvtw2mask128", IX86_BUILTIN_CVTW2MASK128, UNKNOWN, (int) UQI_FTYPE_V8HI },
34402 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtw2maskv16hi, "__builtin_ia32_cvtw2mask256", IX86_BUILTIN_CVTW2MASK256, UNKNOWN, (int) UHI_FTYPE_V16HI },
34403 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtd2maskv4si, "__builtin_ia32_cvtd2mask128", IX86_BUILTIN_CVTD2MASK128, UNKNOWN, (int) UQI_FTYPE_V4SI },
34404 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtd2maskv8si, "__builtin_ia32_cvtd2mask256", IX86_BUILTIN_CVTD2MASK256, UNKNOWN, (int) UQI_FTYPE_V8SI },
34405 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtq2maskv2di, "__builtin_ia32_cvtq2mask128", IX86_BUILTIN_CVTQ2MASK128, UNKNOWN, (int) UQI_FTYPE_V2DI },
34406 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtq2maskv4di, "__builtin_ia32_cvtq2mask256", IX86_BUILTIN_CVTQ2MASK256, UNKNOWN, (int) UQI_FTYPE_V4DI },
34407 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2bv16qi, "__builtin_ia32_cvtmask2b128", IX86_BUILTIN_CVTMASK2B128, UNKNOWN, (int) V16QI_FTYPE_UHI },
34408 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2bv32qi, "__builtin_ia32_cvtmask2b256", IX86_BUILTIN_CVTMASK2B256, UNKNOWN, (int) V32QI_FTYPE_USI },
34409 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2wv8hi, "__builtin_ia32_cvtmask2w128", IX86_BUILTIN_CVTMASK2W128, UNKNOWN, (int) V8HI_FTYPE_UQI },
34410 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2wv16hi, "__builtin_ia32_cvtmask2w256", IX86_BUILTIN_CVTMASK2W256, UNKNOWN, (int) V16HI_FTYPE_UHI },
34411 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2dv4si, "__builtin_ia32_cvtmask2d128", IX86_BUILTIN_CVTMASK2D128, UNKNOWN, (int) V4SI_FTYPE_UQI },
34412 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2dv8si, "__builtin_ia32_cvtmask2d256", IX86_BUILTIN_CVTMASK2D256, UNKNOWN, (int) V8SI_FTYPE_UQI },
34413 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2qv2di, "__builtin_ia32_cvtmask2q128", IX86_BUILTIN_CVTMASK2Q128, UNKNOWN, (int) V2DI_FTYPE_UQI },
34414 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2qv4di, "__builtin_ia32_cvtmask2q256", IX86_BUILTIN_CVTMASK2Q256, UNKNOWN, (int) V4DI_FTYPE_UQI },
34415 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv16qi3_mask, "__builtin_ia32_pcmpeqb128_mask", IX86_BUILTIN_PCMPEQB128_MASK, UNKNOWN, (int) UHI_FTYPE_V16QI_V16QI_UHI },
34416 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv32qi3_mask, "__builtin_ia32_pcmpeqb256_mask", IX86_BUILTIN_PCMPEQB256_MASK, UNKNOWN, (int) USI_FTYPE_V32QI_V32QI_USI },
34417 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv8hi3_mask, "__builtin_ia32_pcmpeqw128_mask", IX86_BUILTIN_PCMPEQW128_MASK, UNKNOWN, (int) UQI_FTYPE_V8HI_V8HI_UQI },
34418 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv16hi3_mask, "__builtin_ia32_pcmpeqw256_mask", IX86_BUILTIN_PCMPEQW256_MASK, UNKNOWN, (int) UHI_FTYPE_V16HI_V16HI_UHI },
34419 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv4si3_mask, "__builtin_ia32_pcmpeqd128_mask", IX86_BUILTIN_PCMPEQD128_MASK, UNKNOWN, (int) UQI_FTYPE_V4SI_V4SI_UQI },
34420 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv8si3_mask, "__builtin_ia32_pcmpeqd256_mask", IX86_BUILTIN_PCMPEQD256_MASK, UNKNOWN, (int) UQI_FTYPE_V8SI_V8SI_UQI },
34421 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv2di3_mask, "__builtin_ia32_pcmpeqq128_mask", IX86_BUILTIN_PCMPEQQ128_MASK, UNKNOWN, (int) UQI_FTYPE_V2DI_V2DI_UQI },
34422 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv4di3_mask, "__builtin_ia32_pcmpeqq256_mask", IX86_BUILTIN_PCMPEQQ256_MASK, UNKNOWN, (int) UQI_FTYPE_V4DI_V4DI_UQI },
34423 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv16qi3_mask, "__builtin_ia32_pcmpgtb128_mask", IX86_BUILTIN_PCMPGTB128_MASK, UNKNOWN, (int) UHI_FTYPE_V16QI_V16QI_UHI },
34424 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv32qi3_mask, "__builtin_ia32_pcmpgtb256_mask", IX86_BUILTIN_PCMPGTB256_MASK, UNKNOWN, (int) USI_FTYPE_V32QI_V32QI_USI },
34425 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv8hi3_mask, "__builtin_ia32_pcmpgtw128_mask", IX86_BUILTIN_PCMPGTW128_MASK, UNKNOWN, (int) UQI_FTYPE_V8HI_V8HI_UQI },
34426 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv16hi3_mask, "__builtin_ia32_pcmpgtw256_mask", IX86_BUILTIN_PCMPGTW256_MASK, UNKNOWN, (int) UHI_FTYPE_V16HI_V16HI_UHI },
34427 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv4si3_mask, "__builtin_ia32_pcmpgtd128_mask", IX86_BUILTIN_PCMPGTD128_MASK, UNKNOWN, (int) UQI_FTYPE_V4SI_V4SI_UQI },
34428 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv8si3_mask, "__builtin_ia32_pcmpgtd256_mask", IX86_BUILTIN_PCMPGTD256_MASK, UNKNOWN, (int) UQI_FTYPE_V8SI_V8SI_UQI },
34429 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv2di3_mask, "__builtin_ia32_pcmpgtq128_mask", IX86_BUILTIN_PCMPGTQ128_MASK, UNKNOWN, (int) UQI_FTYPE_V2DI_V2DI_UQI },
34430 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv4di3_mask, "__builtin_ia32_pcmpgtq256_mask", IX86_BUILTIN_PCMPGTQ256_MASK, UNKNOWN, (int) UQI_FTYPE_V4DI_V4DI_UQI },
34431 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv16qi3_mask, "__builtin_ia32_ptestmb128", IX86_BUILTIN_PTESTMB128, UNKNOWN, (int) UHI_FTYPE_V16QI_V16QI_UHI },
34432 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv32qi3_mask, "__builtin_ia32_ptestmb256", IX86_BUILTIN_PTESTMB256, UNKNOWN, (int) USI_FTYPE_V32QI_V32QI_USI },
34433 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv8hi3_mask, "__builtin_ia32_ptestmw128", IX86_BUILTIN_PTESTMW128, UNKNOWN, (int) UQI_FTYPE_V8HI_V8HI_UQI },
34434 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv16hi3_mask, "__builtin_ia32_ptestmw256", IX86_BUILTIN_PTESTMW256, UNKNOWN, (int) UHI_FTYPE_V16HI_V16HI_UHI },
34435 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv4si3_mask, "__builtin_ia32_ptestmd128", IX86_BUILTIN_PTESTMD128, UNKNOWN, (int) UQI_FTYPE_V4SI_V4SI_UQI },
34436 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv8si3_mask, "__builtin_ia32_ptestmd256", IX86_BUILTIN_PTESTMD256, UNKNOWN, (int) UQI_FTYPE_V8SI_V8SI_UQI },
34437 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv2di3_mask, "__builtin_ia32_ptestmq128", IX86_BUILTIN_PTESTMQ128, UNKNOWN, (int) UQI_FTYPE_V2DI_V2DI_UQI },
34438 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv4di3_mask, "__builtin_ia32_ptestmq256", IX86_BUILTIN_PTESTMQ256, UNKNOWN, (int) UQI_FTYPE_V4DI_V4DI_UQI },
34439 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv16qi3_mask, "__builtin_ia32_ptestnmb128", IX86_BUILTIN_PTESTNMB128, UNKNOWN, (int) UHI_FTYPE_V16QI_V16QI_UHI },
34440 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv32qi3_mask, "__builtin_ia32_ptestnmb256", IX86_BUILTIN_PTESTNMB256, UNKNOWN, (int) USI_FTYPE_V32QI_V32QI_USI },
34441 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv8hi3_mask, "__builtin_ia32_ptestnmw128", IX86_BUILTIN_PTESTNMW128, UNKNOWN, (int) UQI_FTYPE_V8HI_V8HI_UQI },
34442 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv16hi3_mask, "__builtin_ia32_ptestnmw256", IX86_BUILTIN_PTESTNMW256, UNKNOWN, (int) UHI_FTYPE_V16HI_V16HI_UHI },
34443 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv4si3_mask, "__builtin_ia32_ptestnmd128", IX86_BUILTIN_PTESTNMD128, UNKNOWN, (int) UQI_FTYPE_V4SI_V4SI_UQI },
34444 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv8si3_mask, "__builtin_ia32_ptestnmd256", IX86_BUILTIN_PTESTNMD256, UNKNOWN, (int) UQI_FTYPE_V8SI_V8SI_UQI },
34445 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv2di3_mask, "__builtin_ia32_ptestnmq128", IX86_BUILTIN_PTESTNMQ128, UNKNOWN, (int) UQI_FTYPE_V2DI_V2DI_UQI },
34446 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv4di3_mask, "__builtin_ia32_ptestnmq256", IX86_BUILTIN_PTESTNMQ256, UNKNOWN, (int) UQI_FTYPE_V4DI_V4DI_UQI },
34447 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv2di, "__builtin_ia32_broadcastmb128", IX86_BUILTIN_PBROADCASTMB128, UNKNOWN, (int) V2DI_FTYPE_UQI },
34448 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv4di, "__builtin_ia32_broadcastmb256", IX86_BUILTIN_PBROADCASTMB256, UNKNOWN, (int) V4DI_FTYPE_UQI },
34449 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv4si, "__builtin_ia32_broadcastmw128", IX86_BUILTIN_PBROADCASTMW128, UNKNOWN, (int) V4SI_FTYPE_UHI },
34450 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv8si, "__builtin_ia32_broadcastmw256", IX86_BUILTIN_PBROADCASTMW256, UNKNOWN, (int) V8SI_FTYPE_UHI },
34451 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4df_mask, "__builtin_ia32_compressdf256_mask", IX86_BUILTIN_COMPRESSPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
34452 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv2df_mask, "__builtin_ia32_compressdf128_mask", IX86_BUILTIN_COMPRESSPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
34453 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv8sf_mask, "__builtin_ia32_compresssf256_mask", IX86_BUILTIN_COMPRESSPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
34454 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4sf_mask, "__builtin_ia32_compresssf128_mask", IX86_BUILTIN_COMPRESSPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
34455 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4di_mask, "__builtin_ia32_compressdi256_mask", IX86_BUILTIN_PCOMPRESSQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI },
34456 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv2di_mask, "__builtin_ia32_compressdi128_mask", IX86_BUILTIN_PCOMPRESSQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI },
34457 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv8si_mask, "__builtin_ia32_compresssi256_mask", IX86_BUILTIN_PCOMPRESSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UQI },
34458 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4si_mask, "__builtin_ia32_compresssi128_mask", IX86_BUILTIN_PCOMPRESSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
34459 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_mask, "__builtin_ia32_expanddf256_mask", IX86_BUILTIN_EXPANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
34460 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_mask, "__builtin_ia32_expanddf128_mask", IX86_BUILTIN_EXPANDPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
34461 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_mask, "__builtin_ia32_expandsf256_mask", IX86_BUILTIN_EXPANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
34462 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_mask, "__builtin_ia32_expandsf128_mask", IX86_BUILTIN_EXPANDPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
34463 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_mask, "__builtin_ia32_expanddi256_mask", IX86_BUILTIN_PEXPANDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI },
34464 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_mask, "__builtin_ia32_expanddi128_mask", IX86_BUILTIN_PEXPANDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI },
34465 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_mask, "__builtin_ia32_expandsi256_mask", IX86_BUILTIN_PEXPANDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UQI },
34466 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_mask, "__builtin_ia32_expandsi128_mask", IX86_BUILTIN_PEXPANDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
34467 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_maskz, "__builtin_ia32_expanddf256_maskz", IX86_BUILTIN_EXPANDPD256Z, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
34468 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_maskz, "__builtin_ia32_expanddf128_maskz", IX86_BUILTIN_EXPANDPD128Z, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
34469 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_maskz, "__builtin_ia32_expandsf256_maskz", IX86_BUILTIN_EXPANDPS256Z, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
34470 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_maskz, "__builtin_ia32_expandsf128_maskz", IX86_BUILTIN_EXPANDPS128Z, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
34471 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_maskz, "__builtin_ia32_expanddi256_maskz", IX86_BUILTIN_PEXPANDQ256Z, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI },
34472 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_maskz, "__builtin_ia32_expanddi128_maskz", IX86_BUILTIN_PEXPANDQ128Z, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI },
34473 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_maskz, "__builtin_ia32_expandsi256_maskz", IX86_BUILTIN_PEXPANDD256Z, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UQI },
34474 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_maskz, "__builtin_ia32_expandsi128_maskz", IX86_BUILTIN_PEXPANDD128Z, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
34475 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8si3_mask, "__builtin_ia32_pmaxsd256_mask", IX86_BUILTIN_PMAXSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34476 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8si3_mask, "__builtin_ia32_pminsd256_mask", IX86_BUILTIN_PMINSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34477 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv8si3_mask, "__builtin_ia32_pmaxud256_mask", IX86_BUILTIN_PMAXUD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34478 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv8si3_mask, "__builtin_ia32_pminud256_mask", IX86_BUILTIN_PMINUD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34479 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4si3_mask, "__builtin_ia32_pmaxsd128_mask", IX86_BUILTIN_PMAXSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34480 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4si3_mask, "__builtin_ia32_pminsd128_mask", IX86_BUILTIN_PMINSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34481 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv4si3_mask, "__builtin_ia32_pmaxud128_mask", IX86_BUILTIN_PMAXUD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34482 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv4si3_mask, "__builtin_ia32_pminud128_mask", IX86_BUILTIN_PMINUD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34483 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4di3_mask, "__builtin_ia32_pmaxsq256_mask", IX86_BUILTIN_PMAXSQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34484 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4di3_mask, "__builtin_ia32_pminsq256_mask", IX86_BUILTIN_PMINSQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34485 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv4di3_mask, "__builtin_ia32_pmaxuq256_mask", IX86_BUILTIN_PMAXUQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34486 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv4di3_mask, "__builtin_ia32_pminuq256_mask", IX86_BUILTIN_PMINUQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34487 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv2di3_mask, "__builtin_ia32_pmaxsq128_mask", IX86_BUILTIN_PMAXSQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34488 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv2di3_mask, "__builtin_ia32_pminsq128_mask", IX86_BUILTIN_PMINSQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34489 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv2di3_mask, "__builtin_ia32_pmaxuq128_mask", IX86_BUILTIN_PMAXUQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34490 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv2di3_mask, "__builtin_ia32_pminuq128_mask", IX86_BUILTIN_PMINUQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34491 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv32qi3_mask, "__builtin_ia32_pminsb256_mask", IX86_BUILTIN_PMINSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34492 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv32qi3_mask, "__builtin_ia32_pminub256_mask", IX86_BUILTIN_PMINUB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34493 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv32qi3_mask, "__builtin_ia32_pmaxsb256_mask", IX86_BUILTIN_PMAXSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34494 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv32qi3_mask, "__builtin_ia32_pmaxub256_mask", IX86_BUILTIN_PMAXUB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34495 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv16qi3_mask, "__builtin_ia32_pminsb128_mask", IX86_BUILTIN_PMINSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34496 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv16qi3_mask, "__builtin_ia32_pminub128_mask", IX86_BUILTIN_PMINUB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34497 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv16qi3_mask, "__builtin_ia32_pmaxsb128_mask", IX86_BUILTIN_PMAXSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34498 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv16qi3_mask, "__builtin_ia32_pmaxub128_mask", IX86_BUILTIN_PMAXUB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34499 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv16hi3_mask, "__builtin_ia32_pminsw256_mask", IX86_BUILTIN_PMINSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34500 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv16hi3_mask, "__builtin_ia32_pminuw256_mask", IX86_BUILTIN_PMINUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34501 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv16hi3_mask, "__builtin_ia32_pmaxsw256_mask", IX86_BUILTIN_PMAXSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34502 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv16hi3_mask, "__builtin_ia32_pmaxuw256_mask", IX86_BUILTIN_PMAXUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34503 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8hi3_mask, "__builtin_ia32_pminsw128_mask", IX86_BUILTIN_PMINSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34504 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv8hi3_mask, "__builtin_ia32_pminuw128_mask", IX86_BUILTIN_PMINUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34505 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8hi3_mask, "__builtin_ia32_pmaxsw128_mask", IX86_BUILTIN_PMAXSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34506 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv8hi3_mask, "__builtin_ia32_pmaxuw128_mask", IX86_BUILTIN_PMAXUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34507 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv4di_mask, "__builtin_ia32_vpconflictdi_256_mask", IX86_BUILTIN_VPCONFLICTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI },
34508 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv8si_mask, "__builtin_ia32_vpconflictsi_256_mask", IX86_BUILTIN_VPCONFLICTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UQI },
34509 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv4di2_mask, "__builtin_ia32_vplzcntq_256_mask", IX86_BUILTIN_VPCLZCNTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI },
34510 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv8si2_mask, "__builtin_ia32_vplzcntd_256_mask", IX86_BUILTIN_VPCLZCNTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UQI },
34511 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpckhpd256_mask, "__builtin_ia32_unpckhpd256_mask", IX86_BUILTIN_UNPCKHPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34512 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_unpckhpd128_mask, "__builtin_ia32_unpckhpd128_mask", IX86_BUILTIN_UNPCKHPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34513 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpckhps256_mask, "__builtin_ia32_unpckhps256_mask", IX86_BUILTIN_UNPCKHPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34514 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv4sf_mask, "__builtin_ia32_unpckhps128_mask", IX86_BUILTIN_UNPCKHPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34515 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpcklpd256_mask, "__builtin_ia32_unpcklpd256_mask", IX86_BUILTIN_UNPCKLPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34516 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_unpcklpd128_mask, "__builtin_ia32_unpcklpd128_mask", IX86_BUILTIN_UNPCKLPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34517 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpcklps256_mask, "__builtin_ia32_unpcklps256_mask", IX86_BUILTIN_UNPCKLPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34518 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv2di_mask, "__builtin_ia32_vpconflictdi_128_mask", IX86_BUILTIN_VPCONFLICTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI },
34519 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv4si_mask, "__builtin_ia32_vpconflictsi_128_mask", IX86_BUILTIN_VPCONFLICTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
34520 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv2di2_mask, "__builtin_ia32_vplzcntq_128_mask", IX86_BUILTIN_VPCLZCNTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI },
34521 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv4si2_mask, "__builtin_ia32_vplzcntd_128_mask", IX86_BUILTIN_VPCLZCNTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
34522 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_unpcklps128_mask, "__builtin_ia32_unpcklps128_mask", IX86_BUILTIN_UNPCKLPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34523 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv8si_mask, "__builtin_ia32_alignd256_mask", IX86_BUILTIN_ALIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_UQI },
34524 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv4di_mask, "__builtin_ia32_alignq256_mask", IX86_BUILTIN_ALIGNQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_UQI },
34525 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv4si_mask, "__builtin_ia32_alignd128_mask", IX86_BUILTIN_ALIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT_V4SI_UQI },
34526 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv2di_mask, "__builtin_ia32_alignq128_mask", IX86_BUILTIN_ALIGNQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_UQI },
34527 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtps2ph256_mask, "__builtin_ia32_vcvtps2ph256_mask", IX86_BUILTIN_CVTPS2PH256_MASK, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT_V8HI_UQI },
34528 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtps2ph_mask, "__builtin_ia32_vcvtps2ph_mask", IX86_BUILTIN_CVTPS2PH_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT_V8HI_UQI },
34529 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtph2ps_mask, "__builtin_ia32_vcvtph2ps_mask", IX86_BUILTIN_CVTPH2PS_MASK, UNKNOWN, (int) V4SF_FTYPE_V8HI_V4SF_UQI },
34530 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtph2ps256_mask, "__builtin_ia32_vcvtph2ps256_mask", IX86_BUILTIN_CVTPH2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8HI_V8SF_UQI },
34531 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv4si_mask, "__builtin_ia32_punpckhdq128_mask", IX86_BUILTIN_PUNPCKHDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34532 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv8si_mask, "__builtin_ia32_punpckhdq256_mask", IX86_BUILTIN_PUNPCKHDQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34533 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv2di_mask, "__builtin_ia32_punpckhqdq128_mask", IX86_BUILTIN_PUNPCKHQDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34534 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv4di_mask, "__builtin_ia32_punpckhqdq256_mask", IX86_BUILTIN_PUNPCKHQDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34535 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv4si_mask, "__builtin_ia32_punpckldq128_mask", IX86_BUILTIN_PUNPCKLDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34536 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv8si_mask, "__builtin_ia32_punpckldq256_mask", IX86_BUILTIN_PUNPCKLDQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34537 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv2di_mask, "__builtin_ia32_punpcklqdq128_mask", IX86_BUILTIN_PUNPCKLQDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34538 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv4di_mask, "__builtin_ia32_punpcklqdq256_mask", IX86_BUILTIN_PUNPCKLQDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34539 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv16qi_mask, "__builtin_ia32_punpckhbw128_mask", IX86_BUILTIN_PUNPCKHBW128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34540 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv32qi_mask, "__builtin_ia32_punpckhbw256_mask", IX86_BUILTIN_PUNPCKHBW256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34541 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv8hi_mask, "__builtin_ia32_punpckhwd128_mask", IX86_BUILTIN_PUNPCKHWD128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34542 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv16hi_mask, "__builtin_ia32_punpckhwd256_mask", IX86_BUILTIN_PUNPCKHWD256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34543 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv16qi_mask, "__builtin_ia32_punpcklbw128_mask", IX86_BUILTIN_PUNPCKLBW128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34544 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv32qi_mask, "__builtin_ia32_punpcklbw256_mask", IX86_BUILTIN_PUNPCKLBW256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34545 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv8hi_mask, "__builtin_ia32_punpcklwd128_mask", IX86_BUILTIN_PUNPCKLWD128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34546 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv16hi_mask, "__builtin_ia32_punpcklwd256_mask", IX86_BUILTIN_PUNPCKLWD256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34547 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashlvv16hi_mask, "__builtin_ia32_psllv16hi_mask", IX86_BUILTIN_PSLLVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34548 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashlvv8hi_mask, "__builtin_ia32_psllv8hi_mask", IX86_BUILTIN_PSLLVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34549 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packssdw_mask, "__builtin_ia32_packssdw256_mask", IX86_BUILTIN_PACKSSDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI_V16HI_UHI },
34550 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packssdw_mask, "__builtin_ia32_packssdw128_mask", IX86_BUILTIN_PACKSSDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI_V8HI_UQI },
34551 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packusdw_mask, "__builtin_ia32_packusdw256_mask", IX86_BUILTIN_PACKUSDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI_V16HI_UHI },
34552 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_packusdw_mask, "__builtin_ia32_packusdw128_mask", IX86_BUILTIN_PACKUSDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI_V8HI_UQI },
34553 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_uavgv32qi3_mask, "__builtin_ia32_pavgb256_mask", IX86_BUILTIN_PAVGB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34554 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_uavgv16hi3_mask, "__builtin_ia32_pavgw256_mask", IX86_BUILTIN_PAVGW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34555 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_uavgv16qi3_mask, "__builtin_ia32_pavgb128_mask", IX86_BUILTIN_PAVGB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34556 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_uavgv8hi3_mask, "__builtin_ia32_pavgw128_mask", IX86_BUILTIN_PAVGW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34557 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv8sf_mask, "__builtin_ia32_permvarsf256_mask", IX86_BUILTIN_VPERMVARSF256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_UQI },
34558 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv4df_mask, "__builtin_ia32_permvardf256_mask", IX86_BUILTIN_VPERMVARDF256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_UQI },
34559 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permv4df_mask, "__builtin_ia32_permdf256_mask", IX86_BUILTIN_VPERMDF256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_UQI },
34560 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv32qi2_mask, "__builtin_ia32_pabsb256_mask", IX86_BUILTIN_PABSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_USI },
34561 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv16qi2_mask, "__builtin_ia32_pabsb128_mask", IX86_BUILTIN_PABSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_UHI },
34562 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv16hi2_mask, "__builtin_ia32_pabsw256_mask", IX86_BUILTIN_PABSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_UHI },
34563 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv8hi2_mask, "__builtin_ia32_pabsw128_mask", IX86_BUILTIN_PABSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_UQI },
34564 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv2df3_mask, "__builtin_ia32_vpermilvarpd_mask", IX86_BUILTIN_VPERMILVARPD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI_V2DF_UQI },
34565 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv4sf3_mask, "__builtin_ia32_vpermilvarps_mask", IX86_BUILTIN_VPERMILVARPS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI_V4SF_UQI },
34566 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv4df3_mask, "__builtin_ia32_vpermilvarpd256_mask", IX86_BUILTIN_VPERMILVARPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_UQI },
34567 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv8sf3_mask, "__builtin_ia32_vpermilvarps256_mask", IX86_BUILTIN_VPERMILVARPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_UQI },
34568 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv2df_mask, "__builtin_ia32_vpermilpd_mask", IX86_BUILTIN_VPERMILPD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_UQI },
34569 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv4sf_mask, "__builtin_ia32_vpermilps_mask", IX86_BUILTIN_VPERMILPS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_UQI },
34570 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv4df_mask, "__builtin_ia32_vpermilpd256_mask", IX86_BUILTIN_VPERMILPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_UQI },
34571 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv8sf_mask, "__builtin_ia32_vpermilps256_mask", IX86_BUILTIN_VPERMILPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_UQI },
34572 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4di, "__builtin_ia32_blendmq_256_mask", IX86_BUILTIN_BLENDMQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI },
34573 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8si, "__builtin_ia32_blendmd_256_mask", IX86_BUILTIN_BLENDMD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UQI },
34574 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4df, "__builtin_ia32_blendmpd_256_mask", IX86_BUILTIN_BLENDMPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
34575 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8sf, "__builtin_ia32_blendmps_256_mask", IX86_BUILTIN_BLENDMPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
34576 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv2di, "__builtin_ia32_blendmq_128_mask", IX86_BUILTIN_BLENDMQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI },
34577 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4si, "__builtin_ia32_blendmd_128_mask", IX86_BUILTIN_BLENDMD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
34578 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv2df, "__builtin_ia32_blendmpd_128_mask", IX86_BUILTIN_BLENDMPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
34579 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4sf, "__builtin_ia32_blendmps_128_mask", IX86_BUILTIN_BLENDMPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
34580 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv16hi, "__builtin_ia32_blendmw_256_mask", IX86_BUILTIN_BLENDMW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_UHI },
34581 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv32qi, "__builtin_ia32_blendmb_256_mask", IX86_BUILTIN_BLENDMB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_USI },
34582 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8hi, "__builtin_ia32_blendmw_128_mask", IX86_BUILTIN_BLENDMW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_UQI },
34583 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv16qi, "__builtin_ia32_blendmb_128_mask", IX86_BUILTIN_BLENDMB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_UHI },
34584 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8si3_mask, "__builtin_ia32_pmulld256_mask", IX86_BUILTIN_PMULLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34585 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4si3_mask, "__builtin_ia32_pmulld128_mask", IX86_BUILTIN_PMULLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34586 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_umult_even_v8si_mask, "__builtin_ia32_pmuludq256_mask", IX86_BUILTIN_PMULUDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI_V4DI_UQI },
34587 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_smult_even_v8si_mask, "__builtin_ia32_pmuldq256_mask", IX86_BUILTIN_PMULDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI_V4DI_UQI },
34588 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_mulv2siv2di3_mask, "__builtin_ia32_pmuldq128_mask", IX86_BUILTIN_PMULDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI_V2DI_UQI },
34589 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_umult_even_v4si_mask, "__builtin_ia32_pmuludq128_mask", IX86_BUILTIN_PMULUDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI_V2DI_UQI },
34590 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtpd2ps256_mask, "__builtin_ia32_cvtpd2ps256_mask", IX86_BUILTIN_CVTPD2PS256_MASK, UNKNOWN, (int) V4SF_FTYPE_V4DF_V4SF_UQI },
34591 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtpd2ps_mask, "__builtin_ia32_cvtpd2ps_mask", IX86_BUILTIN_CVTPD2PS_MASK, UNKNOWN, (int) V4SF_FTYPE_V2DF_V4SF_UQI },
34592 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv8si_mask, "__builtin_ia32_permvarsi256_mask", IX86_BUILTIN_VPERMVARSI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34593 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv4di_mask, "__builtin_ia32_permvardi256_mask", IX86_BUILTIN_VPERMVARDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34594 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permv4di_mask, "__builtin_ia32_permdi256_mask", IX86_BUILTIN_VPERMDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_UQI },
34595 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4di3_mask, "__builtin_ia32_cmpq256_mask", IX86_BUILTIN_CMPQ256, UNKNOWN, (int) UQI_FTYPE_V4DI_V4DI_INT_UQI },
34596 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8si3_mask, "__builtin_ia32_cmpd256_mask", IX86_BUILTIN_CMPD256, UNKNOWN, (int) UQI_FTYPE_V8SI_V8SI_INT_UQI },
34597 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv4di3_mask, "__builtin_ia32_ucmpq256_mask", IX86_BUILTIN_UCMPQ256, UNKNOWN, (int) UQI_FTYPE_V4DI_V4DI_INT_UQI },
34598 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv8si3_mask, "__builtin_ia32_ucmpd256_mask", IX86_BUILTIN_UCMPD256, UNKNOWN, (int) UQI_FTYPE_V8SI_V8SI_INT_UQI },
34599 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv32qi3_mask, "__builtin_ia32_cmpb256_mask", IX86_BUILTIN_CMPB256, UNKNOWN, (int) USI_FTYPE_V32QI_V32QI_INT_USI },
34600 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv16hi3_mask, "__builtin_ia32_cmpw256_mask", IX86_BUILTIN_CMPW256, UNKNOWN, (int) UHI_FTYPE_V16HI_V16HI_INT_UHI },
34601 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv32qi3_mask, "__builtin_ia32_ucmpb256_mask", IX86_BUILTIN_UCMPB256, UNKNOWN, (int) USI_FTYPE_V32QI_V32QI_INT_USI },
34602 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv16hi3_mask, "__builtin_ia32_ucmpw256_mask", IX86_BUILTIN_UCMPW256, UNKNOWN, (int) UHI_FTYPE_V16HI_V16HI_INT_UHI },
34603 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4df3_mask, "__builtin_ia32_cmppd256_mask", IX86_BUILTIN_CMPPD256_MASK, UNKNOWN, (int) QI_FTYPE_V4DF_V4DF_INT_UQI },
34604 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8sf3_mask, "__builtin_ia32_cmpps256_mask", IX86_BUILTIN_CMPPS256_MASK, UNKNOWN, (int) QI_FTYPE_V8SF_V8SF_INT_UQI },
34605 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv2di3_mask, "__builtin_ia32_cmpq128_mask", IX86_BUILTIN_CMPQ128, UNKNOWN, (int) UQI_FTYPE_V2DI_V2DI_INT_UQI },
34606 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4si3_mask, "__builtin_ia32_cmpd128_mask", IX86_BUILTIN_CMPD128, UNKNOWN, (int) UQI_FTYPE_V4SI_V4SI_INT_UQI },
34607 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv2di3_mask, "__builtin_ia32_ucmpq128_mask", IX86_BUILTIN_UCMPQ128, UNKNOWN, (int) UQI_FTYPE_V2DI_V2DI_INT_UQI },
34608 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv4si3_mask, "__builtin_ia32_ucmpd128_mask", IX86_BUILTIN_UCMPD128, UNKNOWN, (int) UQI_FTYPE_V4SI_V4SI_INT_UQI },
34609 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv16qi3_mask, "__builtin_ia32_cmpb128_mask", IX86_BUILTIN_CMPB128, UNKNOWN, (int) UHI_FTYPE_V16QI_V16QI_INT_UHI },
34610 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8hi3_mask, "__builtin_ia32_cmpw128_mask", IX86_BUILTIN_CMPW128, UNKNOWN, (int) UQI_FTYPE_V8HI_V8HI_INT_UQI },
34611 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv16qi3_mask, "__builtin_ia32_ucmpb128_mask", IX86_BUILTIN_UCMPB128, UNKNOWN, (int) UHI_FTYPE_V16QI_V16QI_INT_UHI },
34612 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv8hi3_mask, "__builtin_ia32_ucmpw128_mask", IX86_BUILTIN_UCMPW128, UNKNOWN, (int) UQI_FTYPE_V8HI_V8HI_INT_UQI },
34613 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv2df3_mask, "__builtin_ia32_cmppd128_mask", IX86_BUILTIN_CMPPD128_MASK, UNKNOWN, (int) UQI_FTYPE_V2DF_V2DF_INT_UQI },
34614 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4sf3_mask, "__builtin_ia32_cmpps128_mask", IX86_BUILTIN_CMPPS128_MASK, UNKNOWN, (int) UQI_FTYPE_V4SF_V4SF_INT_UQI },
34617 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x2_512_mask", IX86_BUILTIN_BROADCASTF32x2_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_UHI },
34618 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16si_mask, "__builtin_ia32_broadcasti32x2_512_mask", IX86_BUILTIN_BROADCASTI32x2_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_UHI },
34619 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv8df_mask_1, "__builtin_ia32_broadcastf64x2_512_mask", IX86_BUILTIN_BROADCASTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_UQI },
34620 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv8di_mask_1, "__builtin_ia32_broadcasti64x2_512_mask", IX86_BUILTIN_BROADCASTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_UQI },
34621 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16sf_mask_1, "__builtin_ia32_broadcastf32x8_512_mask", IX86_BUILTIN_BROADCASTF32X8_512, UNKNOWN, (int) V16SF_FTYPE_V8SF_V16SF_UHI },
34622 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16si_mask_1, "__builtin_ia32_broadcasti32x8_512_mask", IX86_BUILTIN_BROADCASTI32X8_512, UNKNOWN, (int) V16SI_FTYPE_V8SI_V16SI_UHI },
34623 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextractf64x2_mask, "__builtin_ia32_extractf64x2_512_mask", IX86_BUILTIN_EXTRACTF64X2_512, UNKNOWN, (int) V2DF_FTYPE_V8DF_INT_V2DF_UQI },
34624 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextractf32x8_mask, "__builtin_ia32_extractf32x8_mask", IX86_BUILTIN_EXTRACTF32X8, UNKNOWN, (int) V8SF_FTYPE_V16SF_INT_V8SF_UQI },
34625 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextracti64x2_mask, "__builtin_ia32_extracti64x2_512_mask", IX86_BUILTIN_EXTRACTI64X2_512, UNKNOWN, (int) V2DI_FTYPE_V8DI_INT_V2DI_UQI },
34626 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextracti32x8_mask, "__builtin_ia32_extracti32x8_mask", IX86_BUILTIN_EXTRACTI32X8, UNKNOWN, (int) V8SI_FTYPE_V16SI_INT_V8SI_UQI },
34627 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducepv8df_mask, "__builtin_ia32_reducepd512_mask", IX86_BUILTIN_REDUCEPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_UQI },
34628 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducepv16sf_mask, "__builtin_ia32_reduceps512_mask", IX86_BUILTIN_REDUCEPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_UHI },
34629 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_mulv8di3_mask, "__builtin_ia32_pmullq512_mask", IX86_BUILTIN_PMULLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
34630 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_xorv8df3_mask, "__builtin_ia32_xorpd512_mask", IX86_BUILTIN_XORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI },
34631 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_xorv16sf3_mask, "__builtin_ia32_xorps512_mask", IX86_BUILTIN_XORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI },
34632 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_iorv8df3_mask, "__builtin_ia32_orpd512_mask", IX86_BUILTIN_ORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI },
34633 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_iorv16sf3_mask, "__builtin_ia32_orps512_mask", IX86_BUILTIN_ORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI },
34634 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_andv8df3_mask, "__builtin_ia32_andpd512_mask", IX86_BUILTIN_ANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI },
34635 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_andv16sf3_mask, "__builtin_ia32_andps512_mask", IX86_BUILTIN_ANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI },
34636 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_andnotv8df3_mask, "__builtin_ia32_andnpd512_mask", IX86_BUILTIN_ANDNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI},
34637 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_andnotv16sf3_mask, "__builtin_ia32_andnps512_mask", IX86_BUILTIN_ANDNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI },
34638 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinsertf32x8_mask, "__builtin_ia32_insertf32x8_mask", IX86_BUILTIN_INSERTF32X8, UNKNOWN, (int) V16SF_FTYPE_V16SF_V8SF_INT_V16SF_UHI },
34639 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinserti32x8_mask, "__builtin_ia32_inserti32x8_mask", IX86_BUILTIN_INSERTI32X8, UNKNOWN, (int) V16SI_FTYPE_V16SI_V8SI_INT_V16SI_UHI },
34640 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinsertf64x2_mask, "__builtin_ia32_insertf64x2_512_mask", IX86_BUILTIN_INSERTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V2DF_INT_V8DF_UQI },
34641 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinserti64x2_mask, "__builtin_ia32_inserti64x2_512_mask", IX86_BUILTIN_INSERTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_INT_V8DI_UQI },
34642 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_fpclassv8df_mask, "__builtin_ia32_fpclasspd512_mask", IX86_BUILTIN_FPCLASSPD512, UNKNOWN, (int) QI_FTYPE_V8DF_INT_UQI },
34643 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_fpclassv16sf_mask, "__builtin_ia32_fpclassps512_mask", IX86_BUILTIN_FPCLASSPS512, UNKNOWN, (int) HI_FTYPE_V16SF_INT_UHI },
34644 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtd2maskv16si, "__builtin_ia32_cvtd2mask512", IX86_BUILTIN_CVTD2MASK512, UNKNOWN, (int) UHI_FTYPE_V16SI },
34645 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtq2maskv8di, "__builtin_ia32_cvtq2mask512", IX86_BUILTIN_CVTQ2MASK512, UNKNOWN, (int) UQI_FTYPE_V8DI },
34646 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtmask2dv16si, "__builtin_ia32_cvtmask2d512", IX86_BUILTIN_CVTMASK2D512, UNKNOWN, (int) V16SI_FTYPE_UHI },
34647 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtmask2qv8di, "__builtin_ia32_cvtmask2q512", IX86_BUILTIN_CVTMASK2Q512, UNKNOWN, (int) V8DI_FTYPE_UQI },
34650 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_kunpcksi, "__builtin_ia32_kunpcksi", IX86_BUILTIN_KUNPCKWD, UNKNOWN, (int) USI_FTYPE_USI_USI },
34651 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_kunpckdi, "__builtin_ia32_kunpckdi", IX86_BUILTIN_KUNPCKDQ, UNKNOWN, (int) UDI_FTYPE_UDI_UDI },
34652 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packusdw_mask, "__builtin_ia32_packusdw512_mask", IX86_BUILTIN_PACKUSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_USI },
34653 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashlv4ti3, "__builtin_ia32_pslldq512", IX86_BUILTIN_PSLLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT },
34654 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_lshrv4ti3, "__builtin_ia32_psrldq512", IX86_BUILTIN_PSRLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT },
34655 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packssdw_mask, "__builtin_ia32_packssdw512_mask", IX86_BUILTIN_PACKSSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_USI },
34656 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_palignrv4ti, "__builtin_ia32_palignr512", IX86_BUILTIN_PALIGNR512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_CONVERT },
34657 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_palignrv64qi_mask, "__builtin_ia32_palignr512_mask", IX86_BUILTIN_PALIGNR512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UDI_CONVERT },
34658 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loaddquv32hi_mask, "__builtin_ia32_movdquhi512_mask", IX86_BUILTIN_MOVDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI },
34659 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_loaddquv64qi_mask, "__builtin_ia32_movdquqi512_mask", IX86_BUILTIN_MOVDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI },
34660 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_psadbw, "__builtin_ia32_psadbw512", IX86_BUILTIN_PSADBW512, UNKNOWN, (int) V8DI_FTYPE_V64QI_V64QI },
34661 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_dbpsadbwv32hi_mask, "__builtin_ia32_dbpsadbw512_mask", IX86_BUILTIN_DBPSADBW512, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_INT_V32HI_USI },
34662 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dupv64qi_mask, "__builtin_ia32_pbroadcastb512_mask", IX86_BUILTIN_PBROADCASTB512, UNKNOWN, (int) V64QI_FTYPE_V16QI_V64QI_UDI },
34663 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dup_gprv64qi_mask, "__builtin_ia32_pbroadcastb512_gpr_mask", IX86_BUILTIN_PBROADCASTB512_GPR, UNKNOWN, (int) V64QI_FTYPE_QI_V64QI_UDI },
34664 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dupv32hi_mask, "__builtin_ia32_pbroadcastw512_mask", IX86_BUILTIN_PBROADCASTW512, UNKNOWN, (int) V32HI_FTYPE_V8HI_V32HI_USI },
34665 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dup_gprv32hi_mask, "__builtin_ia32_pbroadcastw512_gpr_mask", IX86_BUILTIN_PBROADCASTW512_GPR, UNKNOWN, (int) V32HI_FTYPE_HI_V32HI_USI },
34666 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sign_extendv32qiv32hi2_mask, "__builtin_ia32_pmovsxbw512_mask", IX86_BUILTIN_PMOVSXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_USI },
34667 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_zero_extendv32qiv32hi2_mask, "__builtin_ia32_pmovzxbw512_mask", IX86_BUILTIN_PMOVZXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_USI },
34668 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_permvarv32hi_mask, "__builtin_ia32_permvarhi512_mask", IX86_BUILTIN_VPERMVARHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34669 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermt2varv32hi3_mask, "__builtin_ia32_vpermt2varhi512_mask", IX86_BUILTIN_VPERMT2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34670 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermt2varv32hi3_maskz, "__builtin_ia32_vpermt2varhi512_maskz", IX86_BUILTIN_VPERMT2VARHI512_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34671 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermi2varv32hi3_mask, "__builtin_ia32_vpermi2varhi512_mask", IX86_BUILTIN_VPERMI2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34672 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_uavgv64qi3_mask, "__builtin_ia32_pavgb512_mask", IX86_BUILTIN_PAVGB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34673 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_uavgv32hi3_mask, "__builtin_ia32_pavgw512_mask", IX86_BUILTIN_PAVGW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34674 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_addv64qi3_mask, "__builtin_ia32_paddb512_mask", IX86_BUILTIN_PADDB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34675 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_subv64qi3_mask, "__builtin_ia32_psubb512_mask", IX86_BUILTIN_PSUBB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34676 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sssubv64qi3_mask, "__builtin_ia32_psubsb512_mask", IX86_BUILTIN_PSUBSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34677 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ssaddv64qi3_mask, "__builtin_ia32_paddsb512_mask", IX86_BUILTIN_PADDSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34678 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ussubv64qi3_mask, "__builtin_ia32_psubusb512_mask", IX86_BUILTIN_PSUBUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34679 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_usaddv64qi3_mask, "__builtin_ia32_paddusb512_mask", IX86_BUILTIN_PADDUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34680 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_subv32hi3_mask, "__builtin_ia32_psubw512_mask", IX86_BUILTIN_PSUBW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34681 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_addv32hi3_mask, "__builtin_ia32_paddw512_mask", IX86_BUILTIN_PADDW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34682 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sssubv32hi3_mask, "__builtin_ia32_psubsw512_mask", IX86_BUILTIN_PSUBSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34683 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ssaddv32hi3_mask, "__builtin_ia32_paddsw512_mask", IX86_BUILTIN_PADDSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34684 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ussubv32hi3_mask, "__builtin_ia32_psubusw512_mask", IX86_BUILTIN_PSUBUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34685 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_usaddv32hi3_mask, "__builtin_ia32_paddusw512_mask", IX86_BUILTIN_PADDUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34686 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umaxv32hi3_mask, "__builtin_ia32_pmaxuw512_mask", IX86_BUILTIN_PMAXUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34687 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smaxv32hi3_mask, "__builtin_ia32_pmaxsw512_mask", IX86_BUILTIN_PMAXSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34688 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_uminv32hi3_mask, "__builtin_ia32_pminuw512_mask", IX86_BUILTIN_PMINUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34689 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_sminv32hi3_mask, "__builtin_ia32_pminsw512_mask", IX86_BUILTIN_PMINSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34690 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umaxv64qi3_mask, "__builtin_ia32_pmaxub512_mask", IX86_BUILTIN_PMAXUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34691 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smaxv64qi3_mask, "__builtin_ia32_pmaxsb512_mask", IX86_BUILTIN_PMAXSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34692 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_uminv64qi3_mask, "__builtin_ia32_pminub512_mask", IX86_BUILTIN_PMINUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34693 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_sminv64qi3_mask, "__builtin_ia32_pminsb512_mask", IX86_BUILTIN_PMINSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34694 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovwb512_mask", IX86_BUILTIN_PMOVWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_USI },
34695 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ss_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovswb512_mask", IX86_BUILTIN_PMOVSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_USI },
34696 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_us_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovuswb512_mask", IX86_BUILTIN_PMOVUSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_USI },
34697 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_umulhrswv32hi3_mask, "__builtin_ia32_pmulhrsw512_mask", IX86_BUILTIN_PMULHRSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34698 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umulv32hi3_highpart_mask, "__builtin_ia32_pmulhuw512_mask" , IX86_BUILTIN_PMULHUW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34699 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smulv32hi3_highpart_mask, "__builtin_ia32_pmulhw512_mask" , IX86_BUILTIN_PMULHW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34700 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_mulv32hi3_mask, "__builtin_ia32_pmullw512_mask", IX86_BUILTIN_PMULLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34701 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllwi512_mask", IX86_BUILTIN_PSLLWI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI },
34702 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllw512_mask", IX86_BUILTIN_PSLLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_USI },
34703 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packsswb_mask, "__builtin_ia32_packsswb512_mask", IX86_BUILTIN_PACKSSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_UDI },
34704 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packuswb_mask, "__builtin_ia32_packuswb512_mask", IX86_BUILTIN_PACKUSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_UDI },
34705 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashrvv32hi_mask, "__builtin_ia32_psrav32hi_mask", IX86_BUILTIN_PSRAVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34706 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pmaddubsw512v32hi_mask, "__builtin_ia32_pmaddubsw512_mask", IX86_BUILTIN_PMADDUBSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_V32HI_USI },
34707 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pmaddwd512v32hi_mask, "__builtin_ia32_pmaddwd512_mask", IX86_BUILTIN_PMADDWD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V32HI_V32HI_V16SI_UHI },
34708 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_lshrvv32hi_mask, "__builtin_ia32_psrlv32hi_mask", IX86_BUILTIN_PSRLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34709 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_highv64qi_mask, "__builtin_ia32_punpckhbw512_mask", IX86_BUILTIN_PUNPCKHBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34710 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_highv32hi_mask, "__builtin_ia32_punpckhwd512_mask", IX86_BUILTIN_PUNPCKHWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34711 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_lowv64qi_mask, "__builtin_ia32_punpcklbw512_mask", IX86_BUILTIN_PUNPCKLBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34712 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_lowv32hi_mask, "__builtin_ia32_punpcklwd512_mask", IX86_BUILTIN_PUNPCKLWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34713 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshufbv64qi3_mask, "__builtin_ia32_pshufb512_mask", IX86_BUILTIN_PSHUFB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34714 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshufhwv32hi_mask, "__builtin_ia32_pshufhw512_mask", IX86_BUILTIN_PSHUFHW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI },
34715 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshuflwv32hi_mask, "__builtin_ia32_pshuflw512_mask", IX86_BUILTIN_PSHUFLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI },
34716 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psrawi512_mask", IX86_BUILTIN_PSRAWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI },
34717 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psraw512_mask", IX86_BUILTIN_PSRAW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_USI },
34718 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlwi512_mask", IX86_BUILTIN_PSRLWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI },
34719 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlw512_mask", IX86_BUILTIN_PSRLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_USI },
34720 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtb2maskv64qi, "__builtin_ia32_cvtb2mask512", IX86_BUILTIN_CVTB2MASK512, UNKNOWN, (int) UDI_FTYPE_V64QI },
34721 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtw2maskv32hi, "__builtin_ia32_cvtw2mask512", IX86_BUILTIN_CVTW2MASK512, UNKNOWN, (int) USI_FTYPE_V32HI },
34722 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtmask2bv64qi, "__builtin_ia32_cvtmask2b512", IX86_BUILTIN_CVTMASK2B512, UNKNOWN, (int) V64QI_FTYPE_UDI },
34723 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtmask2wv32hi, "__builtin_ia32_cvtmask2w512", IX86_BUILTIN_CVTMASK2W512, UNKNOWN, (int) V32HI_FTYPE_USI },
34724 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_eqv64qi3_mask, "__builtin_ia32_pcmpeqb512_mask", IX86_BUILTIN_PCMPEQB512_MASK, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI },
34725 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_eqv32hi3_mask, "__builtin_ia32_pcmpeqw512_mask", IX86_BUILTIN_PCMPEQW512_MASK, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_USI },
34726 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_gtv64qi3_mask, "__builtin_ia32_pcmpgtb512_mask", IX86_BUILTIN_PCMPGTB512_MASK, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI },
34727 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_gtv32hi3_mask, "__builtin_ia32_pcmpgtw512_mask", IX86_BUILTIN_PCMPGTW512_MASK, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_USI },
34728 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testmv64qi3_mask, "__builtin_ia32_ptestmb512", IX86_BUILTIN_PTESTMB512, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI },
34729 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testmv32hi3_mask, "__builtin_ia32_ptestmw512", IX86_BUILTIN_PTESTMW512, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_USI },
34730 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testnmv64qi3_mask, "__builtin_ia32_ptestnmb512", IX86_BUILTIN_PTESTNMB512, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI },
34731 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testnmv32hi3_mask, "__builtin_ia32_ptestnmw512", IX86_BUILTIN_PTESTNMW512, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_USI },
34732 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashlvv32hi_mask, "__builtin_ia32_psllv32hi_mask", IX86_BUILTIN_PSLLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34733 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_absv64qi2_mask, "__builtin_ia32_pabsb512_mask", IX86_BUILTIN_PABSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI },
34734 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_absv32hi2_mask, "__builtin_ia32_pabsw512_mask", IX86_BUILTIN_PABSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI },
34735 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_blendmv32hi, "__builtin_ia32_blendmw_512_mask", IX86_BUILTIN_BLENDMW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI },
34736 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_blendmv64qi, "__builtin_ia32_blendmb_512_mask", IX86_BUILTIN_BLENDMB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI },
34737 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cmpv64qi3_mask, "__builtin_ia32_cmpb512_mask", IX86_BUILTIN_CMPB512, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_INT_UDI },
34738 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cmpv32hi3_mask, "__builtin_ia32_cmpw512_mask", IX86_BUILTIN_CMPW512, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_INT_USI },
34739 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ucmpv64qi3_mask, "__builtin_ia32_ucmpb512_mask", IX86_BUILTIN_UCMPB512, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_INT_UDI },
34740 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ucmpv32hi3_mask, "__builtin_ia32_ucmpw512_mask", IX86_BUILTIN_UCMPW512, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_INT_USI },
34743 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52luqv8di_mask, "__builtin_ia32_vpmadd52luq512_mask", IX86_BUILTIN_VPMADD52LUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
34744 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52luqv8di_maskz, "__builtin_ia32_vpmadd52luq512_maskz", IX86_BUILTIN_VPMADD52LUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
34745 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52huqv8di_mask, "__builtin_ia32_vpmadd52huq512_mask", IX86_BUILTIN_VPMADD52HUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
34746 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52huqv8di_maskz, "__builtin_ia32_vpmadd52huq512_maskz", IX86_BUILTIN_VPMADD52HUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
34747 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv4di_mask, "__builtin_ia32_vpmadd52luq256_mask", IX86_BUILTIN_VPMADD52LUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34748 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv4di_maskz, "__builtin_ia32_vpmadd52luq256_maskz", IX86_BUILTIN_VPMADD52LUQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34749 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv4di_mask, "__builtin_ia32_vpmadd52huq256_mask", IX86_BUILTIN_VPMADD52HUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34750 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv4di_maskz, "__builtin_ia32_vpmadd52huq256_maskz", IX86_BUILTIN_VPMADD52HUQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34751 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv2di_mask, "__builtin_ia32_vpmadd52luq128_mask", IX86_BUILTIN_VPMADD52LUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34752 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv2di_maskz, "__builtin_ia32_vpmadd52luq128_maskz", IX86_BUILTIN_VPMADD52LUQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34753 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv2di_mask, "__builtin_ia32_vpmadd52huq128_mask", IX86_BUILTIN_VPMADD52HUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34754 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv2di_maskz, "__builtin_ia32_vpmadd52huq128_maskz", IX86_BUILTIN_VPMADD52HUQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34757 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_vpmultishiftqbv64qi_mask, "__builtin_ia32_vpmultishiftqb512_mask", IX86_BUILTIN_VPMULTISHIFTQB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34758 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpmultishiftqbv32qi_mask, "__builtin_ia32_vpmultishiftqb256_mask", IX86_BUILTIN_VPMULTISHIFTQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34759 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpmultishiftqbv16qi_mask, "__builtin_ia32_vpmultishiftqb128_mask", IX86_BUILTIN_VPMULTISHIFTQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34760 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_permvarv64qi_mask, "__builtin_ia32_permvarqi512_mask", IX86_BUILTIN_VPERMVARQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34761 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermt2varv64qi3_mask, "__builtin_ia32_vpermt2varqi512_mask", IX86_BUILTIN_VPERMT2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34762 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermt2varv64qi3_maskz, "__builtin_ia32_vpermt2varqi512_maskz", IX86_BUILTIN_VPERMT2VARQI512_MASKZ, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34763 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermi2varv64qi3_mask, "__builtin_ia32_vpermi2varqi512_mask", IX86_BUILTIN_VPERMI2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34764 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv32qi_mask, "__builtin_ia32_permvarqi256_mask", IX86_BUILTIN_VPERMVARQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34765 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv16qi_mask, "__builtin_ia32_permvarqi128_mask", IX86_BUILTIN_VPERMVARQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34766 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv32qi3_mask, "__builtin_ia32_vpermt2varqi256_mask", IX86_BUILTIN_VPERMT2VARQI256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34767 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv32qi3_maskz, "__builtin_ia32_vpermt2varqi256_maskz", IX86_BUILTIN_VPERMT2VARQI256_MASKZ, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34768 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16qi3_mask, "__builtin_ia32_vpermt2varqi128_mask", IX86_BUILTIN_VPERMT2VARQI128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34769 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16qi3_maskz, "__builtin_ia32_vpermt2varqi128_maskz", IX86_BUILTIN_VPERMT2VARQI128_MASKZ, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34770 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv32qi3_mask, "__builtin_ia32_vpermi2varqi256_mask", IX86_BUILTIN_VPERMI2VARQI256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34771 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv16qi3_mask, "__builtin_ia32_vpermi2varqi128_mask", IX86_BUILTIN_VPERMI2VARQI128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34774 /* Builtins with rounding support. */
34775 static const struct builtin_description bdesc_round_args[] =
34778 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8df3_mask_round, "__builtin_ia32_addpd512_mask", IX86_BUILTIN_ADDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34779 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16sf3_mask_round, "__builtin_ia32_addps512_mask", IX86_BUILTIN_ADDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34780 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmaddv2df3_round, "__builtin_ia32_addsd_round", IX86_BUILTIN_ADDSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
34781 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmaddv4sf3_round, "__builtin_ia32_addss_round", IX86_BUILTIN_ADDSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
34782 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8df3_mask_round, "__builtin_ia32_cmppd512_mask", IX86_BUILTIN_CMPPD512, UNKNOWN, (int) UQI_FTYPE_V8DF_V8DF_INT_UQI_INT },
34783 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16sf3_mask_round, "__builtin_ia32_cmpps512_mask", IX86_BUILTIN_CMPPS512, UNKNOWN, (int) UHI_FTYPE_V16SF_V16SF_INT_UHI_INT },
34784 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv2df3_mask_round, "__builtin_ia32_cmpsd_mask", IX86_BUILTIN_CMPSD_MASK, UNKNOWN, (int) UQI_FTYPE_V2DF_V2DF_INT_UQI_INT },
34785 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv4sf3_mask_round, "__builtin_ia32_cmpss_mask", IX86_BUILTIN_CMPSS_MASK, UNKNOWN, (int) UQI_FTYPE_V4SF_V4SF_INT_UQI_INT },
34786 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_comi_round, "__builtin_ia32_vcomisd", IX86_BUILTIN_COMIDF, UNKNOWN, (int) INT_FTYPE_V2DF_V2DF_INT_INT },
34787 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_comi_round, "__builtin_ia32_vcomiss", IX86_BUILTIN_COMISF, UNKNOWN, (int) INT_FTYPE_V4SF_V4SF_INT_INT },
34788 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv16siv16sf2_mask_round, "__builtin_ia32_cvtdq2ps512_mask", IX86_BUILTIN_CVTDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
34789 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2dq512_mask_round, "__builtin_ia32_cvtpd2dq512_mask", IX86_BUILTIN_CVTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
34790 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2ps512_mask_round, "__builtin_ia32_cvtpd2ps512_mask", IX86_BUILTIN_CVTPD2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DF_V8SF_QI_INT },
34791 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_notruncv8dfv8si2_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
34792 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtph2ps512_mask_round, "__builtin_ia32_vcvtph2ps512_mask", IX86_BUILTIN_CVTPH2PS512, UNKNOWN, (int) V16SF_FTYPE_V16HI_V16SF_HI_INT },
34793 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
34794 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT },
34795 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2udq512_mask", IX86_BUILTIN_CVTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
34796 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2ss_round, "__builtin_ia32_cvtsd2ss_round", IX86_BUILTIN_CVTSD2SS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF_INT },
34797 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq_round, "__builtin_ia32_cvtsi2sd64", IX86_BUILTIN_CVTSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT64_INT },
34798 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtsi2ss_round, "__builtin_ia32_cvtsi2ss32", IX86_BUILTIN_CVTSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_INT },
34799 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq_round, "__builtin_ia32_cvtsi2ss64", IX86_BUILTIN_CVTSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT64_INT },
34800 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtss2sd_round, "__builtin_ia32_cvtss2sd_round", IX86_BUILTIN_CVTSS2SD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_INT },
34801 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
34802 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
34803 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
34804 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
34805 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
34806 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2sd64_round, "__builtin_ia32_cvtusi2sd64", IX86_BUILTIN_CVTUSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT64_INT },
34807 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2ss32_round, "__builtin_ia32_cvtusi2ss32", IX86_BUILTIN_CVTUSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT_INT },
34808 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2ss64_round, "__builtin_ia32_cvtusi2ss64", IX86_BUILTIN_CVTUSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT64_INT },
34809 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv8df3_mask_round, "__builtin_ia32_divpd512_mask", IX86_BUILTIN_DIVPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34810 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv16sf3_mask_round, "__builtin_ia32_divps512_mask", IX86_BUILTIN_DIVPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34811 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmdivv2df3_round, "__builtin_ia32_divsd_round", IX86_BUILTIN_DIVSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
34812 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmdivv4sf3_round, "__builtin_ia32_divss_round", IX86_BUILTIN_DIVSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
34813 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_mask_round, "__builtin_ia32_fixupimmpd512_mask", IX86_BUILTIN_FIXUPIMMPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
34814 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_maskz_round, "__builtin_ia32_fixupimmpd512_maskz", IX86_BUILTIN_FIXUPIMMPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
34815 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_mask_round, "__builtin_ia32_fixupimmps512_mask", IX86_BUILTIN_FIXUPIMMPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
34816 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_maskz_round, "__builtin_ia32_fixupimmps512_maskz", IX86_BUILTIN_FIXUPIMMPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
34817 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_mask_round, "__builtin_ia32_fixupimmsd_mask", IX86_BUILTIN_FIXUPIMMSD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
34818 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_maskz_round, "__builtin_ia32_fixupimmsd_maskz", IX86_BUILTIN_FIXUPIMMSD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
34819 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_mask_round, "__builtin_ia32_fixupimmss_mask", IX86_BUILTIN_FIXUPIMMSS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
34820 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_maskz_round, "__builtin_ia32_fixupimmss_maskz", IX86_BUILTIN_FIXUPIMMSS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
34821 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv8df_mask_round, "__builtin_ia32_getexppd512_mask", IX86_BUILTIN_GETEXPPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
34822 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv16sf_mask_round, "__builtin_ia32_getexpps512_mask", IX86_BUILTIN_GETEXPPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
34823 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv2df_round, "__builtin_ia32_getexpsd128_round", IX86_BUILTIN_GETEXPSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
34824 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv4sf_round, "__builtin_ia32_getexpss128_round", IX86_BUILTIN_GETEXPSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
34825 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv8df_mask_round, "__builtin_ia32_getmantpd512_mask", IX86_BUILTIN_GETMANTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
34826 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv16sf_mask_round, "__builtin_ia32_getmantps512_mask", IX86_BUILTIN_GETMANTPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
34827 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv2df_round, "__builtin_ia32_getmantsd_round", IX86_BUILTIN_GETMANTSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
34828 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv4sf_round, "__builtin_ia32_getmantss_round", IX86_BUILTIN_GETMANTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
34829 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8df3_mask_round, "__builtin_ia32_maxpd512_mask", IX86_BUILTIN_MAXPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34830 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16sf3_mask_round, "__builtin_ia32_maxps512_mask", IX86_BUILTIN_MAXPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34831 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsmaxv2df3_round, "__builtin_ia32_maxsd_round", IX86_BUILTIN_MAXSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
34832 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsmaxv4sf3_round, "__builtin_ia32_maxss_round", IX86_BUILTIN_MAXSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
34833 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8df3_mask_round, "__builtin_ia32_minpd512_mask", IX86_BUILTIN_MINPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34834 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16sf3_mask_round, "__builtin_ia32_minps512_mask", IX86_BUILTIN_MINPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34835 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsminv2df3_round, "__builtin_ia32_minsd_round", IX86_BUILTIN_MINSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
34836 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsminv4sf3_round, "__builtin_ia32_minss_round", IX86_BUILTIN_MINSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
34837 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv8df3_mask_round, "__builtin_ia32_mulpd512_mask", IX86_BUILTIN_MULPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34838 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16sf3_mask_round, "__builtin_ia32_mulps512_mask", IX86_BUILTIN_MULPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34839 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmmulv2df3_round, "__builtin_ia32_mulsd_round", IX86_BUILTIN_MULSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
34840 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmmulv4sf3_round, "__builtin_ia32_mulss_round", IX86_BUILTIN_MULSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
34841 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev8df_mask_round, "__builtin_ia32_rndscalepd_mask", IX86_BUILTIN_RNDSCALEPD, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
34842 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev16sf_mask_round, "__builtin_ia32_rndscaleps_mask", IX86_BUILTIN_RNDSCALEPS, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
34843 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev2df_round, "__builtin_ia32_rndscalesd_round", IX86_BUILTIN_RNDSCALESD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
34844 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev4sf_round, "__builtin_ia32_rndscaless_round", IX86_BUILTIN_RNDSCALESS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
34845 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv8df_mask_round, "__builtin_ia32_scalefpd512_mask", IX86_BUILTIN_SCALEFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34846 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv16sf_mask_round, "__builtin_ia32_scalefps512_mask", IX86_BUILTIN_SCALEFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34847 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv2df_round, "__builtin_ia32_scalefsd_round", IX86_BUILTIN_SCALEFSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
34848 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv4sf_round, "__builtin_ia32_scalefss_round", IX86_BUILTIN_SCALEFSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
34849 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2_mask_round, "__builtin_ia32_sqrtpd512_mask", IX86_BUILTIN_SQRTPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
34850 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv16sf2_mask_round, "__builtin_ia32_sqrtps512_mask", IX86_BUILTIN_SQRTPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
34851 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsqrtv2df2_round, "__builtin_ia32_sqrtsd_round", IX86_BUILTIN_SQRTSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
34852 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsqrtv4sf2_round, "__builtin_ia32_sqrtss_round", IX86_BUILTIN_SQRTSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
34853 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8df3_mask_round, "__builtin_ia32_subpd512_mask", IX86_BUILTIN_SUBPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34854 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16sf3_mask_round, "__builtin_ia32_subps512_mask", IX86_BUILTIN_SUBPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34855 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsubv2df3_round, "__builtin_ia32_subsd_round", IX86_BUILTIN_SUBSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
34856 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsubv4sf3_round, "__builtin_ia32_subss_round", IX86_BUILTIN_SUBSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
34857 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2si_round, "__builtin_ia32_vcvtsd2si32", IX86_BUILTIN_VCVTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
34858 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq_round, "__builtin_ia32_vcvtsd2si64", IX86_BUILTIN_VCVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
34859 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtsd2usi_round, "__builtin_ia32_vcvtsd2usi32", IX86_BUILTIN_VCVTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
34860 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtsd2usiq_round, "__builtin_ia32_vcvtsd2usi64", IX86_BUILTIN_VCVTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
34861 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtss2si_round, "__builtin_ia32_vcvtss2si32", IX86_BUILTIN_VCVTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
34862 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq_round, "__builtin_ia32_vcvtss2si64", IX86_BUILTIN_VCVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
34863 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtss2usi_round, "__builtin_ia32_vcvtss2usi32", IX86_BUILTIN_VCVTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
34864 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtss2usiq_round, "__builtin_ia32_vcvtss2usi64", IX86_BUILTIN_VCVTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
34865 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvttsd2si_round, "__builtin_ia32_vcvttsd2si32", IX86_BUILTIN_VCVTTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
34866 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq_round, "__builtin_ia32_vcvttsd2si64", IX86_BUILTIN_VCVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
34867 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttsd2usi_round, "__builtin_ia32_vcvttsd2usi32", IX86_BUILTIN_VCVTTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
34868 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttsd2usiq_round, "__builtin_ia32_vcvttsd2usi64", IX86_BUILTIN_VCVTTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
34869 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvttss2si_round, "__builtin_ia32_vcvttss2si32", IX86_BUILTIN_VCVTTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
34870 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq_round, "__builtin_ia32_vcvttss2si64", IX86_BUILTIN_VCVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
34871 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttss2usi_round, "__builtin_ia32_vcvttss2usi32", IX86_BUILTIN_VCVTTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
34872 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttss2usiq_round, "__builtin_ia32_vcvttss2usi64", IX86_BUILTIN_VCVTTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
34873 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask_round, "__builtin_ia32_vfmaddpd512_mask", IX86_BUILTIN_VFMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34874 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask3_round, "__builtin_ia32_vfmaddpd512_mask3", IX86_BUILTIN_VFMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34875 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_maskz_round, "__builtin_ia32_vfmaddpd512_maskz", IX86_BUILTIN_VFMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34876 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask_round, "__builtin_ia32_vfmaddps512_mask", IX86_BUILTIN_VFMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34877 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask3_round, "__builtin_ia32_vfmaddps512_mask3", IX86_BUILTIN_VFMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34878 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_maskz_round, "__builtin_ia32_vfmaddps512_maskz", IX86_BUILTIN_VFMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34879 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v2df_round, "__builtin_ia32_vfmaddsd3_round", IX86_BUILTIN_VFMADDSD3_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_INT },
34880 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v4sf_round, "__builtin_ia32_vfmaddss3_round", IX86_BUILTIN_VFMADDSS3_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_INT },
34881 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask_round, "__builtin_ia32_vfmaddsubpd512_mask", IX86_BUILTIN_VFMADDSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34882 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask3_round, "__builtin_ia32_vfmaddsubpd512_mask3", IX86_BUILTIN_VFMADDSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34883 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_maskz_round, "__builtin_ia32_vfmaddsubpd512_maskz", IX86_BUILTIN_VFMADDSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34884 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask_round, "__builtin_ia32_vfmaddsubps512_mask", IX86_BUILTIN_VFMADDSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34885 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask3_round, "__builtin_ia32_vfmaddsubps512_mask3", IX86_BUILTIN_VFMADDSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34886 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_maskz_round, "__builtin_ia32_vfmaddsubps512_maskz", IX86_BUILTIN_VFMADDSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34887 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v8df_mask3_round, "__builtin_ia32_vfmsubaddpd512_mask3", IX86_BUILTIN_VFMSUBADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34888 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v16sf_mask3_round, "__builtin_ia32_vfmsubaddps512_mask3", IX86_BUILTIN_VFMSUBADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34889 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v8df_mask3_round, "__builtin_ia32_vfmsubpd512_mask3", IX86_BUILTIN_VFMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34890 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v16sf_mask3_round, "__builtin_ia32_vfmsubps512_mask3", IX86_BUILTIN_VFMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34891 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v8df_mask_round, "__builtin_ia32_vfnmaddpd512_mask", IX86_BUILTIN_VFNMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34892 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v16sf_mask_round, "__builtin_ia32_vfnmaddps512_mask", IX86_BUILTIN_VFNMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34893 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask_round, "__builtin_ia32_vfnmsubpd512_mask", IX86_BUILTIN_VFNMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34894 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask3_round, "__builtin_ia32_vfnmsubpd512_mask3", IX86_BUILTIN_VFNMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34895 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask_round, "__builtin_ia32_vfnmsubps512_mask", IX86_BUILTIN_VFNMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34896 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask3_round, "__builtin_ia32_vfnmsubps512_mask3", IX86_BUILTIN_VFNMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34899 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v8df_mask_round, "__builtin_ia32_exp2pd_mask", IX86_BUILTIN_EXP2PD_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
34900 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf_mask_round, "__builtin_ia32_exp2ps_mask", IX86_BUILTIN_EXP2PS_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
34901 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v8df_mask_round, "__builtin_ia32_rcp28pd_mask", IX86_BUILTIN_RCP28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
34902 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v16sf_mask_round, "__builtin_ia32_rcp28ps_mask", IX86_BUILTIN_RCP28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
34903 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v2df_round, "__builtin_ia32_rcp28sd_round", IX86_BUILTIN_RCP28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
34904 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v4sf_round, "__builtin_ia32_rcp28ss_round", IX86_BUILTIN_RCP28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
34905 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v8df_mask_round, "__builtin_ia32_rsqrt28pd_mask", IX86_BUILTIN_RSQRT28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
34906 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v16sf_mask_round, "__builtin_ia32_rsqrt28ps_mask", IX86_BUILTIN_RSQRT28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
34907 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v2df_round, "__builtin_ia32_rsqrt28sd_round", IX86_BUILTIN_RSQRT28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
34908 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v4sf_round, "__builtin_ia32_rsqrt28ss_round", IX86_BUILTIN_RSQRT28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
34911 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangesv2df_round, "__builtin_ia32_rangesd128_round", IX86_BUILTIN_RANGESD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
34912 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangesv4sf_round, "__builtin_ia32_rangess128_round", IX86_BUILTIN_RANGESS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
34913 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2qq512_mask", IX86_BUILTIN_CVTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
34914 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_cvtps2qqv8di_mask_round, "__builtin_ia32_cvtps2qq512_mask", IX86_BUILTIN_CVTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
34915 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2uqq512_mask", IX86_BUILTIN_CVTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
34916 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_cvtps2uqqv8di_mask_round, "__builtin_ia32_cvtps2uqq512_mask", IX86_BUILTIN_CVTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
34917 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_floatv8div8sf2_mask_round, "__builtin_ia32_cvtqq2ps512_mask", IX86_BUILTIN_CVTQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT },
34918 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufloatv8div8sf2_mask_round, "__builtin_ia32_cvtuqq2ps512_mask", IX86_BUILTIN_CVTUQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT },
34919 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_floatv8div8df2_mask_round, "__builtin_ia32_cvtqq2pd512_mask", IX86_BUILTIN_CVTQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT },
34920 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufloatv8div8df2_mask_round, "__builtin_ia32_cvtuqq2pd512_mask", IX86_BUILTIN_CVTUQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT },
34921 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2qq512_mask", IX86_BUILTIN_CVTTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
34922 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2uqq512_mask", IX86_BUILTIN_CVTTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
34923 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2qq512_mask", IX86_BUILTIN_CVTTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
34924 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2uqq512_mask", IX86_BUILTIN_CVTTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
34925 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangepv16sf_mask_round, "__builtin_ia32_rangeps512_mask", IX86_BUILTIN_RANGEPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT },
34926 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangepv8df_mask_round, "__builtin_ia32_rangepd512_mask", IX86_BUILTIN_RANGEPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT },
34929 /* Builtins for MPX (Intel Memory Protection Extensions).  The icode
   field of every entry is (enum insn_code) 0: no named insn pattern is
   referenced here, so these builtins are presumably expanded by hand in
   the builtin expander rather than via their icode -- confirm against
   ix86_expand_builtin.  Only the ISA mask, the builtin name, the
   IX86_BUILTIN_* code and the prototype enumerator are meaningful.  */
34930 static const struct builtin_description bdesc_mpx[] =
34932 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndstx", IX86_BUILTIN_BNDSTX, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND_PCVOID },
34933 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcl", IX86_BUILTIN_BNDCL, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
34934 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcu", IX86_BUILTIN_BNDCU, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
34937 /* Const builtins for MPX.  Same layout as bdesc_mpx above, but kept
   in a separate table -- the name suggests these are registered with
   const/pure semantics (no side effects visible to the optimizers);
   verify against the code that walks this table.  As in bdesc_mpx,
   the zero icode means no insn pattern is referenced from here.  */
34938 static const struct builtin_description bdesc_mpx_const[] =
34940 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndmk", IX86_BUILTIN_BNDMK, UNKNOWN, (int) BND_FTYPE_PCVOID_ULONG },
34941 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndldx", IX86_BUILTIN_BNDLDX, UNKNOWN, (int) BND_FTYPE_PCVOID_PCVOID },
34942 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_narrow_bounds", IX86_BUILTIN_BNDNARROW, UNKNOWN, (int) PVOID_FTYPE_PCVOID_BND_ULONG },
34943 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndint", IX86_BUILTIN_BNDINT, UNKNOWN, (int) BND_FTYPE_BND_BND },
34944 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_sizeof", IX86_BUILTIN_SIZEOF, UNKNOWN, (int) ULONG_FTYPE_VOID },
34945 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndlower", IX86_BUILTIN_BNDLOWER, UNKNOWN, (int) PVOID_FTYPE_BND },
34946 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndupper", IX86_BUILTIN_BNDUPPER, UNKNOWN, (int) PVOID_FTYPE_BND },
34947 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndret", IX86_BUILTIN_BNDRET, UNKNOWN, (int) BND_FTYPE_PCVOID },
34950 /* FMA4 and XOP. */
/* Shorthand aliases that map the MULTI_ARG_* names used in the
   bdesc_multi_arg table below onto the corresponding *_FTYPE_*
   prototype enumerators.  The naming scheme, as evidenced by the
   right-hand sides: the first number is the argument count; the
   element tag is SF/DF (float/double), SI/DI (32/64-bit int),
   HI/QI (16/8-bit int); a "2" suffix on the tag selects the 256-bit
   vector form (V8SF, V4DF, V4DI, ...); a second tag (e.g. SI_DI,
   HI_SI, QI_HI) marks a mixed-width signature; and a trailing
   IMM / CMP / TF names the kind of the extra final operand.  */
/* Four-operand forms: two vectors, a selector vector, and an
   immediate.  */
34951 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
34952 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
34953 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
34954 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
/* Three same-type vector operands (128-bit, then mixed-width, then
   256-bit variants).  */
34955 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
34956 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
34957 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
34958 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
34959 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
34960 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
34961 #define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
34962 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
34963 #define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
34964 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
34965 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
34966 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
34967 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
34968 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
/* Two vector operands, optionally with an immediate (IMM), a
   comparison code (CMP), or a TF tag as the final operand.  */
34969 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
34970 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
34971 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
34972 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
34973 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
34974 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
34975 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
34976 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
34977 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
34978 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
34979 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
34980 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
34981 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
34982 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
34983 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
34984 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
34985 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
34986 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
34987 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
34988 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
/* Single vector operand, including widening forms whose result is
   wider than the source element type (e.g. V2DI result from V4SI
   input).  */
34989 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
34990 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
34991 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
34992 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
34993 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
34994 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
34995 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
34996 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
34997 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
34998 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
34999 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
35000 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
35001 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
35002 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
35004 static const struct builtin_description bdesc_multi_arg[] =
35006 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
35007 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
35008 UNKNOWN, (int)MULTI_ARG_3_SF },
35009 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
35010 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
35011 UNKNOWN, (int)MULTI_ARG_3_DF },
35013 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf,
35014 "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3,
35015 UNKNOWN, (int)MULTI_ARG_3_SF },
35016 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df,
35017 "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3,
35018 UNKNOWN, (int)MULTI_ARG_3_DF },
35020 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
35021 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
35022 UNKNOWN, (int)MULTI_ARG_3_SF },
35023 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
35024 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
35025 UNKNOWN, (int)MULTI_ARG_3_DF },
35026 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
35027 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
35028 UNKNOWN, (int)MULTI_ARG_3_SF2 },
35029 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
35030 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
35031 UNKNOWN, (int)MULTI_ARG_3_DF2 },
35033 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
35034 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
35035 UNKNOWN, (int)MULTI_ARG_3_SF },
35036 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
35037 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
35038 UNKNOWN, (int)MULTI_ARG_3_DF },
35039 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
35040 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
35041 UNKNOWN, (int)MULTI_ARG_3_SF2 },
35042 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
35043 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
35044 UNKNOWN, (int)MULTI_ARG_3_DF2 },
35046 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
35047 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
35048 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
35049 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
35050 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi",IX86_BUILTIN_VPCMOV_V16QI,UNKNOWN, (int)MULTI_ARG_3_QI },
35051 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
35052 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
35054 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
35055 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
35056 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
35057 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
35058 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
35059 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
35060 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
35062 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
35064 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
35065 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
35066 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
35067 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
35068 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
35069 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
35070 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
35071 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
35072 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
35073 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
35074 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
35075 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
35077 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
35078 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
35079 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
35080 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
35081 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
35082 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
35083 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
35084 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
35085 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
35086 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
35087 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
35088 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
35089 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
35090 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
35091 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
35092 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
35094 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_1_SF },
35095 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_1_DF },
35096 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
35097 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
35098 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
35099 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
35101 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
35102 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
35103 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
35104 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
35105 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
35106 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
35107 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
35108 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
35109 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
35110 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
35111 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
35112 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
35113 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
35114 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
35115 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
35117 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
35118 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
35119 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
35120 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
35121 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
35122 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
35123 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
35125 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
35126 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
35127 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
35128 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
35129 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
35130 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
35131 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
35133 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
35134 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
35135 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
35136 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
35137 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
35138 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
35139 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
35141 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
35142 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
35143 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
35144 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
35145 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
35146 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
35147 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
35149 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
35150 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
35151 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
35152 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
35153 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
35154 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
35155 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
35157 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
35158 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
35159 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
35160 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
35161 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
35162 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
35163 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
35165 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
35166 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
35167 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
35168 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
35169 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
35170 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
35171 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
35173 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
35174 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
35175 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
35176 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
35177 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
35178 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
35179 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
35181 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
35182 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
35183 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
35184 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
35185 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
35186 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
35187 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
35188 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
35190 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
35191 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
35192 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
35193 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
35194 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
35195 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
35196 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
35197 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
35199 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
35200 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
35201 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
35202 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
35206 /* TM vector builtins. */
35208 /* Reuse the existing x86-specific `struct builtin_description' because
35209 we're lazy. Add casts to make them fit. */
35210 static const struct builtin_description bdesc_tm[] =
/* Each entry maps a generic transactional-memory built-in code (cast into
   the ix86_builtins enum, per the note above the array) to its "_ITM_*"
   name and ix86 function type.  The insn code is CODE_FOR_nothing: these
   are not expanded from a single machine pattern.  Entries are gated on
   the ISA mask matching the vector width they operate on.  */
/* 64-bit (MMX, V2SI) stores: plain, WaR (write-after-read) and
   WaW (write-after-write) variants.  */
35212 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
35213 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
35214 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
/* 64-bit loads: plain, RaR (read-after-read), RaW (read-after-write)
   and RfW (read-for-write) variants.  */
35215 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
35216 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
35217 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
35218 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
/* 128-bit (SSE, V4SF) stores and loads, same access-kind variants.  */
35220 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
35221 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
35222 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
35223 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
35224 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
35225 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
35226 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
/* 256-bit (AVX, V8SF) stores and loads, same access-kind variants.  */
35228 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
35229 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
35230 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
35231 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
35232 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
35233 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
35234 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
/* Transaction-log entries, one per vector width; all take a const
   void pointer.  */
35236 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
35237 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
35238 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
35241 /* Initialize the transactional memory vector load/store builtins.
   Walks bdesc_tm, registering each vector TM built-in whose ISA is
   available and cloning the attributes of the corresponding scalar
   TM built-ins so the vector variants carry identical semantics.
   No-op when the front end provides no TM support.  */
35244 ix86_init_tm_builtins (void)
35246 enum ix86_builtin_func_type ftype;
35247 const struct builtin_description *d;
35250 tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
35251 tree attrs_log, attrs_type_log;
35256 /* If there are no builtins defined, we must be compiling in a
35257 language without trans-mem support. */
35258 if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
/* Harvest both DECL_ATTRIBUTES and TYPE_ATTRIBUTES from the scalar
   1-byte TM load/store/log built-ins; they are applied verbatim to
   every vector variant registered below.  */
35261 /* Use whatever attributes a normal TM load has. */
35262 decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
35263 attrs_load = DECL_ATTRIBUTES (decl);
35264 attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
35265 /* Use whatever attributes a normal TM store has. */
35266 decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
35267 attrs_store = DECL_ATTRIBUTES (decl);
35268 attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
35269 /* Use whatever attributes a normal TM log has. */
35270 decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
35271 attrs_log = DECL_ATTRIBUTES (decl);
35272 attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));
35274 for (i = 0, d = bdesc_tm;
35275 i < ARRAY_SIZE (bdesc_tm);
/* Register when the entry's ISA is enabled, or unconditionally when
   the front end creates extern-scope built-in decls (so later
   target-specific option changes can still reach them).  */
35278 if ((d->mask & ix86_isa_flags) != 0
35279 || (lang_hooks.builtin_function
35280 == lang_hooks.builtin_function_ext_scope))
35282 tree type, attrs, attrs_type;
35283 enum built_in_function code = (enum built_in_function) d->code;
35285 ftype = (enum ix86_builtin_func_type) d->flag;
35286 type = ix86_get_builtin_func_type (ftype);
/* Select load/store/log attributes based on the generic TM code;
   anything that is neither a TM load nor a TM store is a log entry.  */
35288 if (BUILTIN_TM_LOAD_P (code))
35290 attrs = attrs_load;
35291 attrs_type = attrs_type_load;
35293 else if (BUILTIN_TM_STORE_P (code))
35295 attrs = attrs_store;
35296 attrs_type = attrs_type_store;
35301 attrs_type = attrs_type_log;
/* Register under the "__builtin__ITM_*" name, with the bare
   "_ITM_*" spelling (the "__builtin_" prefix stripped) as the
   directly-callable name.  */
35303 decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
35304 /* The builtin without the prefix for
35305 calling it directly. */
35306 d->name + strlen ("__builtin_"),
35308 /* add_builtin_function() will set the DECL_ATTRIBUTES, now
35309 set the TYPE_ATTRIBUTES. */
35310 decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);
/* Record DECL as the implementation of generic built-in CODE
   (implicit_p = false: not usable for implicit generation).  */
35312 set_builtin_decl (code, decl, false);
35317 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
35318 in the current target ISA to allow the user to compile particular modules
35319 with different target specific options that differ from the command line
35322 ix86_init_mmx_sse_builtins (void)
35324 const struct builtin_description * d;
35325 enum ix86_builtin_func_type ftype;
35328 /* Add all special builtins with variable number of operands. */
35329 for (i = 0, d = bdesc_special_args;
35330 i < ARRAY_SIZE (bdesc_special_args);
35336 ftype = (enum ix86_builtin_func_type) d->flag;
35337 def_builtin (d->mask, d->name, ftype, d->code);
35340 /* Add all builtins with variable number of operands. */
35341 for (i = 0, d = bdesc_args;
35342 i < ARRAY_SIZE (bdesc_args);
35348 ftype = (enum ix86_builtin_func_type) d->flag;
35349 def_builtin_const (d->mask, d->name, ftype, d->code);
35352 /* Add all builtins with rounding. */
35353 for (i = 0, d = bdesc_round_args;
35354 i < ARRAY_SIZE (bdesc_round_args);
35360 ftype = (enum ix86_builtin_func_type) d->flag;
35361 def_builtin_const (d->mask, d->name, ftype, d->code);
35364 /* pcmpestr[im] insns. */
35365 for (i = 0, d = bdesc_pcmpestr;
35366 i < ARRAY_SIZE (bdesc_pcmpestr);
35369 if (d->code == IX86_BUILTIN_PCMPESTRM128)
35370 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
35372 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
35373 def_builtin_const (d->mask, d->name, ftype, d->code);
35376 /* pcmpistr[im] insns. */
35377 for (i = 0, d = bdesc_pcmpistr;
35378 i < ARRAY_SIZE (bdesc_pcmpistr);
35381 if (d->code == IX86_BUILTIN_PCMPISTRM128)
35382 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
35384 ftype = INT_FTYPE_V16QI_V16QI_INT;
35385 def_builtin_const (d->mask, d->name, ftype, d->code);
35388 /* comi/ucomi insns. */
35389 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
35391 if (d->mask == OPTION_MASK_ISA_SSE2)
35392 ftype = INT_FTYPE_V2DF_V2DF;
35394 ftype = INT_FTYPE_V4SF_V4SF;
35395 def_builtin_const (d->mask, d->name, ftype, d->code);
35399 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
35400 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
35401 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
35402 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
35404 /* SSE or 3DNow!A */
35405 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
35406 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
35407 IX86_BUILTIN_MASKMOVQ);
35410 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
35411 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
35413 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
35414 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
35415 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
35416 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
35419 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
35420 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
35421 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
35422 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
35425 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
35426 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
35427 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
35428 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
35429 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
35430 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
35431 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
35432 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
35433 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
35434 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
35435 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
35436 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
35439 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
35440 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
35443 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
35444 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
35445 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
35446 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
35447 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
35448 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
35449 IX86_BUILTIN_RDRAND64_STEP);
35452 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
35453 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
35454 IX86_BUILTIN_GATHERSIV2DF);
35456 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
35457 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
35458 IX86_BUILTIN_GATHERSIV4DF);
35460 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
35461 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
35462 IX86_BUILTIN_GATHERDIV2DF);
35464 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
35465 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
35466 IX86_BUILTIN_GATHERDIV4DF);
35468 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
35469 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
35470 IX86_BUILTIN_GATHERSIV4SF);
35472 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
35473 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
35474 IX86_BUILTIN_GATHERSIV8SF);
35476 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
35477 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
35478 IX86_BUILTIN_GATHERDIV4SF);
35480 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
35481 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
35482 IX86_BUILTIN_GATHERDIV8SF);
35484 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
35485 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
35486 IX86_BUILTIN_GATHERSIV2DI);
35488 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
35489 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
35490 IX86_BUILTIN_GATHERSIV4DI);
35492 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
35493 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
35494 IX86_BUILTIN_GATHERDIV2DI);
35496 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
35497 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
35498 IX86_BUILTIN_GATHERDIV4DI);
35500 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
35501 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
35502 IX86_BUILTIN_GATHERSIV4SI);
35504 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
35505 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
35506 IX86_BUILTIN_GATHERSIV8SI);
35508 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
35509 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
35510 IX86_BUILTIN_GATHERDIV4SI);
35512 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
35513 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
35514 IX86_BUILTIN_GATHERDIV8SI);
35516 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df ",
35517 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
35518 IX86_BUILTIN_GATHERALTSIV4DF);
35520 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256 ",
35521 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
35522 IX86_BUILTIN_GATHERALTDIV8SF);
35524 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di ",
35525 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
35526 IX86_BUILTIN_GATHERALTSIV4DI);
35528 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256 ",
35529 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
35530 IX86_BUILTIN_GATHERALTDIV8SI);
35533 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16sf",
35534 V16SF_FTYPE_V16SF_PCFLOAT_V16SI_HI_INT,
35535 IX86_BUILTIN_GATHER3SIV16SF);
35537 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8df",
35538 V8DF_FTYPE_V8DF_PCDOUBLE_V8SI_QI_INT,
35539 IX86_BUILTIN_GATHER3SIV8DF);
35541 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16sf",
35542 V8SF_FTYPE_V8SF_PCFLOAT_V8DI_QI_INT,
35543 IX86_BUILTIN_GATHER3DIV16SF);
35545 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8df",
35546 V8DF_FTYPE_V8DF_PCDOUBLE_V8DI_QI_INT,
35547 IX86_BUILTIN_GATHER3DIV8DF);
35549 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16si",
35550 V16SI_FTYPE_V16SI_PCINT_V16SI_HI_INT,
35551 IX86_BUILTIN_GATHER3SIV16SI);
35553 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8di",
35554 V8DI_FTYPE_V8DI_PCINT64_V8SI_QI_INT,
35555 IX86_BUILTIN_GATHER3SIV8DI);
35557 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16si",
35558 V8SI_FTYPE_V8SI_PCINT_V8DI_QI_INT,
35559 IX86_BUILTIN_GATHER3DIV16SI);
35561 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8di",
35562 V8DI_FTYPE_V8DI_PCINT64_V8DI_QI_INT,
35563 IX86_BUILTIN_GATHER3DIV8DI);
35565 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8df ",
35566 V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT,
35567 IX86_BUILTIN_GATHER3ALTSIV8DF);
35569 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8sf ",
35570 V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT,
35571 IX86_BUILTIN_GATHER3ALTDIV16SF);
35573 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8di ",
35574 V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT,
35575 IX86_BUILTIN_GATHER3ALTSIV8DI);
35577 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8si ",
35578 V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT,
35579 IX86_BUILTIN_GATHER3ALTDIV16SI);
35581 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16sf",
35582 VOID_FTYPE_PFLOAT_HI_V16SI_V16SF_INT,
35583 IX86_BUILTIN_SCATTERSIV16SF);
35585 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8df",
35586 VOID_FTYPE_PDOUBLE_QI_V8SI_V8DF_INT,
35587 IX86_BUILTIN_SCATTERSIV8DF);
35589 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16sf",
35590 VOID_FTYPE_PFLOAT_QI_V8DI_V8SF_INT,
35591 IX86_BUILTIN_SCATTERDIV16SF);
35593 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8df",
35594 VOID_FTYPE_PDOUBLE_QI_V8DI_V8DF_INT,
35595 IX86_BUILTIN_SCATTERDIV8DF);
35597 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16si",
35598 VOID_FTYPE_PINT_HI_V16SI_V16SI_INT,
35599 IX86_BUILTIN_SCATTERSIV16SI);
35601 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8di",
35602 VOID_FTYPE_PLONGLONG_QI_V8SI_V8DI_INT,
35603 IX86_BUILTIN_SCATTERSIV8DI);
35605 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16si",
35606 VOID_FTYPE_PINT_QI_V8DI_V8SI_INT,
35607 IX86_BUILTIN_SCATTERDIV16SI);
35609 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8di",
35610 VOID_FTYPE_PLONGLONG_QI_V8DI_V8DI_INT,
35611 IX86_BUILTIN_SCATTERDIV8DI);
35614 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2df",
35615 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_QI_INT,
35616 IX86_BUILTIN_GATHER3SIV2DF);
35618 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4df",
35619 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_QI_INT,
35620 IX86_BUILTIN_GATHER3SIV4DF);
35622 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2df",
35623 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_QI_INT,
35624 IX86_BUILTIN_GATHER3DIV2DF);
35626 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4df",
35627 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_QI_INT,
35628 IX86_BUILTIN_GATHER3DIV4DF);
35630 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4sf",
35631 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_QI_INT,
35632 IX86_BUILTIN_GATHER3SIV4SF);
35634 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8sf",
35635 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_QI_INT,
35636 IX86_BUILTIN_GATHER3SIV8SF);
35638 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4sf",
35639 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_QI_INT,
35640 IX86_BUILTIN_GATHER3DIV4SF);
35642 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8sf",
35643 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_QI_INT,
35644 IX86_BUILTIN_GATHER3DIV8SF);
35646 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2di",
35647 V2DI_FTYPE_V2DI_PCINT64_V4SI_QI_INT,
35648 IX86_BUILTIN_GATHER3SIV2DI);
35650 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4di",
35651 V4DI_FTYPE_V4DI_PCINT64_V4SI_QI_INT,
35652 IX86_BUILTIN_GATHER3SIV4DI);
35654 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2di",
35655 V2DI_FTYPE_V2DI_PCINT64_V2DI_QI_INT,
35656 IX86_BUILTIN_GATHER3DIV2DI);
35658 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4di",
35659 V4DI_FTYPE_V4DI_PCINT64_V4DI_QI_INT,
35660 IX86_BUILTIN_GATHER3DIV4DI);
35662 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4si",
35663 V4SI_FTYPE_V4SI_PCINT_V4SI_QI_INT,
35664 IX86_BUILTIN_GATHER3SIV4SI);
35666 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8si",
35667 V8SI_FTYPE_V8SI_PCINT_V8SI_QI_INT,
35668 IX86_BUILTIN_GATHER3SIV8SI);
35670 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4si",
35671 V4SI_FTYPE_V4SI_PCINT_V2DI_QI_INT,
35672 IX86_BUILTIN_GATHER3DIV4SI);
35674 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8si",
35675 V4SI_FTYPE_V4SI_PCINT_V4DI_QI_INT,
35676 IX86_BUILTIN_GATHER3DIV8SI);
35678 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4df ",
35679 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_QI_INT,
35680 IX86_BUILTIN_GATHER3ALTSIV4DF);
35682 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8sf ",
35683 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_QI_INT,
35684 IX86_BUILTIN_GATHER3ALTDIV8SF);
35686 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4di ",
35687 V4DI_FTYPE_V4DI_PCINT64_V8SI_QI_INT,
35688 IX86_BUILTIN_GATHER3ALTSIV4DI);
35690 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8si ",
35691 V8SI_FTYPE_V8SI_PCINT_V4DI_QI_INT,
35692 IX86_BUILTIN_GATHER3ALTDIV8SI);
35694 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8sf",
35695 VOID_FTYPE_PFLOAT_QI_V8SI_V8SF_INT,
35696 IX86_BUILTIN_SCATTERSIV8SF);
35698 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4sf",
35699 VOID_FTYPE_PFLOAT_QI_V4SI_V4SF_INT,
35700 IX86_BUILTIN_SCATTERSIV4SF);
35702 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4df",
35703 VOID_FTYPE_PDOUBLE_QI_V4SI_V4DF_INT,
35704 IX86_BUILTIN_SCATTERSIV4DF);
35706 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2df",
35707 VOID_FTYPE_PDOUBLE_QI_V4SI_V2DF_INT,
35708 IX86_BUILTIN_SCATTERSIV2DF);
35710 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8sf",
35711 VOID_FTYPE_PFLOAT_QI_V4DI_V4SF_INT,
35712 IX86_BUILTIN_SCATTERDIV8SF);
35714 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4sf",
35715 VOID_FTYPE_PFLOAT_QI_V2DI_V4SF_INT,
35716 IX86_BUILTIN_SCATTERDIV4SF);
35718 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4df",
35719 VOID_FTYPE_PDOUBLE_QI_V4DI_V4DF_INT,
35720 IX86_BUILTIN_SCATTERDIV4DF);
35722 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2df",
35723 VOID_FTYPE_PDOUBLE_QI_V2DI_V2DF_INT,
35724 IX86_BUILTIN_SCATTERDIV2DF);
35726 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8si",
35727 VOID_FTYPE_PINT_QI_V8SI_V8SI_INT,
35728 IX86_BUILTIN_SCATTERSIV8SI);
35730 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4si",
35731 VOID_FTYPE_PINT_QI_V4SI_V4SI_INT,
35732 IX86_BUILTIN_SCATTERSIV4SI);
35734 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4di",
35735 VOID_FTYPE_PLONGLONG_QI_V4SI_V4DI_INT,
35736 IX86_BUILTIN_SCATTERSIV4DI);
35738 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2di",
35739 VOID_FTYPE_PLONGLONG_QI_V4SI_V2DI_INT,
35740 IX86_BUILTIN_SCATTERSIV2DI);
35742 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8si",
35743 VOID_FTYPE_PINT_QI_V4DI_V4SI_INT,
35744 IX86_BUILTIN_SCATTERDIV8SI);
35746 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4si",
35747 VOID_FTYPE_PINT_QI_V2DI_V4SI_INT,
35748 IX86_BUILTIN_SCATTERDIV4SI);
35750 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4di",
35751 VOID_FTYPE_PLONGLONG_QI_V4DI_V4DI_INT,
35752 IX86_BUILTIN_SCATTERDIV4DI);
35754 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2di",
35755 VOID_FTYPE_PLONGLONG_QI_V2DI_V2DI_INT,
35756 IX86_BUILTIN_SCATTERDIV2DI);
35757 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltsiv8df ",
35758 VOID_FTYPE_PDOUBLE_QI_V16SI_V8DF_INT,
35759 IX86_BUILTIN_SCATTERALTSIV8DF);
35761 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltdiv8sf ",
35762 VOID_FTYPE_PFLOAT_HI_V8DI_V16SF_INT,
35763 IX86_BUILTIN_SCATTERALTDIV16SF);
35765 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltsiv8di ",
35766 VOID_FTYPE_PLONGLONG_QI_V16SI_V8DI_INT,
35767 IX86_BUILTIN_SCATTERALTSIV8DI);
35769 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltdiv8si ",
35770 VOID_FTYPE_PINT_HI_V8DI_V16SI_INT,
35771 IX86_BUILTIN_SCATTERALTDIV16SI);
35774 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdpd",
35775 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
35776 IX86_BUILTIN_GATHERPFDPD);
35777 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdps",
35778 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
35779 IX86_BUILTIN_GATHERPFDPS);
35780 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqpd",
35781 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
35782 IX86_BUILTIN_GATHERPFQPD);
35783 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqps",
35784 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
35785 IX86_BUILTIN_GATHERPFQPS);
35786 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdpd",
35787 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
35788 IX86_BUILTIN_SCATTERPFDPD);
35789 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdps",
35790 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
35791 IX86_BUILTIN_SCATTERPFDPS);
35792 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqpd",
35793 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
35794 IX86_BUILTIN_SCATTERPFQPD);
35795 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqps",
35796 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
35797 IX86_BUILTIN_SCATTERPFQPS);
35800 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg1",
35801 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG1);
35802 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg2",
35803 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG2);
35804 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1nexte",
35805 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1NEXTE);
35806 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1rnds4",
35807 V4SI_FTYPE_V4SI_V4SI_INT, IX86_BUILTIN_SHA1RNDS4);
35808 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg1",
35809 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG1);
35810 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg2",
35811 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG2);
35812 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256rnds2",
35813 V4SI_FTYPE_V4SI_V4SI_V4SI, IX86_BUILTIN_SHA256RNDS2);
35816 def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort",
35817 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);
35819 /* MMX access to the vec_init patterns. */
35820 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
35821 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
35823 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
35824 V4HI_FTYPE_HI_HI_HI_HI,
35825 IX86_BUILTIN_VEC_INIT_V4HI);
35827 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
35828 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
35829 IX86_BUILTIN_VEC_INIT_V8QI);
35831 /* Access to the vec_extract patterns. */
35832 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
35833 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
35834 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
35835 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
35836 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
35837 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
35838 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
35839 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
35840 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
35841 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
35843 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
35844 "__builtin_ia32_vec_ext_v4hi",
35845 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
35847 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
35848 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
35850 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
35851 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
35853 /* Access to the vec_set patterns. */
35854 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
35855 "__builtin_ia32_vec_set_v2di",
35856 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
35858 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
35859 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
35861 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
35862 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
35864 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
35865 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
35867 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
35868 "__builtin_ia32_vec_set_v4hi",
35869 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
35871 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
35872 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
35875 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_hi_step",
35876 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDSEED16_STEP);
35877 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_si_step",
35878 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDSEED32_STEP);
35879 def_builtin (OPTION_MASK_ISA_RDSEED | OPTION_MASK_ISA_64BIT,
35880 "__builtin_ia32_rdseed_di_step",
35881 INT_FTYPE_PULONGLONG, IX86_BUILTIN_RDSEED64_STEP);
35884 def_builtin (0, "__builtin_ia32_addcarryx_u32",
35885 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_ADDCARRYX32);
35886 def_builtin (OPTION_MASK_ISA_64BIT,
35887 "__builtin_ia32_addcarryx_u64",
35888 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
35889 IX86_BUILTIN_ADDCARRYX64);
35892 def_builtin (0, "__builtin_ia32_sbb_u32",
35893 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_SBB32);
35894 def_builtin (OPTION_MASK_ISA_64BIT,
35895 "__builtin_ia32_sbb_u64",
35896 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
35897 IX86_BUILTIN_SBB64);
35899 /* Read/write FLAGS. */
35900 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u32",
35901 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
35902 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u64",
35903 UINT64_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
35904 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u32",
35905 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_WRITE_FLAGS);
35906 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u64",
35907 VOID_FTYPE_UINT64, IX86_BUILTIN_WRITE_FLAGS);
35910 def_builtin (OPTION_MASK_ISA_CLFLUSHOPT, "__builtin_ia32_clflushopt",
35911 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSHOPT);
35914 def_builtin (OPTION_MASK_ISA_CLWB, "__builtin_ia32_clwb",
35915 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLWB);
35917 /* MONITORX and MWAITX. */
35918 def_builtin (OPTION_MASK_ISA_MWAITX, "__builtin_ia32_monitorx",
35919 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITORX);
35920 def_builtin (OPTION_MASK_ISA_MWAITX, "__builtin_ia32_mwaitx",
35921 VOID_FTYPE_UNSIGNED_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAITX);
35924 def_builtin (OPTION_MASK_ISA_CLZERO, "__builtin_ia32_clzero",
35925 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLZERO);
35927 /* Add FMA4 multi-arg argument instructions */
35928 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
35933 ftype = (enum ix86_builtin_func_type) d->flag;
35934 def_builtin_const (d->mask, d->name, ftype, d->code);
/* Define the MPX (Intel Memory Protection Extensions) builtins.
   Walks the bdesc_mpx table (builtins with side effects) and the
   bdesc_mpx_const table (const builtins), registering each entry and
   marking the resulting decls leaf/nothrow.  */
ix86_init_mpx_builtins ()
  const struct builtin_description * d;
  enum ix86_builtin_func_type ftype;

  /* Builtins with side effects (bdesc_mpx).  */
  for (i = 0, d = bdesc_mpx;
       i < ARRAY_SIZE (bdesc_mpx);
      /* d->flag encodes the prototype as an ix86_builtin_func_type.  */
      ftype = (enum ix86_builtin_func_type) d->flag;
      decl = def_builtin (d->mask, d->name, ftype, d->code);

      /* With no leaf and nothrow flags for MPX builtins
	 abnormal edges may follow its call when setjmp
	 presents in the function.  Since we may have a lot
	 of MPX builtins calls it causes lots of useless
	 edges and enormous PHI nodes.  To avoid this we mark
	 MPX builtins as leaf and nothrow.  */
	  DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
	  TREE_NOTHROW (decl) = 1;
	  /* Record the flags so they can be re-applied if the decl is
	     (re)created later for a changed ISA set.  */
	  ix86_builtins_isa[(int)d->code].leaf_p = true;
	  ix86_builtins_isa[(int)d->code].nothrow_p = true;

  /* Const builtins (bdesc_mpx_const): same leaf/nothrow treatment,
     but registered through def_builtin_const.  */
  for (i = 0, d = bdesc_mpx_const;
       i < ARRAY_SIZE (bdesc_mpx_const);
      ftype = (enum ix86_builtin_func_type) d->flag;
      decl = def_builtin_const (d->mask, d->name, ftype, d->code);
      DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
      TREE_NOTHROW (decl) = 1;
      ix86_builtins_isa[(int)d->code].leaf_p = true;
      ix86_builtins_isa[(int)d->code].nothrow_p = true;
/* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
   to return a pointer to VERSION_DECL if the outcome of the expression
   formed by PREDICATE_CHAIN is true.  This function will be called during
   version dispatch to decide which function version to execute.  It returns
   the basic block at the end, to which more conditions can be added.  */
add_condition_to_bb (tree function_decl, tree version_decl,
		     tree predicate_chain, basic_block new_bb)
  gimple *return_stmt;
  tree convert_expr, result_var;
  gimple *convert_stmt;
  gimple *call_cond_stmt;
  gimple *if_else_stmt;

  basic_block bb1, bb2, bb3;

  tree cond_var, and_expr_var = NULL_TREE;

  tree predicate_decl, predicate_arg;

  /* Work within the CFG of the dispatcher function.  */
  push_cfun (DECL_STRUCT_FUNCTION (function_decl));

  gcc_assert (new_bb != NULL);
  gseq = bb_seq (new_bb);

  /* Build "return (void *) &VERSION_DECL;" — the IFUNC resolver hands
     back the chosen version's address as a generic pointer.  */
  convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
			 build_fold_addr_expr (version_decl));
  result_var = create_tmp_var (ptr_type_node);
  convert_stmt = gimple_build_assign (result_var, convert_expr);
  return_stmt = gimple_build_return (result_var);

  /* No predicates: this is the default version — return it
     unconditionally from NEW_BB.  */
  if (predicate_chain == NULL_TREE)
      gimple_seq_add_stmt (&gseq, convert_stmt);
      gimple_seq_add_stmt (&gseq, return_stmt);
      set_bb_seq (new_bb, gseq);
      gimple_set_bb (convert_stmt, new_bb);
      gimple_set_bb (return_stmt, new_bb);

  /* Emit one predicate call per chain element and combine the integer
     results into a single condition value.  */
  while (predicate_chain != NULL)
      cond_var = create_tmp_var (integer_type_node);
      /* TREE_PURPOSE is the predicate builtin decl, TREE_VALUE its
	 argument (a CPU/feature name string).  */
      predicate_decl = TREE_PURPOSE (predicate_chain);
      predicate_arg = TREE_VALUE (predicate_chain);
      call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
      gimple_call_set_lhs (call_cond_stmt, cond_var);

      gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
      gimple_set_bb (call_cond_stmt, new_bb);
      gimple_seq_add_stmt (&gseq, call_cond_stmt);

      predicate_chain = TREE_CHAIN (predicate_chain);

      if (and_expr_var == NULL)
	and_expr_var = cond_var;
	  gimple *assign_stmt;
	  /* Use MIN_EXPR to check if any integer is zero?.
	     and_expr_var = min_expr <cond_var, and_expr_var> */
	  assign_stmt = gimple_build_assign (and_expr_var,
					     build2 (MIN_EXPR, integer_type_node,
						     cond_var, and_expr_var));

	  gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
	  gimple_set_bb (assign_stmt, new_bb);
	  gimple_seq_add_stmt (&gseq, assign_stmt);

  /* Branch on the combined predicate value.  */
  if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
				    NULL_TREE, NULL_TREE);
  gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
  gimple_set_bb (if_else_stmt, new_bb);
  gimple_seq_add_stmt (&gseq, if_else_stmt);

  gimple_seq_add_stmt (&gseq, convert_stmt);
  gimple_seq_add_stmt (&gseq, return_stmt);
  set_bb_seq (new_bb, gseq);

  /* Split after the condition: the true edge falls into the block that
     returns this version.  */
  e12 = split_block (bb1, if_else_stmt);

  e12->flags &= ~EDGE_FALLTHRU;
  e12->flags |= EDGE_TRUE_VALUE;

  /* Split after the return; convert/return statements live in bb2.  */
  e23 = split_block (bb2, return_stmt);

  gimple_set_bb (convert_stmt, bb2);
  gimple_set_bb (return_stmt, bb2);

  /* False edge skips over the return block to bb3, where the next
     condition can be appended by a subsequent call.  */
  make_edge (bb1, bb3, EDGE_FALSE_VALUE);

  make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
/* This parses the attribute arguments to target in DECL and determines
   the right builtin to use to match the platform specification.
   It returns the priority value for this version decl.  If PREDICATE_LIST
   is not NULL, it stores the list of cpu features that need to be checked
   before dispatching this function.  */

static unsigned int
get_builtin_code_for_version (tree decl, tree *predicate_list)
  struct cl_target_option cur_target;
  struct cl_target_option *new_target;
  const char *arg_str = NULL;
  const char *attrs_str = NULL;
  char *tok_str = NULL;

  /* Priority of i386 features, greater value is higher priority.   This is
     used to decide the order in which function dispatch must happen.  For
     instance, a version specialized for SSE4.2 should be checked for dispatch
     before a version for SSE3, as SSE4.2 implies SSE3.  */
  enum feature_priority
  enum feature_priority priority = P_ZERO;

  /* These are the target attribute strings for which a dispatcher is
     available, from fold_builtin_cpu.  */

  static struct _feature_list
      const char *const name;
      const enum feature_priority priority;
  const feature_list[] =
      {"sse4a", P_SSE4_A},
      {"ssse3", P_SSSE3},
      {"sse4.1", P_SSE4_1},
      {"sse4.2", P_SSE4_2},
      {"popcnt", P_POPCNT},
      {"pclmul", P_PCLMUL},
      {"avx512f", P_AVX512F}

  static unsigned int NUM_FEATURES
    = sizeof (feature_list) / sizeof (struct _feature_list);

  tree predicate_chain = NULL_TREE;
  tree predicate_decl, predicate_arg;

  attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
  gcc_assert (attrs != NULL);

  attrs = TREE_VALUE (TREE_VALUE (attrs));

  gcc_assert (TREE_CODE (attrs) == STRING_CST);
  attrs_str = TREE_STRING_POINTER (attrs);

  /* Return priority zero for default function.  */
  if (strcmp (attrs_str, "default") == 0)

  /* Handle arch= if specified.  For priority, set it to be 1 more than
     the best instruction set the processor can handle.  For instance, if
     there is a version for atom and a version for ssse3 (the highest ISA
     priority for atom), the atom version must be checked for dispatch
     before the ssse3 version. */
  if (strstr (attrs_str, "arch=") != NULL)
      /* Temporarily apply the attribute's target options so the
	 selected arch/ISA flags can be inspected; restored below.  */
      cl_target_option_save (&cur_target, &global_options);
      target_node = ix86_valid_target_attribute_tree (attrs, &global_options,
						      &global_options_set);

      gcc_assert (target_node);
      new_target = TREE_TARGET_OPTION (target_node);
      gcc_assert (new_target);

      if (new_target->arch_specified && new_target->arch > 0)
	  /* Map the processor to the __builtin_cpu_is string and to the
	     dispatch priority of its best ISA.  */
	  switch (new_target->arch)
	    case PROCESSOR_CORE2:
	      priority = P_PROC_SSSE3;
	    case PROCESSOR_NEHALEM:
	      if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AES)
		arg_str = "westmere";
		/* We translate "arch=corei7" and "arch=nehalem" to
		   "corei7" so that it will be mapped to M_INTEL_COREI7
		   as cpu type to cover all M_INTEL_COREI7_XXXs.  */
		arg_str = "corei7";
	      priority = P_PROC_SSE4_2;
	    case PROCESSOR_SANDYBRIDGE:
	      if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_F16C)
		arg_str = "ivybridge";
		arg_str = "sandybridge";
	      priority = P_PROC_AVX;
	    case PROCESSOR_HASWELL:
	      /* Distinguish the Haswell-derived cores by ISA flags that
		 are unique to each successor.  */
	      if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AVX512VL)
		arg_str = "skylake-avx512";
	      else if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_XSAVES)
		arg_str = "skylake";
	      else if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_ADX)
		arg_str = "broadwell";
		arg_str = "haswell";
	      priority = P_PROC_AVX2;
	    case PROCESSOR_BONNELL:
	      arg_str = "bonnell";
	      priority = P_PROC_SSSE3;
	    case PROCESSOR_KNL:
	      priority = P_PROC_AVX512F;
	    case PROCESSOR_SILVERMONT:
	      arg_str = "silvermont";
	      priority = P_PROC_SSE4_2;
	    case PROCESSOR_AMDFAM10:
	      arg_str = "amdfam10h";
	      priority = P_PROC_SSE4_A;
	    case PROCESSOR_BTVER1:
	      arg_str = "btver1";
	      priority = P_PROC_SSE4_A;
	    case PROCESSOR_BTVER2:
	      arg_str = "btver2";
	      priority = P_PROC_BMI;
	    case PROCESSOR_BDVER1:
	      arg_str = "bdver1";
	      priority = P_PROC_XOP;
	    case PROCESSOR_BDVER2:
	      arg_str = "bdver2";
	      priority = P_PROC_FMA;
	    case PROCESSOR_BDVER3:
	      arg_str = "bdver3";
	      priority = P_PROC_FMA;
	    case PROCESSOR_BDVER4:
	      arg_str = "bdver4";
	      priority = P_PROC_AVX2;
	    case PROCESSOR_ZNVER1:
	      arg_str = "znver1";
	      priority = P_PROC_AVX2;

      cl_target_option_restore (&global_options, &cur_target);

      if (predicate_list && arg_str == NULL)
	  error_at (DECL_SOURCE_LOCATION (decl),
		    "No dispatcher found for the versioning attributes");

      if (predicate_list)
	  /* arch= is tested with __builtin_cpu_is.  */
	  predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS];
	  /* For a C string literal the length includes the trailing NULL.  */
	  predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
	  predicate_chain = tree_cons (predicate_decl, predicate_arg,

  /* Process feature name.  */
  /* NOTE(review): strtok mutates a private copy of the attribute string
     and is not reentrant; fine here since this runs single-threaded.  */
  tok_str = (char *) xmalloc (strlen (attrs_str) + 1);
  strcpy (tok_str, attrs_str);
  token = strtok (tok_str, ",");
  /* ISA features are tested with __builtin_cpu_supports.  */
  predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS];

  while (token != NULL)
      /* Do not process "arch="  */
      if (strncmp (token, "arch=", 5) == 0)
	  token = strtok (NULL, ",");

      for (i = 0; i < NUM_FEATURES; ++i)
	  if (strcmp (token, feature_list[i].name) == 0)
	      if (predicate_list)
		  predicate_arg = build_string_literal (
				  strlen (feature_list[i].name) + 1,
				  feature_list[i].name);
		  predicate_chain = tree_cons (predicate_decl, predicate_arg,
	      /* Find the maximum priority feature.  */
	      if (feature_list[i].priority > priority)
		priority = feature_list[i].priority;

      /* i == NUM_FEATURES means the token matched no known feature.  */
      if (predicate_list && i == NUM_FEATURES)
	  error_at (DECL_SOURCE_LOCATION (decl),
		    "No dispatcher found for %s", token);
      token = strtok (NULL, ",");

  if (predicate_list && predicate_chain == NULL_TREE)
      error_at (DECL_SOURCE_LOCATION (decl),
		"No dispatcher found for the versioning attributes : %s",
  else if (predicate_list)
      /* Predicates were consed in reverse; restore source order before
	 handing them back.  */
      predicate_chain = nreverse (predicate_chain);
      *predicate_list = predicate_chain;
/* This compares the priority of target features in function DECL1
   and DECL2.  It returns positive value if DECL1 is higher priority,
   negative value if DECL2 is higher priority and 0 if they are the
   same.  */
ix86_compare_version_priority (tree decl1, tree decl2)
  unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
  unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);

  /* Cast to int before subtracting so the difference is signed;
     priorities are small enum values so this cannot overflow.  */
  return (int)priority1 - (int)priority2;
/* V1 and V2 point to function versions with different priorities
   based on the target ISA.  This function compares their priorities.
   qsort comparator: sorts in DESCENDING order of dispatch_priority.  */
feature_compare (const void *v1, const void *v2)
  /* Local mirror of the element type built in
     dispatch_function_versions.  */
  typedef struct _function_version_info
      tree predicate_chain;
      unsigned int dispatch_priority;
    } function_version_info;

  const function_version_info c1 = *(const function_version_info *)v1;
  const function_version_info c2 = *(const function_version_info *)v2;
  /* NOTE(review): unsigned subtraction narrowed to int — correct for
     the small priority values used here, but would misorder if
     priorities ever exceeded INT_MAX apart.  */
  return (c2.dispatch_priority - c1.dispatch_priority);
/* This function generates the dispatch function for
   multi-versioned functions.  DISPATCH_DECL is the function which will
   contain the dispatch logic.  FNDECLS are the function choices for
   dispatch, and is a tree chain.  EMPTY_BB is the basic block pointer
   in DISPATCH_DECL in which the dispatch code is generated.  */
dispatch_function_versions (tree dispatch_decl,
			    basic_block *empty_bb)
  gimple *ifunc_cpu_init_stmt;
  vec<tree> *fndecls;
  unsigned int num_versions = 0;
  unsigned int actual_versions = 0;

  /* One record per non-default version: the decl, the predicates that
     guard it, and its dispatch priority for sorting.  */
  struct _function_version_info
      tree predicate_chain;
      unsigned int dispatch_priority;
    }*function_version_info;

  gcc_assert (dispatch_decl != NULL
	      && fndecls_p != NULL
	      && empty_bb != NULL);

  /*fndecls_p is actually a vector.  */
  fndecls = static_cast<vec<tree> *> (fndecls_p);

  /* At least one more version other than the default.  */
  num_versions = fndecls->length ();
  gcc_assert (num_versions >= 2);

  function_version_info = (struct _function_version_info *)
    XNEWVEC (struct _function_version_info, (num_versions - 1));

  /* The first version in the vector is the default decl.  */
  default_decl = (*fndecls)[0];

  push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));

  gseq = bb_seq (*empty_bb);
  /* Function version dispatch is via IFUNC.  IFUNC resolvers fire before
     constructors, so explicitly call __builtin_cpu_init here.  */
  ifunc_cpu_init_stmt = gimple_build_call_vec (
		  ix86_builtins [(int) IX86_BUILTIN_CPU_INIT], vNULL);
  gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
  gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
  set_bb_seq (*empty_bb, gseq);

  /* Collect priority and predicate chain for every non-default
     version (index 0 is the default and is handled last).  */
  for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
      tree version_decl = ele;
      tree predicate_chain = NULL_TREE;
      unsigned int priority;
      /* Get attribute string, parse it and find the right predicate decl.
	 The predicate function could be a lengthy combination of many
	 features, like arch-type and various isa-variants.  */
      priority = get_builtin_code_for_version (version_decl,
      if (predicate_chain == NULL_TREE)

      function_version_info [actual_versions].version_decl = version_decl;
      function_version_info [actual_versions].predicate_chain
      function_version_info [actual_versions].dispatch_priority = priority;

  /* Sort the versions according to descending order of dispatch priority.  The
     priority is based on the ISA.  This is not a perfect solution.  There
     could still be ambiguity.  If more than one function version is suitable
     to execute, which one should be dispatched?  In future, allow the user
     to specify a dispatch priority next to the version.  */
  qsort (function_version_info, actual_versions,
	 sizeof (struct _function_version_info), feature_compare);

  /* Emit one guarded "return &version" block per version, chaining the
     false edges so higher-priority versions are tested first.  */
  for  (i = 0; i < actual_versions; ++i)
    *empty_bb = add_condition_to_bb (dispatch_decl,
				     function_version_info[i].version_decl,
				     function_version_info[i].predicate_chain,

  /* dispatch default version at the end.  */
  *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,

  free (function_version_info);
/* qsort comparison callback for sorting the individual "target"
   attribute argument strings.  Each array element is a char *;
   dereference both and compare the pointed-to strings with strcmp.  */
static int
attr_strcmp (const void *v1, const void *v2)
{
  const char *s1 = *(char *const *) v1;
  const char *s2 = *(char *const *) v2;

  return strcmp (s1, s2);
}
/* ARGLIST is the argument to target attribute.  This function tokenizes
   the comma separated arguments, sorts them and returns a string which
   is a unique identifier for the comma separated arguments.   It also
   replaces non-identifier characters "=,-" with "_".  Caller owns the
   returned XNEWVEC'd string.  */
sorted_attr_string (tree arglist)
  size_t str_len_sum = 0;
  char **args = NULL;
  char *attr_str, *ret_str;

  unsigned int argnum = 1;

  /* First pass: total up the buffer size (each string plus one byte
     for its separator/terminator) and count the arguments.  */
  for (arg = arglist; arg; arg = TREE_CHAIN (arg))
      const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
      size_t len = strlen (str);
      str_len_sum += len + 1;
      if (arg != arglist)
      for (i = 0; i < strlen (str); i++)

  attr_str = XNEWVEC (char, str_len_sum);

  /* Second pass: concatenate all arguments, comma-separated.  */
  for (arg = arglist; arg; arg = TREE_CHAIN (arg))
      const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
      size_t len = strlen (str);
      memcpy (attr_str + str_len_sum, str, len);
      attr_str[str_len_sum + len] = TREE_CHAIN (arg) ? ',' : '\0';
      str_len_sum += len + 1;

  /* Replace "=,-" with "_".  */
  for (i = 0; i < strlen (attr_str); i++)
    if (attr_str[i] == '=' || attr_str[i]== '-')

  args = XNEWVEC (char *, argnum);

  /* Split on the remaining commas; strtok writes NULs in place, so the
     collected pointers alias pieces of attr_str.  */
  attr = strtok (attr_str, ",");
  while (attr != NULL)
      attr = strtok (NULL, ",");

  /* Canonical order so equivalent attribute sets map to one string.  */
  qsort (args, argnum, sizeof (char *), attr_strcmp);

  ret_str = XNEWVEC (char, str_len_sum);

  /* Re-join the sorted tokens with '_' separators.  */
  for (i = 0; i < argnum; i++)
      size_t len = strlen (args[i]);
      memcpy (ret_str + str_len_sum, args[i], len);
      ret_str[str_len_sum + len] = i < argnum - 1 ? '_' : '\0';
      str_len_sum += len + 1;

  /* args pointed into attr_str, so freeing attr_str releases both.  */
  XDELETEVEC (attr_str);
36615 /* This function changes the assembler name for functions that are
36616 versions. If DECL is a function version and has a "target"
36617 attribute, it appends the attribute string to its assembler name. */
36620 ix86_mangle_function_version_assembler_name (tree decl, tree id)
36623 const char *orig_name, *version_string;
36624 char *attr_str, *assembler_name;
/* gnu_inline suppresses body generation, which versioning requires.  */
36626 if (DECL_DECLARED_INLINE_P (decl)
36627 && lookup_attribute ("gnu_inline",
36628 DECL_ATTRIBUTES (decl)))
36629 error_at (DECL_SOURCE_LOCATION (decl),
36630 "Function versions cannot be marked as gnu_inline,"
36631 " bodies have to be generated");
36633 if (DECL_VIRTUAL_P (decl)
36634 || DECL_VINDEX (decl))
36635 sorry ("Virtual function multiversioning not supported");
36637 version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
36639 /* target attribute string cannot be NULL. */
36640 gcc_assert (version_attr != NULL_TREE);
36642 orig_name = IDENTIFIER_POINTER (id);
36644 = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));
/* NOTE(review): the "default" version presumably keeps its original
   name (elided branch) — confirm against upstream source.  */
36646 if (strcmp (version_string, "default") == 0)
36649 attr_str = sorted_attr_string (TREE_VALUE (version_attr));
/* The "+ 2" covers the '.' separator and the trailing NUL.  */
36650 assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);
36652 sprintf (assembler_name, "%s.%s", orig_name, attr_str);
36654 /* Allow assembler name to be modified if already set. */
36655 if (DECL_ASSEMBLER_NAME_SET_P (decl))
36656 SET_DECL_RTL (decl, NULL);
36658 tree ret = get_identifier (assembler_name);
36659 XDELETEVEC (attr_str);
36660 XDELETEVEC (assembler_name);
36664 /* This function returns true if FN1 and FN2 are versions of the same function,
36665 that is, the target strings of the function decls are different. This assumes
36666 that FN1 and FN2 have the same signature. */
36669 ix86_function_versions (tree fn1, tree fn2)
36672 char *target1, *target2;
/* Only function decls can be versions of each other.  */
36675 if (TREE_CODE (fn1) != FUNCTION_DECL
36676 || TREE_CODE (fn2) != FUNCTION_DECL)
36679 attr1 = lookup_attribute ("target", DECL_ATTRIBUTES (fn1));
36680 attr2 = lookup_attribute ("target", DECL_ATTRIBUTES (fn2));
36682 /* At least one function decl should have the target attribute specified. */
36683 if (attr1 == NULL_TREE && attr2 == NULL_TREE)
36686 /* Diagnose missing target attribute if one of the decls is already
36687 multi-versioned. */
36688 if (attr1 == NULL_TREE || attr2 == NULL_TREE)
36690 if (DECL_FUNCTION_VERSIONED (fn1) || DECL_FUNCTION_VERSIONED (fn2))
/* Normalize so that FN2 is the decl lacking the attribute before
   emitting the diagnostic.  */
36692 if (attr2 != NULL_TREE)
36694 std::swap (fn1, fn2);
36697 error_at (DECL_SOURCE_LOCATION (fn2),
36698 "missing %<target%> attribute for multi-versioned %D",
36700 inform (DECL_SOURCE_LOCATION (fn1),
36701 "previous declaration of %D", fn1);
36702 /* Prevent diagnosing of the same error multiple times. */
36703 DECL_ATTRIBUTES (fn2)
36704 = tree_cons (get_identifier ("target"),
36705 copy_node (TREE_VALUE (attr1)),
36706 DECL_ATTRIBUTES (fn2));
/* Compare the canonical (sorted) attribute strings.  */
36711 target1 = sorted_attr_string (TREE_VALUE (attr1));
36712 target2 = sorted_attr_string (TREE_VALUE (attr2));
36714 /* The sorted target strings must be different for fn1 and fn2
36716 if (strcmp (target1, target2) == 0)
36721 XDELETEVEC (target1);
36722 XDELETEVEC (target2);
36728 ix86_mangle_decl_assembler_name (tree decl, tree id)
36730 /* For function version, add the target suffix to the assembler name. */
36731 if (TREE_CODE (decl) == FUNCTION_DECL
36732 && DECL_FUNCTION_VERSIONED (decl))
36733 id = ix86_mangle_function_version_assembler_name (decl, id);
36734 #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
36735 id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
36741 /* Return a new name by appending SUFFIX to the DECL name. If make_unique
36742 is true, append the full path name of the source file. */
36745 make_name (tree decl, const char *suffix, bool make_unique)
36747 char *global_var_name;
36750 const char *unique_name = NULL;
36752 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
36754 /* Get a unique name that can be used globally without any chances
36755 of collision at link time. */
36757 unique_name = IDENTIFIER_POINTER (get_file_function_name ("\0"));
/* Reserve room for NAME, SUFFIX, the '.' separator and the NUL.  */
36759 name_len = strlen (name) + strlen (suffix) + 2;
36762 name_len += strlen (unique_name) + 1;
36763 global_var_name = XNEWVEC (char, name_len);
36765 /* Use '.' to concatenate names as it is demangler friendly. */
36767 snprintf (global_var_name, name_len, "%s.%s.%s", name, unique_name,
36770 snprintf (global_var_name, name_len, "%s.%s", name, suffix);
/* Callers own the returned buffer and release it with XDELETEVEC.  */
36772 return global_var_name;
36775 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
36777 /* Make a dispatcher declaration for the multi-versioned function DECL.
36778 Calls to DECL function will be replaced with calls to the dispatcher
36779 by the front-end. Return the decl created. */
36782 make_dispatcher_decl (const tree decl)
36786 tree fn_type, func_type;
36787 bool is_uniq = false;
/* A non-public DECL gets a uniquified dispatcher name, since the
   dispatcher itself must be externally visible (see below).  */
36789 if (TREE_PUBLIC (decl) == 0)
36792 func_name = make_name (decl, "ifunc", is_uniq);
/* The dispatcher shares DECL's signature so calls can be redirected
   to it unchanged.  */
36794 fn_type = TREE_TYPE (decl);
36795 func_type = build_function_type (TREE_TYPE (fn_type),
36796 TYPE_ARG_TYPES (fn_type));
36798 func_decl = build_fn_decl (func_name, func_type);
36799 XDELETEVEC (func_name);
36800 TREE_USED (func_decl) = 1;
36801 DECL_CONTEXT (func_decl) = NULL_TREE;
36802 DECL_INITIAL (func_decl) = error_mark_node;
36803 DECL_ARTIFICIAL (func_decl) = 1;
36804 /* Mark this func as external, the resolver will flip it again if
36805 it gets generated. */
36806 DECL_EXTERNAL (func_decl) = 1;
36807 /* This will be of type IFUNCs have to be externally visible. */
36808 TREE_PUBLIC (func_decl) = 1;
36815 /* Returns true if decl is multi-versioned and DECL is the default function,
36816 that is it is not tagged with target specific optimization. */
36819 is_function_default_version (const tree decl)
36821 if (TREE_CODE (decl) != FUNCTION_DECL
36822 || !DECL_FUNCTION_VERSIONED (decl))
36824 tree attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
/* The default version is the one whose "target" attribute string is
   exactly "default".  */
36826 attr = TREE_VALUE (TREE_VALUE (attr));
36827 return (TREE_CODE (attr) == STRING_CST
36828 && strcmp (TREE_STRING_POINTER (attr), "default") == 0);
36831 /* Make a dispatcher declaration for the multi-versioned function DECL.
36832 Calls to DECL function will be replaced with calls to the dispatcher
36833 by the front-end. Returns the decl of the dispatcher function. */
36836 ix86_get_function_versions_dispatcher (void *decl)
36838 tree fn = (tree) decl;
36839 struct cgraph_node *node = NULL;
36840 struct cgraph_node *default_node = NULL;
36841 struct cgraph_function_version_info *node_v = NULL;
36842 struct cgraph_function_version_info *first_v = NULL;
36844 tree dispatch_decl = NULL;
36846 struct cgraph_function_version_info *default_version_info = NULL;
36848 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
36850 node = cgraph_node::get (fn);
36851 gcc_assert (node != NULL);
36853 node_v = node->function_version ();
36854 gcc_assert (node_v != NULL);
/* Reuse an already-created dispatcher, if any.  */
36856 if (node_v->dispatcher_resolver != NULL)
36857 return node_v->dispatcher_resolver;
36859 /* Find the default version and make it the first node. */
36861 /* Go to the beginning of the chain. */
36862 while (first_v->prev != NULL)
36863 first_v = first_v->prev;
36864 default_version_info = first_v;
/* Walk the version chain looking for the "default" version.  */
36865 while (default_version_info != NULL)
36867 if (is_function_default_version
36868 (default_version_info->this_node->decl))
36870 default_version_info = default_version_info->next;
36873 /* If there is no default node, just return NULL. */
36874 if (default_version_info == NULL)
36877 /* Make default info the first node. */
/* Unlink DEFAULT_VERSION_INFO from its position in the doubly linked
   version list and re-insert it at the head, before FIRST_V.  */
36878 if (first_v != default_version_info)
36880 default_version_info->prev->next = default_version_info->next;
36881 if (default_version_info->next)
36882 default_version_info->next->prev = default_version_info->prev;
36883 first_v->prev = default_version_info;
36884 default_version_info->next = first_v;
36885 default_version_info->prev = NULL;
36888 default_node = default_version_info->this_node;
36890 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
36891 if (targetm.has_ifunc_p ())
36893 struct cgraph_function_version_info *it_v = NULL;
36894 struct cgraph_node *dispatcher_node = NULL;
36895 struct cgraph_function_version_info *dispatcher_version_info = NULL;
36897 /* Right now, the dispatching is done via ifunc. */
36898 dispatch_decl = make_dispatcher_decl (default_node->decl);
36900 dispatcher_node = cgraph_node::get_create (dispatch_decl);
36901 gcc_assert (dispatcher_node != NULL);
36902 dispatcher_node->dispatcher_function = 1;
36903 dispatcher_version_info
36904 = dispatcher_node->insert_new_function_version ();
36905 dispatcher_version_info->next = default_version_info;
36906 dispatcher_node->definition = 1;
36908 /* Set the dispatcher for all the versions. */
36909 it_v = default_version_info;
36910 while (it_v != NULL)
36912 it_v->dispatcher_resolver = dispatch_decl;
/* Without ifunc support, multiversioning cannot work; diagnose.  */
36919 error_at (DECL_SOURCE_LOCATION (default_node->decl),
36920 "multiversioning needs ifunc which is not supported "
36924 return dispatch_decl;
36927 /* Make the resolver function decl to dispatch the versions of
36928 a multi-versioned function, DEFAULT_DECL. Create an
36929 empty basic block in the resolver and store the pointer in
36930 EMPTY_BB. Return the decl of the resolver function. */
36933 make_resolver_func (const tree default_decl,
36934 const tree dispatch_decl,
36935 basic_block *empty_bb)
36937 char *resolver_name;
36938 tree decl, type, decl_name, t;
36939 bool is_uniq = false;
36941 /* IFUNC's have to be globally visible. So, if the default_decl is
36942 not, then the name of the IFUNC should be made unique. */
36943 if (TREE_PUBLIC (default_decl) == 0)
36946 /* Append the filename to the resolver function if the versions are
36947 not externally visible. This is because the resolver function has
36948 to be externally visible for the loader to find it. So, appending
36949 the filename will prevent conflicts with a resolver function from
36950 another module which is based on the same version name. */
36951 resolver_name = make_name (default_decl, "resolver", is_uniq);
36953 /* The resolver function should return a (void *). */
36954 type = build_function_type_list (ptr_type_node, NULL_TREE);
36956 decl = build_fn_decl (resolver_name, type);
36957 decl_name = get_identifier (resolver_name);
36958 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
36960 DECL_NAME (decl) = decl_name;
36961 TREE_USED (decl) = 1;
36962 DECL_ARTIFICIAL (decl) = 1;
36963 DECL_IGNORED_P (decl) = 0;
36964 /* IFUNC resolvers have to be externally visible. */
36965 TREE_PUBLIC (decl) = 1;
36966 DECL_UNINLINABLE (decl) = 1;
36968 /* Resolver is not external, body is generated. */
36969 DECL_EXTERNAL (decl) = 0;
36970 DECL_EXTERNAL (dispatch_decl) = 0;
36972 DECL_CONTEXT (decl) = NULL_TREE;
36973 DECL_INITIAL (decl) = make_node (BLOCK);
36974 DECL_STATIC_CONSTRUCTOR (decl) = 0;
36976 if (DECL_COMDAT_GROUP (default_decl)
36977 || TREE_PUBLIC (default_decl))
36979 /* In this case, each translation unit with a call to this
36980 versioned function will put out a resolver. Ensure it
36981 is comdat to keep just one copy. */
36982 DECL_COMDAT (decl) = 1;
36983 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
36985 /* Build result decl and add to function_decl. */
36986 t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
36987 DECL_ARTIFICIAL (t) = 1;
36988 DECL_IGNORED_P (t) = 1;
36989 DECL_RESULT (decl) = t;
/* Gimplify the resolver and give it an empty lowered body the caller
   can fill with the dispatching code (via EMPTY_BB).  */
36991 gimplify_function_tree (decl);
36992 push_cfun (DECL_STRUCT_FUNCTION (decl));
36993 *empty_bb = init_lowered_empty_function (decl, false, 0);
36995 cgraph_node::add_new_function (decl, true);
36996 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
37000 gcc_assert (dispatch_decl != NULL);
37001 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
37002 DECL_ATTRIBUTES (dispatch_decl)
37003 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
37005 /* Create the alias for dispatch to resolver here. */
37006 /*cgraph_create_function_alias (dispatch_decl, decl);*/
37007 cgraph_node::create_same_body_alias (dispatch_decl, decl);
37008 XDELETEVEC (resolver_name);
37012 /* Generate the dispatching code body to dispatch multi-versioned function
37013 DECL. The target hook is called to process the "target" attributes and
37014 provide the code to dispatch the right function at run-time. NODE points
37015 to the dispatcher decl whose body will be created. */
37018 ix86_generate_version_dispatcher_body (void *node_p)
37020 tree resolver_decl;
37021 basic_block empty_bb;
37022 tree default_ver_decl;
37023 struct cgraph_node *versn;
37024 struct cgraph_node *node;
37026 struct cgraph_function_version_info *node_version_info = NULL;
37027 struct cgraph_function_version_info *versn_info = NULL;
37029 node = (cgraph_node *)node_p;
37031 node_version_info = node->function_version ();
37032 gcc_assert (node->dispatcher_function
37033 && node_version_info != NULL);
/* If the resolver has already been generated, just return it.  */
37035 if (node_version_info->dispatcher_resolver)
37036 return node_version_info->dispatcher_resolver;
37038 /* The first version in the chain corresponds to the default version. */
37039 default_ver_decl = node_version_info->next->this_node->decl;
37041 /* node is going to be an alias, so remove the finalized bit. */
37042 node->definition = false;
37044 resolver_decl = make_resolver_func (default_ver_decl,
37045 node->decl, &empty_bb);
37047 node_version_info->dispatcher_resolver = resolver_decl;
37049 push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));
37051 auto_vec<tree, 2> fn_ver_vec;
/* Collect the decls of all function versions to be dispatched.  */
37053 for (versn_info = node_version_info->next; versn_info;
37054 versn_info = versn_info->next)
37056 versn = versn_info->this_node;
37057 /* Check for virtual functions here again, as by this time it should
37058 have been determined if this function needs a vtable index or
37059 not. This happens for methods in derived classes that override
37060 virtual methods in base classes but are not explicitly marked as
37062 if (DECL_VINDEX (versn->decl))
37063 sorry ("Virtual function multiversioning not supported");
37065 fn_ver_vec.safe_push (versn->decl);
/* Emit the dispatching code and rebuild the call graph edges of the
   newly generated body.  */
37068 dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
37069 cgraph_edge::rebuild_edges ();
37071 return resolver_decl;
37073 /* This builds the processor_model struct type defined in
37074 libgcc/config/i386/cpuinfo.c */
37077 build_processor_model_struct (void)
37079 const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
37081 tree field = NULL_TREE, field_chain = NULL_TREE;
37083 tree type = make_node (RECORD_TYPE);
37085 /* The first 3 fields are unsigned int. */
37086 for (i = 0; i < 3; ++i)
37088 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
37089 get_identifier (field_name[i]), unsigned_type_node);
/* NOTE(review): fields are chained newest-first here; presumably
   finish_builtin_struct puts them in declaration order — confirm.  */
37090 if (field_chain != NULL_TREE)
37091 DECL_CHAIN (field) = field_chain;
37092 field_chain = field;
37095 /* The last field is an array of unsigned integers of size one. */
37096 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
37097 get_identifier (field_name[3]),
37098 build_array_type (unsigned_type_node,
37099 build_index_type (size_one_node)));
37100 if (field_chain != NULL_TREE)
37101 DECL_CHAIN (field) = field_chain;
37102 field_chain = field;
37104 finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE);
37108 /* Returns a extern, comdat VAR_DECL of type TYPE and name NAME. */
37111 make_var_decl (tree type, const char *name)
37115 new_decl = build_decl (UNKNOWN_LOCATION,
37117 get_identifier(name),
/* External definition lives in libgcc; this is only a declaration.  */
37120 DECL_EXTERNAL (new_decl) = 1;
37121 TREE_STATIC (new_decl) = 1;
37122 TREE_PUBLIC (new_decl) = 1;
37123 DECL_INITIAL (new_decl) = 0;
37124 DECL_ARTIFICIAL (new_decl) = 0;
/* Keep the decl even if it appears otherwise unreferenced.  */
37125 DECL_PRESERVE_P (new_decl) = 1;
37127 make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl));
37128 assemble_variable (new_decl, 0, 0, 0);
37133 /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
37134 into an integer defined in libgcc/config/i386/cpuinfo.c */
37137 fold_builtin_cpu (tree fndecl, tree *args)
37140 enum ix86_builtins fn_code = (enum ix86_builtins)
37141 DECL_FUNCTION_CODE (fndecl);
37142 tree param_string_cst = NULL;
37144 /* This is the order of bit-fields in __processor_features in cpuinfo.c */
37145 enum processor_features
/* The enums and tables below must stay in sync with
   libgcc/config/i386/cpuinfo.c; values are matched by position.  */
37178 /* These are the values for vendor types and cpu types and subtypes
37179 in cpuinfo.c. Cpu types and subtypes should be subtracted by
37180 the corresponding start value. */
37181 enum processor_model
37191 M_INTEL_SILVERMONT,
37195 M_CPU_SUBTYPE_START,
37196 M_INTEL_COREI7_NEHALEM,
37197 M_INTEL_COREI7_WESTMERE,
37198 M_INTEL_COREI7_SANDYBRIDGE,
37199 M_AMDFAM10H_BARCELONA,
37200 M_AMDFAM10H_SHANGHAI,
37201 M_AMDFAM10H_ISTANBUL,
37202 M_AMDFAM15H_BDVER1,
37203 M_AMDFAM15H_BDVER2,
37204 M_AMDFAM15H_BDVER3,
37205 M_AMDFAM15H_BDVER4,
37206 M_AMDFAM17H_ZNVER1,
37207 M_INTEL_COREI7_IVYBRIDGE,
37208 M_INTEL_COREI7_HASWELL,
37209 M_INTEL_COREI7_BROADWELL,
37210 M_INTEL_COREI7_SKYLAKE,
37211 M_INTEL_COREI7_SKYLAKE_AVX512
/* Maps the architecture names recognized by __builtin_cpu_is to the
   processor_model values above.  */
37214 static struct _arch_names_table
37216 const char *const name;
37217 const enum processor_model model;
37219 const arch_names_table[] =
37222 {"intel", M_INTEL},
37223 {"atom", M_INTEL_BONNELL},
37224 {"slm", M_INTEL_SILVERMONT},
37225 {"core2", M_INTEL_CORE2},
37226 {"corei7", M_INTEL_COREI7},
37227 {"nehalem", M_INTEL_COREI7_NEHALEM},
37228 {"westmere", M_INTEL_COREI7_WESTMERE},
37229 {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
37230 {"ivybridge", M_INTEL_COREI7_IVYBRIDGE},
37231 {"haswell", M_INTEL_COREI7_HASWELL},
37232 {"broadwell", M_INTEL_COREI7_BROADWELL},
37233 {"skylake", M_INTEL_COREI7_SKYLAKE},
37234 {"skylake-avx512", M_INTEL_COREI7_SKYLAKE_AVX512},
37235 {"bonnell", M_INTEL_BONNELL},
37236 {"silvermont", M_INTEL_SILVERMONT},
37237 {"knl", M_INTEL_KNL},
37238 {"amdfam10h", M_AMDFAM10H},
37239 {"barcelona", M_AMDFAM10H_BARCELONA},
37240 {"shanghai", M_AMDFAM10H_SHANGHAI},
37241 {"istanbul", M_AMDFAM10H_ISTANBUL},
37242 {"btver1", M_AMD_BTVER1},
37243 {"amdfam15h", M_AMDFAM15H},
37244 {"bdver1", M_AMDFAM15H_BDVER1},
37245 {"bdver2", M_AMDFAM15H_BDVER2},
37246 {"bdver3", M_AMDFAM15H_BDVER3},
37247 {"bdver4", M_AMDFAM15H_BDVER4},
37248 {"btver2", M_AMD_BTVER2},
37249 {"znver1", M_AMDFAM17H_ZNVER1},
/* Maps the feature names recognized by __builtin_cpu_supports to the
   processor_features bit numbers above.  */
37252 static struct _isa_names_table
37254 const char *const name;
37255 const enum processor_features feature;
37257 const isa_names_table[] =
37261 {"popcnt", F_POPCNT},
37265 {"ssse3", F_SSSE3},
37266 {"sse4a", F_SSE4_A},
37267 {"sse4.1", F_SSE4_1},
37268 {"sse4.2", F_SSE4_2},
37274 {"avx512f", F_AVX512F},
37278 {"pclmul", F_PCLMUL},
37279 {"avx512vl",F_AVX512VL},
37280 {"avx512bw",F_AVX512BW},
37281 {"avx512dq",F_AVX512DQ},
37282 {"avx512cd",F_AVX512CD},
37283 {"avx512er",F_AVX512ER},
37284 {"avx512pf",F_AVX512PF},
37285 {"avx512vbmi",F_AVX512VBMI},
37286 {"avx512ifma",F_AVX512IFMA},
/* Build (a reference to) libgcc's __cpu_model variable, which holds
   the data filled in by __cpu_indicator_init.  */
37289 tree __processor_model_type = build_processor_model_struct ();
37290 tree __cpu_model_var = make_var_decl (__processor_model_type,
37294 varpool_node::add (__cpu_model_var);
37296 gcc_assert ((args != NULL) && (*args != NULL));
/* Strip wrapping expressions until the STRING_CST argument is found.  */
37298 param_string_cst = *args;
37299 while (param_string_cst
37300 && TREE_CODE (param_string_cst) != STRING_CST)
37302 /* *args must be a expr that can contain other EXPRS leading to a
37304 if (!EXPR_P (param_string_cst))
37306 error ("Parameter to builtin must be a string constant or literal");
37307 return integer_zero_node;
37309 param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0);
37312 gcc_assert (param_string_cst);
37314 if (fn_code == IX86_BUILTIN_CPU_IS)
37320 unsigned int field_val = 0;
37321 unsigned int NUM_ARCH_NAMES
37322 = sizeof (arch_names_table) / sizeof (struct _arch_names_table);
/* Linear lookup of the requested architecture name.  */
37324 for (i = 0; i < NUM_ARCH_NAMES; i++)
37325 if (strcmp (arch_names_table[i].name,
37326 TREE_STRING_POINTER (param_string_cst)) == 0)
37329 if (i == NUM_ARCH_NAMES)
37331 error ("Parameter to builtin not valid: %s",
37332 TREE_STRING_POINTER (param_string_cst));
37333 return integer_zero_node;
37336 field = TYPE_FIELDS (__processor_model_type);
37337 field_val = arch_names_table[i].model;
37339 /* CPU types are stored in the next field. */
37340 if (field_val > M_CPU_TYPE_START
37341 && field_val < M_CPU_SUBTYPE_START)
37343 field = DECL_CHAIN (field);
37344 field_val -= M_CPU_TYPE_START;
37347 /* CPU subtypes are stored in the next field. */
37348 if (field_val > M_CPU_SUBTYPE_START)
37350 field = DECL_CHAIN ( DECL_CHAIN (field));
37351 field_val -= M_CPU_SUBTYPE_START;
37354 /* Get the appropriate field in __cpu_model. */
37355 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
37358 /* Check the value. */
/* Fold to (int) (__cpu_model.<field> == field_val).  */
37359 final = build2 (EQ_EXPR, unsigned_type_node, ref,
37360 build_int_cstu (unsigned_type_node, field_val));
37361 return build1 (CONVERT_EXPR, integer_type_node, final);
37363 else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS)
37370 unsigned int field_val = 0;
37371 unsigned int NUM_ISA_NAMES
37372 = sizeof (isa_names_table) / sizeof (struct _isa_names_table);
37374 for (i = 0; i < NUM_ISA_NAMES; i++)
37375 if (strcmp (isa_names_table[i].name,
37376 TREE_STRING_POINTER (param_string_cst)) == 0)
37379 if (i == NUM_ISA_NAMES)
37381 error ("Parameter to builtin not valid: %s",
37382 TREE_STRING_POINTER (param_string_cst));
37383 return integer_zero_node;
37386 field = TYPE_FIELDS (__processor_model_type);
37387 /* Get the last field, which is __cpu_features. */
37388 while (DECL_CHAIN (field))
37389 field = DECL_CHAIN (field);
37391 /* Get the appropriate field: __cpu_model.__cpu_features */
37392 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
37395 /* Access the 0th element of __cpu_features array. */
37396 array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
37397 integer_zero_node, NULL_TREE, NULL_TREE);
37399 field_val = (1 << isa_names_table[i].feature);
37400 /* Return __cpu_model.__cpu_features[0] & field_val */
37401 final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
37402 build_int_cstu (unsigned_type_node, field_val));
37403 return build1 (CONVERT_EXPR, integer_type_node, final);
/* Only CPU_IS and CPU_SUPPORTS reach this function.  */
37405 gcc_unreachable ();
/* Fold a machine-dependent builtin call at the tree level.  Currently
   only __builtin_cpu_is and __builtin_cpu_supports are folded here;
   the subtarget gets a chance at everything else.  */
37409 ix86_fold_builtin (tree fndecl, int n_args,
37410 tree *args, bool ignore ATTRIBUTE_UNUSED)
37412 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
37414 enum ix86_builtins fn_code = (enum ix86_builtins)
37415 DECL_FUNCTION_CODE (fndecl);
37416 if (fn_code == IX86_BUILTIN_CPU_IS
37417 || fn_code == IX86_BUILTIN_CPU_SUPPORTS)
37419 gcc_assert (n_args == 1);
37420 return fold_builtin_cpu (fndecl, args);
37424 #ifdef SUBTARGET_FOLD_BUILTIN
37425 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
37431 /* Make builtins to detect cpu type and features supported. NAME is
37432 the builtin name, CODE is the builtin code, and FTYPE is the function
37433 type of the builtin. */
37436 make_cpu_type_builtin (const char* name, int code,
37437 enum ix86_builtin_func_type ftype, bool is_const)
37442 type = ix86_get_builtin_func_type (ftype);
37443 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
37445 gcc_assert (decl != NULL_TREE);
/* Record the decl so ix86_builtin_decl can look it up by CODE.  */
37446 ix86_builtins[(int) code] = decl;
/* IS_CONST marks the builtin readonly (no side effects).  */
37447 TREE_READONLY (decl) = is_const;
37450 /* Make builtins to get CPU type and features supported. The created
37453 __builtin_cpu_init (), to detect cpu type and features,
37454 __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
37455 __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>
37459 ix86_init_platform_type_builtins (void)
/* cpu_init has side effects (fills in __cpu_model), so it is not
   marked readonly; cpu_is and cpu_supports are.  */
37461 make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT,
37462 INT_FTYPE_VOID, false);
37463 make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS,
37464 INT_FTYPE_PCCHAR, true);
37465 make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS,
37466 INT_FTYPE_PCCHAR, true);
37469 /* Internal method for ix86_init_builtins. */
/* Register the ms_abi/sysv_abi-specific variants of the va_start,
   va_end and va_copy builtins.  */
37472 ix86_init_builtins_va_builtins_abi (void)
37474 tree ms_va_ref, sysv_va_ref;
37475 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
37476 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
37477 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
37478 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
/* Attribute lists used to tag each builtin with its calling ABI.  */
37482 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
37483 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
37484 ms_va_ref = build_reference_type (ms_va_list_type_node);
37486 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
/* Build the function types for each builtin below.  */
37489 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
37490 fnvoid_va_start_ms =
37491 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
37492 fnvoid_va_end_sysv =
37493 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
37494 fnvoid_va_start_sysv =
37495 build_varargs_function_type_list (void_type_node, sysv_va_ref,
37497 fnvoid_va_copy_ms =
37498 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
37500 fnvoid_va_copy_sysv =
37501 build_function_type_list (void_type_node, sysv_va_ref,
37502 sysv_va_ref, NULL_TREE);
37504 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
37505 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
37506 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
37507 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
37508 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
37509 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
37510 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
37511 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
37512 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
37513 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
37514 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
37515 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
/* Register the target-specific primitive types (__float80, __float128
   and the types generated by i386-builtin-types.awk).  */
37519 ix86_init_builtin_types (void)
37521 tree float128_type_node, float80_type_node;
37523 /* The __float80 type. */
37524 float80_type_node = long_double_type_node;
/* If long double is not already the 80-bit (XFmode) extended type,
   build a distinct 80-bit REAL_TYPE for __float80.  */
37525 if (TYPE_MODE (float80_type_node) != XFmode)
37527 /* The __float80 type. */
37528 float80_type_node = make_node (REAL_TYPE);
37530 TYPE_PRECISION (float80_type_node) = 80;
37531 layout_type (float80_type_node);
37533 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
37535 /* The __float128 type. */
37536 float128_type_node = make_node (REAL_TYPE);
37537 TYPE_PRECISION (float128_type_node) = 128;
37538 layout_type (float128_type_node);
37539 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
37541 /* This macro is built by i386-builtin-types.awk. */
37542 DEFINE_BUILTIN_PRIMITIVE_TYPES;
/* Top-level initialization of all ix86 builtins: types first, then the
   cpu-detection, TFmode, TM, MMX/SSE, MPX and ABI-specific va builtins,
   and finally any subtarget additions.  */
37546 ix86_init_builtins (void)
37550 ix86_init_builtin_types ();
37552 /* Builtins to get CPU type and features. */
37553 ix86_init_platform_type_builtins ();
37555 /* TFmode support builtins. */
37556 def_builtin_const (0, "__builtin_infq",
37557 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
37558 def_builtin_const (0, "__builtin_huge_valq",
37559 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
37561 /* We will expand them to normal call if SSE isn't available since
37562 they are used by libgcc. */
37563 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
37564 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
37565 BUILT_IN_MD, "__fabstf2", NULL_TREE);
37566 TREE_READONLY (t) = 1;
37567 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
37569 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
37570 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
37571 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
37572 TREE_READONLY (t) = 1;
37573 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
37575 ix86_init_tm_builtins ();
37576 ix86_init_mmx_sse_builtins ();
37577 ix86_init_mpx_builtins ();
37580 ix86_init_builtins_va_builtins_abi ();
37582 #ifdef SUBTARGET_INIT_BUILTINS
37583 SUBTARGET_INIT_BUILTINS;
37587 /* Return the ix86 builtin for CODE. */
37590 ix86_builtin_decl (unsigned code, bool)
37592 if (code >= IX86_BUILTIN_MAX)
37593 return error_mark_node;
37595 return ix86_builtins[code];
37598 /* Errors in the source file can cause expand_expr to return const0_rtx
37599 where we expect a vector. To avoid crashing, use one of the vector
37600 clear instructions. */
37602 safe_vector_operand (rtx x, machine_mode mode)
37604 if (x == const0_rtx)
37605 x = CONST0_RTX (mode);
37609 /* Fixup modeless constants to fit required mode. */
37611 fixup_modeless_constant (rtx x, machine_mode mode)
37613 if (GET_MODE (x) == VOIDmode)
37614 x = convert_to_mode (mode, x, 1);
37618 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
37621 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
37624 tree arg0 = CALL_EXPR_ARG (exp, 0);
37625 tree arg1 = CALL_EXPR_ARG (exp, 1);
37626 rtx op0 = expand_normal (arg0);
37627 rtx op1 = expand_normal (arg1);
37628 machine_mode tmode = insn_data[icode].operand[0].mode;
37629 machine_mode mode0 = insn_data[icode].operand[1].mode;
37630 machine_mode mode1 = insn_data[icode].operand[2].mode;
/* Guard against error-recovery const0_rtx operands in vector modes.  */
37632 if (VECTOR_MODE_P (mode0))
37633 op0 = safe_vector_operand (op0, mode0);
37634 if (VECTOR_MODE_P (mode1))
37635 op1 = safe_vector_operand (op1, mode1);
/* Reuse TARGET only when it already has the right mode and satisfies
   the insn's operand predicate; otherwise grab a fresh pseudo.  */
37637 if (optimize || !target
37638 || GET_MODE (target) != tmode
37639 || !insn_data[icode].operand[0].predicate (target, tmode))
37640 target = gen_reg_rtx (tmode);
/* Special case: a 32-bit second operand for a TImode insn is loaded
   into the low element of a V4SI register and viewed as TImode.  */
37642 if (GET_MODE (op1) == SImode && mode1 == TImode)
37644 rtx x = gen_reg_rtx (V4SImode);
37645 emit_insn (gen_sse2_loadd (x, op1));
37646 op1 = gen_lowpart (TImode, x);
/* Force operands into registers when the predicates reject them.  */
37649 if (!insn_data[icode].operand[1].predicate (op0, mode0))
37650 op0 = copy_to_mode_reg (mode0, op0);
37651 if (!insn_data[icode].operand[2].predicate (op1, mode1))
37652 op1 = copy_to_mode_reg (mode1, op1);
37654 pat = GEN_FCN (icode) (target, op0, op1);
37663 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
/* NOTE(review): decimated extract — the switch heads, `nargs = N;`
   assignments, break statements and braces are among the missing
   lines.  The visible case labels classify the builtin's function
   type (M_TYPE) to decide the argument count and flags used below.
   Comments only; code untouched.  */
37666 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
37667 enum ix86_builtin_func_type m_type,
37668 enum rtx_code sub_code)
/* COMPARISON_P: insn takes an extra comparison-code operand.
   LAST_ARG_CONSTANT: final call argument must be an immediate.
   NUM_MEMORY: memory-operand count (limit enforced further down).  */
37673 bool comparison_p = false;
37675 bool last_arg_constant = false;
37676 int num_memory = 0;
37682 machine_mode tmode = insn_data[icode].operand[0].mode;
/* Four-operand forms whose last operand is an immediate.  */
37686 case MULTI_ARG_4_DF2_DI_I:
37687 case MULTI_ARG_4_DF2_DI_I1:
37688 case MULTI_ARG_4_SF2_SI_I:
37689 case MULTI_ARG_4_SF2_SI_I1:
37691 last_arg_constant = true;
/* Plain three-operand forms.  */
37694 case MULTI_ARG_3_SF:
37695 case MULTI_ARG_3_DF:
37696 case MULTI_ARG_3_SF2:
37697 case MULTI_ARG_3_DF2:
37698 case MULTI_ARG_3_DI:
37699 case MULTI_ARG_3_SI:
37700 case MULTI_ARG_3_SI_DI:
37701 case MULTI_ARG_3_HI:
37702 case MULTI_ARG_3_HI_SI:
37703 case MULTI_ARG_3_QI:
37704 case MULTI_ARG_3_DI2:
37705 case MULTI_ARG_3_SI2:
37706 case MULTI_ARG_3_HI2:
37707 case MULTI_ARG_3_QI2:
/* Plain two-operand forms.  */
37711 case MULTI_ARG_2_SF:
37712 case MULTI_ARG_2_DF:
37713 case MULTI_ARG_2_DI:
37714 case MULTI_ARG_2_SI:
37715 case MULTI_ARG_2_HI:
37716 case MULTI_ARG_2_QI:
/* Two-operand forms whose second operand is an immediate.  */
37720 case MULTI_ARG_2_DI_IMM:
37721 case MULTI_ARG_2_SI_IMM:
37722 case MULTI_ARG_2_HI_IMM:
37723 case MULTI_ARG_2_QI_IMM:
37725 last_arg_constant = true;
/* One-operand forms (unary, possibly mode-converting).  */
37728 case MULTI_ARG_1_SF:
37729 case MULTI_ARG_1_DF:
37730 case MULTI_ARG_1_SF2:
37731 case MULTI_ARG_1_DF2:
37732 case MULTI_ARG_1_DI:
37733 case MULTI_ARG_1_SI:
37734 case MULTI_ARG_1_HI:
37735 case MULTI_ARG_1_QI:
37736 case MULTI_ARG_1_SI_DI:
37737 case MULTI_ARG_1_HI_DI:
37738 case MULTI_ARG_1_HI_SI:
37739 case MULTI_ARG_1_QI_DI:
37740 case MULTI_ARG_1_QI_SI:
37741 case MULTI_ARG_1_QI_HI:
/* Comparison forms: SUB_CODE supplies the rtx comparison code.  */
37745 case MULTI_ARG_2_DI_CMP:
37746 case MULTI_ARG_2_SI_CMP:
37747 case MULTI_ARG_2_HI_CMP:
37748 case MULTI_ARG_2_QI_CMP:
37750 comparison_p = true;
/* Two-operand "TF" (test) forms.  */
37753 case MULTI_ARG_2_SF_TF:
37754 case MULTI_ARG_2_DF_TF:
37755 case MULTI_ARG_2_DI_TF:
37756 case MULTI_ARG_2_SI_TF:
37757 case MULTI_ARG_2_HI_TF:
37758 case MULTI_ARG_2_QI_TF:
/* Unknown M_TYPE is a front-end/table bug.  */
37764 gcc_unreachable ();
/* Get a result register acceptable to the insn pattern.  */
37767 if (optimize || !target
37768 || GET_MODE (target) != tmode
37769 || !insn_data[icode].operand[0].predicate (target, tmode))
37770 target = gen_reg_rtx (tmode);
37772 gcc_assert (nargs <= 4);
/* Expand and legitimize each call argument.  ADJUST skips the extra
   comparison operand slot when COMPARISON_P.  */
37774 for (i = 0; i < nargs; i++)
37776 tree arg = CALL_EXPR_ARG (exp, i);
37777 rtx op = expand_normal (arg);
37778 int adjust = (comparison_p) ? 1 : 0;
37779 machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
/* Last argument must be an immediate; a few icodes get special
   handling when the raw operand fails the predicate.  */
37781 if (last_arg_constant && i == nargs - 1)
37783 if (!insn_data[icode].operand[i + 1].predicate (op, mode))
37785 enum insn_code new_icode = icode;
/* vpermil2: selector immediate is restricted to 2 bits — hard error.  */
37788 case CODE_FOR_xop_vpermil2v2df3:
37789 case CODE_FOR_xop_vpermil2v4sf3:
37790 case CODE_FOR_xop_vpermil2v4df3:
37791 case CODE_FOR_xop_vpermil2v8sf3:
37792 error ("the last argument must be a 2-bit immediate");
37793 return gen_reg_rtx (tmode);
/* XOP rotates: retarget to the generic rotl patterns, which accept
   a wider operand, after masking a constant count to the element
   width (rotation is modular in the element bit-size).  */
37794 case CODE_FOR_xop_rotlv2di3:
37795 new_icode = CODE_FOR_rotlv2di3;
37797 case CODE_FOR_xop_rotlv4si3:
37798 new_icode = CODE_FOR_rotlv4si3;
37800 case CODE_FOR_xop_rotlv8hi3:
37801 new_icode = CODE_FOR_rotlv8hi3;
37803 case CODE_FOR_xop_rotlv16qi3:
37804 new_icode = CODE_FOR_rotlv16qi3;
37806 if (CONST_INT_P (op))
37808 int mask = GET_MODE_UNIT_BITSIZE (tmode) - 1;
37809 op = GEN_INT (INTVAL (op) & mask);
37810 gcc_checking_assert
37811 (insn_data[icode].operand[i + 1].predicate (op, mode));
/* Sanity: the substituted pattern must agree with the original on
   result/input modes and predicates.  */
37815 gcc_checking_assert
37817 && insn_data[new_icode].operand[0].mode == tmode
37818 && insn_data[new_icode].operand[1].mode == tmode
37819 && insn_data[new_icode].operand[2].mode == mode
37820 && insn_data[new_icode].operand[0].predicate
37821 == insn_data[icode].operand[0].predicate
37822 && insn_data[new_icode].operand[1].predicate
37823 == insn_data[icode].operand[1].predicate);
37829 gcc_unreachable ();
/* Non-constant operands: legitimize as vector/memory/register.  */
37836 if (VECTOR_MODE_P (mode))
37837 op = safe_vector_operand (op, mode);
37839 /* If we aren't optimizing, only allow one memory operand to be
37841 if (memory_operand (op, mode))
37844 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
37847 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
37849 op = force_reg (mode, op);
37853 args[i].mode = mode;
/* Emit the pattern by arity; comparison forms pass SUB_CODE either as
   an immediate or as an rtx comparison operand.  */
37859 pat = GEN_FCN (icode) (target, args[0].op);
37864 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37865 GEN_INT ((int)sub_code));
37866 else if (! comparison_p)
37867 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
37870 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
37874 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
37879 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
37883 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
37887 gcc_unreachable ();
37897 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
37898 insns with vec_merge. */
/* NOTE(review): decimated extract — missing lines include the function
   header/braces and (presumably) `op1 = op0;` before the operand[2]
   check below; the emit/return tail is also absent.  Comments only.  */
37901 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
/* Single call argument; OP1 is declared here and (per the operand[2]
   check below) used as the vec_merge source — its assignment falls in
   a gap of this extract.  */
37905 tree arg0 = CALL_EXPR_ARG (exp, 0);
37906 rtx op1, op0 = expand_normal (arg0);
37907 machine_mode tmode = insn_data[icode].operand[0].mode;
37908 machine_mode mode0 = insn_data[icode].operand[1].mode;
/* Get a result register acceptable to the insn pattern.  */
37910 if (optimize || !target
37911 || GET_MODE (target) != tmode
37912 || !insn_data[icode].operand[0].predicate (target, tmode))
37913 target = gen_reg_rtx (tmode);
37915 if (VECTOR_MODE_P (mode0))
37916 op0 = safe_vector_operand (op0, mode0);
/* Force the input into a register when optimizing or when it fails
   its operand predicate.  */
37918 if ((optimize && !register_operand (op0, mode0))
37919 || !insn_data[icode].operand[1].predicate (op0, mode0))
37920 op0 = copy_to_mode_reg (mode0, op0);
37923 if (!insn_data[icode].operand[2].predicate (op1, mode0))
37924 op1 = copy_to_mode_reg (mode0, op1);
37926 pat = GEN_FCN (icode) (target, op0, op1);
37933 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
/* Expands a vector comparison builtin described by D: evaluates both
   arguments, optionally swaps them (SWAP), builds the comparison rtx
   and emits D->icode.  NOTE(review): decimated extract — header,
   braces, the swap condition guarding std::swap and the emit/return
   tail are missing.  Comments only.  */
37936 ix86_expand_sse_compare (const struct builtin_description *d,
37937 tree exp, rtx target, bool swap)
37940 tree arg0 = CALL_EXPR_ARG (exp, 0);
37941 tree arg1 = CALL_EXPR_ARG (exp, 1);
37942 rtx op0 = expand_normal (arg0);
37943 rtx op1 = expand_normal (arg1);
37945 machine_mode tmode = insn_data[d->icode].operand[0].mode;
37946 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
37947 machine_mode mode1 = insn_data[d->icode].operand[2].mode;
37948 enum rtx_code comparison = d->comparison;
37950 if (VECTOR_MODE_P (mode0))
37951 op0 = safe_vector_operand (op0, mode0);
37952 if (VECTOR_MODE_P (mode1))
37953 op1 = safe_vector_operand (op1, mode1);
37955 /* Swap operands if we have a comparison that isn't available in
37958 std::swap (op0, op1);
/* Get a result register acceptable to the insn pattern.  */
37960 if (optimize || !target
37961 || GET_MODE (target) != tmode
37962 || !insn_data[d->icode].operand[0].predicate (target, tmode))
37963 target = gen_reg_rtx (tmode);
/* Legitimize both inputs, forcing to registers when optimizing.  */
37965 if ((optimize && !register_operand (op0, mode0))
37966 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
37967 op0 = copy_to_mode_reg (mode0, op0);
37968 if ((optimize && !register_operand (op1, mode1))
37969 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
37970 op1 = copy_to_mode_reg (mode1, op1);
/* OP2 is the comparison rtx the pattern embeds as its third operand.  */
37972 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
37973 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
37980 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
/* Expands a COMI/UCOMI-style builtin: emits the flag-setting compare,
   then materializes the boolean result of D->comparison into the low
   byte of a fresh SImode register (zeroed first so the upper bits are
   well defined) and returns that register.  NOTE(review): decimated
   extract — header/braces and part of the final SET are missing.  */
37983 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
37987 tree arg0 = CALL_EXPR_ARG (exp, 0);
37988 tree arg1 = CALL_EXPR_ARG (exp, 1);
37989 rtx op0 = expand_normal (arg0);
37990 rtx op1 = expand_normal (arg1);
37991 machine_mode mode0 = insn_data[d->icode].operand[0].mode;
37992 machine_mode mode1 = insn_data[d->icode].operand[1].mode;
37993 enum rtx_code comparison = d->comparison;
37995 if (VECTOR_MODE_P (mode0))
37996 op0 = safe_vector_operand (op0, mode0);
37997 if (VECTOR_MODE_P (mode1))
37998 op1 = safe_vector_operand (op1, mode1);
38000 /* Swap operands if we have a comparison that isn't available in
38002 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
38003 std::swap (op0, op1);
/* Result register: zero SImode pseudo, written through a QImode
   subreg so only the low byte is set by the comparison.  */
38005 target = gen_reg_rtx (SImode);
38006 emit_move_insn (target, const0_rtx);
38007 target = gen_rtx_SUBREG (QImode, target, 0);
38009 if ((optimize && !register_operand (op0, mode0))
38010 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
38011 op0 = copy_to_mode_reg (mode0, op0);
38012 if ((optimize && !register_operand (op1, mode1))
38013 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
38014 op1 = copy_to_mode_reg (mode1, op1);
/* The comi pattern has no explicit destination; it sets the flags.  */
38016 pat = GEN_FCN (d->icode) (op0, op1);
/* Store the comparison of the flags into the low byte only.  */
38020 emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
38021 gen_rtx_fmt_ee (comparison, QImode,
/* Return the full SImode pseudo underlying the QImode subreg.  */
38025 return SUBREG_REG (target);
38028 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
/* Expands a one-argument rounding builtin; D->comparison doubles as
   the rounding-mode immediate passed as the insn's second input.
   NOTE(review): decimated extract — header/braces and the emit/return
   tail are missing.  Comments only.  */
38031 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
38035 tree arg0 = CALL_EXPR_ARG (exp, 0);
38036 rtx op1, op0 = expand_normal (arg0);
38037 machine_mode tmode = insn_data[d->icode].operand[0].mode;
38038 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
38040 if (optimize || target == 0
38041 || GET_MODE (target) != tmode
38042 || !insn_data[d->icode].operand[0].predicate (target, tmode))
38043 target = gen_reg_rtx (tmode);
38045 if (VECTOR_MODE_P (mode0))
38046 op0 = safe_vector_operand (op0, mode0);
/* NOTE(review): operand[0] predicate is checked against the INPUT
   here (siblings use operand[1]) — possibly intentional, possibly an
   off-by-one; confirm against the full original source.  */
38048 if ((optimize && !register_operand (op0, mode0))
38049 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
38050 op0 = copy_to_mode_reg (mode0, op0);
/* Rounding-mode immediate comes from the descriptor.  */
38052 op1 = GEN_INT (d->comparison);
38054 pat = GEN_FCN (d->icode) (target, op0, op1);
/* Expands a two-argument round-and-pack builtin; D->comparison again
   supplies the rounding-mode immediate (third insn input).
   NOTE(review): decimated extract — the leading comment, header,
   braces and emit/return tail are missing.  Comments only.  */
38062 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
38063 tree exp, rtx target)
38066 tree arg0 = CALL_EXPR_ARG (exp, 0);
38067 tree arg1 = CALL_EXPR_ARG (exp, 1);
38068 rtx op0 = expand_normal (arg0);
38069 rtx op1 = expand_normal (arg1);
38071 machine_mode tmode = insn_data[d->icode].operand[0].mode;
38072 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
38073 machine_mode mode1 = insn_data[d->icode].operand[2].mode;
38075 if (optimize || target == 0
38076 || GET_MODE (target) != tmode
38077 || !insn_data[d->icode].operand[0].predicate (target, tmode))
38078 target = gen_reg_rtx (tmode);
/* Both inputs are vectors here; no VECTOR_MODE_P guard needed.  */
38080 op0 = safe_vector_operand (op0, mode0);
38081 op1 = safe_vector_operand (op1, mode1);
/* NOTE(review): predicates checked are operand[0]/operand[1] while the
   modes used are mode0/mode1 from operand[1]/operand[2] — looks
   off-by-one relative to sibling expanders; confirm against the full
   original source before relying on it.  */
38083 if ((optimize && !register_operand (op0, mode0))
38084 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
38085 op0 = copy_to_mode_reg (mode0, op0);
38086 if ((optimize && !register_operand (op1, mode1))
38087 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
38088 op1 = copy_to_mode_reg (mode1, op1);
38090 op2 = GEN_INT (d->comparison);
38092 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
38099 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
/* Expands a PTEST-style builtin: emits the flag-setting test insn,
   then converts the flag selected by D->comparison into a 0/1 value
   in the low byte of a zeroed SImode register, which is returned.
   Structure parallels ix86_expand_sse_comi.  NOTE(review): decimated
   extract — header/braces and part of the final SET are missing.  */
38102 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
38106 tree arg0 = CALL_EXPR_ARG (exp, 0);
38107 tree arg1 = CALL_EXPR_ARG (exp, 1);
38108 rtx op0 = expand_normal (arg0);
38109 rtx op1 = expand_normal (arg1);
38110 machine_mode mode0 = insn_data[d->icode].operand[0].mode;
38111 machine_mode mode1 = insn_data[d->icode].operand[1].mode;
38112 enum rtx_code comparison = d->comparison;
38114 if (VECTOR_MODE_P (mode0))
38115 op0 = safe_vector_operand (op0, mode0);
38116 if (VECTOR_MODE_P (mode1))
38117 op1 = safe_vector_operand (op1, mode1);
/* Zeroed SImode result written through a QImode subreg, as in
   ix86_expand_sse_comi.  */
38119 target = gen_reg_rtx (SImode);
38120 emit_move_insn (target, const0_rtx);
38121 target = gen_rtx_SUBREG (QImode, target, 0);
38123 if ((optimize && !register_operand (op0, mode0))
38124 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
38125 op0 = copy_to_mode_reg (mode0, op0);
38126 if ((optimize && !register_operand (op1, mode1))
38127 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
38128 op1 = copy_to_mode_reg (mode1, op1);
/* The ptest pattern sets the flags; no explicit destination.  */
38130 pat = GEN_FCN (d->icode) (op0, op1);
38134 emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
38135 gen_rtx_fmt_ee (comparison, QImode,
38139 return SUBREG_REG (target);
38142 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
/* Expands PCMPESTRI/PCMPESTRM builtins (explicit-length string
   compare).  Five arguments: two vectors with their explicit lengths,
   plus an 8-bit control immediate.  The pattern has two outputs
   (index and mask); the one the builtin wants becomes TARGET, the
   other a scratch.  For the flag-query variants (neither ...ESTRI128
   nor ...ESTRM128) the selected EFLAGS bit is extracted into a 0/1
   byte.  NOTE(review): decimated extract — header/braces, the
   `return const0_rtx` after the error, and the flag-register operand
   of the final comparison are among the missing lines.  */
38145 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
38146 tree exp, rtx target)
38149 tree arg0 = CALL_EXPR_ARG (exp, 0);
38150 tree arg1 = CALL_EXPR_ARG (exp, 1);
38151 tree arg2 = CALL_EXPR_ARG (exp, 2);
38152 tree arg3 = CALL_EXPR_ARG (exp, 3);
38153 tree arg4 = CALL_EXPR_ARG (exp, 4);
38154 rtx scratch0, scratch1;
38155 rtx op0 = expand_normal (arg0);
38156 rtx op1 = expand_normal (arg1);
38157 rtx op2 = expand_normal (arg2);
38158 rtx op3 = expand_normal (arg3);
38159 rtx op4 = expand_normal (arg4);
/* Operand layout: 0 = index output, 1 = mask output, 2/4 = the two
   vector inputs, 3/5 = their integer lengths, 6 = control imm.  */
38160 machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
38162 tmode0 = insn_data[d->icode].operand[0].mode;
38163 tmode1 = insn_data[d->icode].operand[1].mode;
38164 modev2 = insn_data[d->icode].operand[2].mode;
38165 modei3 = insn_data[d->icode].operand[3].mode;
38166 modev4 = insn_data[d->icode].operand[4].mode;
38167 modei5 = insn_data[d->icode].operand[5].mode;
38168 modeimm = insn_data[d->icode].operand[6].mode;
38170 if (VECTOR_MODE_P (modev2))
38171 op0 = safe_vector_operand (op0, modev2)
38172 if (VECTOR_MODE_P (modev4))
38173 op2 = safe_vector_operand (op2, modev4);
/* Legitimize each input against its operand predicate.  */
38175 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
38176 op0 = copy_to_mode_reg (modev2, op0);
38177 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
38178 op1 = copy_to_mode_reg (modei3, op1);
38179 if ((optimize && !register_operand (op2, modev4))
38180 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
38181 op2 = copy_to_mode_reg (modev4, op2);
38182 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
38183 op3 = copy_to_mode_reg (modei5, op3);
/* The control operand must be an 8-bit immediate.  */
38185 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
38187 error ("the fifth argument must be an 8-bit immediate");
/* PCMPESTRI: the index output is the result; mask goes to scratch.  */
38191 if (d->code == IX86_BUILTIN_PCMPESTRI128)
38193 if (optimize || !target
38194 || GET_MODE (target) != tmode0
38195 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
38196 target = gen_reg_rtx (tmode0);
38198 scratch1 = gen_reg_rtx (tmode1);
38200 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
/* PCMPESTRM: the mask output is the result; index goes to scratch.  */
38202 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
38204 if (optimize || !target
38205 || GET_MODE (target) != tmode1
38206 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
38207 target = gen_reg_rtx (tmode1);
38209 scratch0 = gen_reg_rtx (tmode0);
38211 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
/* Flag-query variants: both outputs are scratch; D->flag names the
   flags register bit to extract below.  */
38215 gcc_assert (d->flag);
38217 scratch0 = gen_reg_rtx (tmode0);
38218 scratch1 = gen_reg_rtx (tmode1);
38220 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
/* Extract the requested flag as 0/1 in the low byte of a zeroed
   SImode register (same idiom as ix86_expand_sse_comi).  */
38230 target = gen_reg_rtx (SImode);
38231 emit_move_insn (target, const0_rtx);
38232 target = gen_rtx_SUBREG (QImode, target, 0);
38235 (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
38236 gen_rtx_fmt_ee (EQ, QImode,
38237 gen_rtx_REG ((machine_mode) d->flag,
38240 return SUBREG_REG (target);
38247 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
/* Expands PCMPISTRI/PCMPISTRM builtins (implicit-length string
   compare).  Three arguments: two vectors and an 8-bit control
   immediate — same shape as ix86_expand_sse_pcmpestr but without the
   explicit length operands.  NOTE(review): decimated extract —
   header/braces and some interior lines are missing.  */
38250 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
38251 tree exp, rtx target)
38254 tree arg0 = CALL_EXPR_ARG (exp, 0);
38255 tree arg1 = CALL_EXPR_ARG (exp, 1);
38256 tree arg2 = CALL_EXPR_ARG (exp, 2);
38257 rtx scratch0, scratch1;
38258 rtx op0 = expand_normal (arg0);
38259 rtx op1 = expand_normal (arg1);
38260 rtx op2 = expand_normal (arg2);
/* Operand layout: 0 = index output, 1 = mask output, 2/3 = the two
   vector inputs, 4 = control imm.  */
38261 machine_mode tmode0, tmode1, modev2, modev3, modeimm;
38263 tmode0 = insn_data[d->icode].operand[0].mode;
38264 tmode1 = insn_data[d->icode].operand[1].mode;
38265 modev2 = insn_data[d->icode].operand[2].mode;
38266 modev3 = insn_data[d->icode].operand[3].mode;
38267 modeimm = insn_data[d->icode].operand[4].mode;
38269 if (VECTOR_MODE_P (modev2))
38270 op0 = safe_vector_operand (op0, modev2);
38271 if (VECTOR_MODE_P (modev3))
38272 op1 = safe_vector_operand (op1, modev3);
38274 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
38275 op0 = copy_to_mode_reg (modev2, op0);
38276 if ((optimize && !register_operand (op1, modev3))
38277 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
38278 op1 = copy_to_mode_reg (modev3, op1);
/* The control operand must be an 8-bit immediate.  */
38280 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
38282 error ("the third argument must be an 8-bit immediate");
/* PCMPISTRI: index output is the result; mask goes to scratch.  */
38286 if (d->code == IX86_BUILTIN_PCMPISTRI128)
38288 if (optimize || !target
38289 || GET_MODE (target) != tmode0
38290 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
38291 target = gen_reg_rtx (tmode0);
38293 scratch1 = gen_reg_rtx (tmode1);
38295 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
/* PCMPISTRM: mask output is the result; index goes to scratch.  */
38297 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
38299 if (optimize || !target
38300 || GET_MODE (target) != tmode1
38301 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
38302 target = gen_reg_rtx (tmode1);
38304 scratch0 = gen_reg_rtx (tmode0);
38306 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
/* Flag-query variants: both outputs are scratch; D->flag names the
   flags register bit extracted below.  */
38310 gcc_assert (d->flag);
38312 scratch0 = gen_reg_rtx (tmode0);
38313 scratch1 = gen_reg_rtx (tmode1);
38315 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
/* Extract the requested flag as 0/1 in the low byte of a zeroed
   SImode register (same idiom as ix86_expand_sse_comi).  */
38325 target = gen_reg_rtx (SImode);
38326 emit_move_insn (target, const0_rtx);
38327 target = gen_rtx_SUBREG (QImode, target, 0);
38330 (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
38331 gen_rtx_fmt_ee (EQ, QImode,
38332 gen_rtx_REG ((machine_mode) d->flag,
38335 return SUBREG_REG (target);
38341 /* Subroutine of ix86_expand_builtin to take care of insns with
38342 variable number of operands. */
38345 ix86_expand_args_builtin (const struct builtin_description *d,
38346 tree exp, rtx target)
38348 rtx pat, real_target;
38349 unsigned int i, nargs;
38350 unsigned int nargs_constant = 0;
38351 unsigned int mask_pos = 0;
38352 int num_memory = 0;
38358 bool last_arg_count = false;
38359 enum insn_code icode = d->icode;
38360 const struct insn_data_d *insn_p = &insn_data[icode];
38361 machine_mode tmode = insn_p->operand[0].mode;
38362 machine_mode rmode = VOIDmode;
38364 enum rtx_code comparison = d->comparison;
38366 switch ((enum ix86_builtin_func_type) d->flag)
38368 case V2DF_FTYPE_V2DF_ROUND:
38369 case V4DF_FTYPE_V4DF_ROUND:
38370 case V4SF_FTYPE_V4SF_ROUND:
38371 case V8SF_FTYPE_V8SF_ROUND:
38372 case V4SI_FTYPE_V4SF_ROUND:
38373 case V8SI_FTYPE_V8SF_ROUND:
38374 return ix86_expand_sse_round (d, exp, target);
38375 case V4SI_FTYPE_V2DF_V2DF_ROUND:
38376 case V8SI_FTYPE_V4DF_V4DF_ROUND:
38377 case V16SI_FTYPE_V8DF_V8DF_ROUND:
38378 return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
38379 case INT_FTYPE_V8SF_V8SF_PTEST:
38380 case INT_FTYPE_V4DI_V4DI_PTEST:
38381 case INT_FTYPE_V4DF_V4DF_PTEST:
38382 case INT_FTYPE_V4SF_V4SF_PTEST:
38383 case INT_FTYPE_V2DI_V2DI_PTEST:
38384 case INT_FTYPE_V2DF_V2DF_PTEST:
38385 return ix86_expand_sse_ptest (d, exp, target);
38386 case FLOAT128_FTYPE_FLOAT128:
38387 case FLOAT_FTYPE_FLOAT:
38388 case INT_FTYPE_INT:
38389 case UINT64_FTYPE_INT:
38390 case UINT16_FTYPE_UINT16:
38391 case INT64_FTYPE_INT64:
38392 case INT64_FTYPE_V4SF:
38393 case INT64_FTYPE_V2DF:
38394 case INT_FTYPE_V16QI:
38395 case INT_FTYPE_V8QI:
38396 case INT_FTYPE_V8SF:
38397 case INT_FTYPE_V4DF:
38398 case INT_FTYPE_V4SF:
38399 case INT_FTYPE_V2DF:
38400 case INT_FTYPE_V32QI:
38401 case V16QI_FTYPE_V16QI:
38402 case V8SI_FTYPE_V8SF:
38403 case V8SI_FTYPE_V4SI:
38404 case V8HI_FTYPE_V8HI:
38405 case V8HI_FTYPE_V16QI:
38406 case V8QI_FTYPE_V8QI:
38407 case V8SF_FTYPE_V8SF:
38408 case V8SF_FTYPE_V8SI:
38409 case V8SF_FTYPE_V4SF:
38410 case V8SF_FTYPE_V8HI:
38411 case V4SI_FTYPE_V4SI:
38412 case V4SI_FTYPE_V16QI:
38413 case V4SI_FTYPE_V4SF:
38414 case V4SI_FTYPE_V8SI:
38415 case V4SI_FTYPE_V8HI:
38416 case V4SI_FTYPE_V4DF:
38417 case V4SI_FTYPE_V2DF:
38418 case V4HI_FTYPE_V4HI:
38419 case V4DF_FTYPE_V4DF:
38420 case V4DF_FTYPE_V4SI:
38421 case V4DF_FTYPE_V4SF:
38422 case V4DF_FTYPE_V2DF:
38423 case V4SF_FTYPE_V4SF:
38424 case V4SF_FTYPE_V4SI:
38425 case V4SF_FTYPE_V8SF:
38426 case V4SF_FTYPE_V4DF:
38427 case V4SF_FTYPE_V8HI:
38428 case V4SF_FTYPE_V2DF:
38429 case V2DI_FTYPE_V2DI:
38430 case V2DI_FTYPE_V16QI:
38431 case V2DI_FTYPE_V8HI:
38432 case V2DI_FTYPE_V4SI:
38433 case V2DF_FTYPE_V2DF:
38434 case V2DF_FTYPE_V4SI:
38435 case V2DF_FTYPE_V4DF:
38436 case V2DF_FTYPE_V4SF:
38437 case V2DF_FTYPE_V2SI:
38438 case V2SI_FTYPE_V2SI:
38439 case V2SI_FTYPE_V4SF:
38440 case V2SI_FTYPE_V2SF:
38441 case V2SI_FTYPE_V2DF:
38442 case V2SF_FTYPE_V2SF:
38443 case V2SF_FTYPE_V2SI:
38444 case V32QI_FTYPE_V32QI:
38445 case V32QI_FTYPE_V16QI:
38446 case V16HI_FTYPE_V16HI:
38447 case V16HI_FTYPE_V8HI:
38448 case V8SI_FTYPE_V8SI:
38449 case V16HI_FTYPE_V16QI:
38450 case V8SI_FTYPE_V16QI:
38451 case V4DI_FTYPE_V16QI:
38452 case V8SI_FTYPE_V8HI:
38453 case V4DI_FTYPE_V8HI:
38454 case V4DI_FTYPE_V4SI:
38455 case V4DI_FTYPE_V2DI:
38456 case UHI_FTYPE_UHI:
38457 case UHI_FTYPE_V16QI:
38458 case USI_FTYPE_V32QI:
38459 case UDI_FTYPE_V64QI:
38460 case V16QI_FTYPE_UHI:
38461 case V32QI_FTYPE_USI:
38462 case V64QI_FTYPE_UDI:
38463 case V8HI_FTYPE_UQI:
38464 case V16HI_FTYPE_UHI:
38465 case V32HI_FTYPE_USI:
38466 case V4SI_FTYPE_UQI:
38467 case V8SI_FTYPE_UQI:
38468 case V4SI_FTYPE_UHI:
38469 case V8SI_FTYPE_UHI:
38470 case UQI_FTYPE_V8HI:
38471 case UHI_FTYPE_V16HI:
38472 case USI_FTYPE_V32HI:
38473 case UQI_FTYPE_V4SI:
38474 case UQI_FTYPE_V8SI:
38475 case UHI_FTYPE_V16SI:
38476 case UQI_FTYPE_V2DI:
38477 case UQI_FTYPE_V4DI:
38478 case UQI_FTYPE_V8DI:
38479 case V16SI_FTYPE_UHI:
38480 case V2DI_FTYPE_UQI:
38481 case V4DI_FTYPE_UQI:
38482 case V16SI_FTYPE_INT:
38483 case V16SF_FTYPE_V8SF:
38484 case V16SI_FTYPE_V8SI:
38485 case V16SF_FTYPE_V4SF:
38486 case V16SI_FTYPE_V4SI:
38487 case V16SF_FTYPE_V16SF:
38488 case V8DI_FTYPE_UQI:
38489 case V8DF_FTYPE_V4DF:
38490 case V8DF_FTYPE_V2DF:
38491 case V8DF_FTYPE_V8DF:
38494 case V4SF_FTYPE_V4SF_VEC_MERGE:
38495 case V2DF_FTYPE_V2DF_VEC_MERGE:
38496 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
38497 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
38498 case V16QI_FTYPE_V16QI_V16QI:
38499 case V16QI_FTYPE_V8HI_V8HI:
38500 case V16SF_FTYPE_V16SF_V16SF:
38501 case V8QI_FTYPE_V8QI_V8QI:
38502 case V8QI_FTYPE_V4HI_V4HI:
38503 case V8HI_FTYPE_V8HI_V8HI:
38504 case V8HI_FTYPE_V16QI_V16QI:
38505 case V8HI_FTYPE_V4SI_V4SI:
38506 case V8SF_FTYPE_V8SF_V8SF:
38507 case V8SF_FTYPE_V8SF_V8SI:
38508 case V8DF_FTYPE_V8DF_V8DF:
38509 case V4SI_FTYPE_V4SI_V4SI:
38510 case V4SI_FTYPE_V8HI_V8HI:
38511 case V4SI_FTYPE_V2DF_V2DF:
38512 case V4HI_FTYPE_V4HI_V4HI:
38513 case V4HI_FTYPE_V8QI_V8QI:
38514 case V4HI_FTYPE_V2SI_V2SI:
38515 case V4DF_FTYPE_V4DF_V4DF:
38516 case V4DF_FTYPE_V4DF_V4DI:
38517 case V4SF_FTYPE_V4SF_V4SF:
38518 case V4SF_FTYPE_V4SF_V4SI:
38519 case V4SF_FTYPE_V4SF_V2SI:
38520 case V4SF_FTYPE_V4SF_V2DF:
38521 case V4SF_FTYPE_V4SF_UINT:
38522 case V4SF_FTYPE_V4SF_DI:
38523 case V4SF_FTYPE_V4SF_SI:
38524 case V2DI_FTYPE_V2DI_V2DI:
38525 case V2DI_FTYPE_V16QI_V16QI:
38526 case V2DI_FTYPE_V4SI_V4SI:
38527 case V2DI_FTYPE_V2DI_V16QI:
38528 case V2SI_FTYPE_V2SI_V2SI:
38529 case V2SI_FTYPE_V4HI_V4HI:
38530 case V2SI_FTYPE_V2SF_V2SF:
38531 case V2DF_FTYPE_V2DF_V2DF:
38532 case V2DF_FTYPE_V2DF_V4SF:
38533 case V2DF_FTYPE_V2DF_V2DI:
38534 case V2DF_FTYPE_V2DF_DI:
38535 case V2DF_FTYPE_V2DF_SI:
38536 case V2DF_FTYPE_V2DF_UINT:
38537 case V2SF_FTYPE_V2SF_V2SF:
38538 case V1DI_FTYPE_V1DI_V1DI:
38539 case V1DI_FTYPE_V8QI_V8QI:
38540 case V1DI_FTYPE_V2SI_V2SI:
38541 case V32QI_FTYPE_V16HI_V16HI:
38542 case V16HI_FTYPE_V8SI_V8SI:
38543 case V32QI_FTYPE_V32QI_V32QI:
38544 case V16HI_FTYPE_V32QI_V32QI:
38545 case V16HI_FTYPE_V16HI_V16HI:
38546 case V8SI_FTYPE_V4DF_V4DF:
38547 case V8SI_FTYPE_V8SI_V8SI:
38548 case V8SI_FTYPE_V16HI_V16HI:
38549 case V4DI_FTYPE_V4DI_V4DI:
38550 case V4DI_FTYPE_V8SI_V8SI:
38551 case V8DI_FTYPE_V64QI_V64QI:
38552 if (comparison == UNKNOWN)
38553 return ix86_expand_binop_builtin (icode, exp, target);
38556 case V4SF_FTYPE_V4SF_V4SF_SWAP:
38557 case V2DF_FTYPE_V2DF_V2DF_SWAP:
38558 gcc_assert (comparison != UNKNOWN);
38562 case V16HI_FTYPE_V16HI_V8HI_COUNT:
38563 case V16HI_FTYPE_V16HI_SI_COUNT:
38564 case V8SI_FTYPE_V8SI_V4SI_COUNT:
38565 case V8SI_FTYPE_V8SI_SI_COUNT:
38566 case V4DI_FTYPE_V4DI_V2DI_COUNT:
38567 case V4DI_FTYPE_V4DI_INT_COUNT:
38568 case V8HI_FTYPE_V8HI_V8HI_COUNT:
38569 case V8HI_FTYPE_V8HI_SI_COUNT:
38570 case V4SI_FTYPE_V4SI_V4SI_COUNT:
38571 case V4SI_FTYPE_V4SI_SI_COUNT:
38572 case V4HI_FTYPE_V4HI_V4HI_COUNT:
38573 case V4HI_FTYPE_V4HI_SI_COUNT:
38574 case V2DI_FTYPE_V2DI_V2DI_COUNT:
38575 case V2DI_FTYPE_V2DI_SI_COUNT:
38576 case V2SI_FTYPE_V2SI_V2SI_COUNT:
38577 case V2SI_FTYPE_V2SI_SI_COUNT:
38578 case V1DI_FTYPE_V1DI_V1DI_COUNT:
38579 case V1DI_FTYPE_V1DI_SI_COUNT:
38581 last_arg_count = true;
38583 case UINT64_FTYPE_UINT64_UINT64:
38584 case UINT_FTYPE_UINT_UINT:
38585 case UINT_FTYPE_UINT_USHORT:
38586 case UINT_FTYPE_UINT_UCHAR:
38587 case UINT16_FTYPE_UINT16_INT:
38588 case UINT8_FTYPE_UINT8_INT:
38589 case UHI_FTYPE_UHI_UHI:
38590 case USI_FTYPE_USI_USI:
38591 case UDI_FTYPE_UDI_UDI:
38592 case V16SI_FTYPE_V8DF_V8DF:
38595 case V2DI_FTYPE_V2DI_INT_CONVERT:
38598 nargs_constant = 1;
38600 case V4DI_FTYPE_V4DI_INT_CONVERT:
38603 nargs_constant = 1;
38605 case V8DI_FTYPE_V8DI_INT_CONVERT:
38608 nargs_constant = 1;
38610 case V8HI_FTYPE_V8HI_INT:
38611 case V8HI_FTYPE_V8SF_INT:
38612 case V16HI_FTYPE_V16SF_INT:
38613 case V8HI_FTYPE_V4SF_INT:
38614 case V8SF_FTYPE_V8SF_INT:
38615 case V4SF_FTYPE_V16SF_INT:
38616 case V16SF_FTYPE_V16SF_INT:
38617 case V4SI_FTYPE_V4SI_INT:
38618 case V4SI_FTYPE_V8SI_INT:
38619 case V4HI_FTYPE_V4HI_INT:
38620 case V4DF_FTYPE_V4DF_INT:
38621 case V4DF_FTYPE_V8DF_INT:
38622 case V4SF_FTYPE_V4SF_INT:
38623 case V4SF_FTYPE_V8SF_INT:
38624 case V2DI_FTYPE_V2DI_INT:
38625 case V2DF_FTYPE_V2DF_INT:
38626 case V2DF_FTYPE_V4DF_INT:
38627 case V16HI_FTYPE_V16HI_INT:
38628 case V8SI_FTYPE_V8SI_INT:
38629 case V16SI_FTYPE_V16SI_INT:
38630 case V4SI_FTYPE_V16SI_INT:
38631 case V4DI_FTYPE_V4DI_INT:
38632 case V2DI_FTYPE_V4DI_INT:
38633 case V4DI_FTYPE_V8DI_INT:
38634 case QI_FTYPE_V4SF_INT:
38635 case QI_FTYPE_V2DF_INT:
38637 nargs_constant = 1;
38639 case V16QI_FTYPE_V16QI_V16QI_V16QI:
38640 case V8SF_FTYPE_V8SF_V8SF_V8SF:
38641 case V4DF_FTYPE_V4DF_V4DF_V4DF:
38642 case V4SF_FTYPE_V4SF_V4SF_V4SF:
38643 case V2DF_FTYPE_V2DF_V2DF_V2DF:
38644 case V32QI_FTYPE_V32QI_V32QI_V32QI:
38645 case UHI_FTYPE_V16SI_V16SI_UHI:
38646 case UQI_FTYPE_V8DI_V8DI_UQI:
38647 case V16HI_FTYPE_V16SI_V16HI_UHI:
38648 case V16QI_FTYPE_V16SI_V16QI_UHI:
38649 case V16QI_FTYPE_V8DI_V16QI_UQI:
38650 case V16SF_FTYPE_V16SF_V16SF_UHI:
38651 case V16SF_FTYPE_V4SF_V16SF_UHI:
38652 case V16SI_FTYPE_SI_V16SI_UHI:
38653 case V16SI_FTYPE_V16HI_V16SI_UHI:
38654 case V16SI_FTYPE_V16QI_V16SI_UHI:
38655 case V8SF_FTYPE_V4SF_V8SF_UQI:
38656 case V4DF_FTYPE_V2DF_V4DF_UQI:
38657 case V8SI_FTYPE_V4SI_V8SI_UQI:
38658 case V8SI_FTYPE_SI_V8SI_UQI:
38659 case V4SI_FTYPE_V4SI_V4SI_UQI:
38660 case V4SI_FTYPE_SI_V4SI_UQI:
38661 case V4DI_FTYPE_V2DI_V4DI_UQI:
38662 case V4DI_FTYPE_DI_V4DI_UQI:
38663 case V2DI_FTYPE_V2DI_V2DI_UQI:
38664 case V2DI_FTYPE_DI_V2DI_UQI:
38665 case V64QI_FTYPE_V64QI_V64QI_UDI:
38666 case V64QI_FTYPE_V16QI_V64QI_UDI:
38667 case V64QI_FTYPE_QI_V64QI_UDI:
38668 case V32QI_FTYPE_V32QI_V32QI_USI:
38669 case V32QI_FTYPE_V16QI_V32QI_USI:
38670 case V32QI_FTYPE_QI_V32QI_USI:
38671 case V16QI_FTYPE_V16QI_V16QI_UHI:
38672 case V16QI_FTYPE_QI_V16QI_UHI:
38673 case V32HI_FTYPE_V8HI_V32HI_USI:
38674 case V32HI_FTYPE_HI_V32HI_USI:
38675 case V16HI_FTYPE_V8HI_V16HI_UHI:
38676 case V16HI_FTYPE_HI_V16HI_UHI:
38677 case V8HI_FTYPE_V8HI_V8HI_UQI:
38678 case V8HI_FTYPE_HI_V8HI_UQI:
38679 case V8SF_FTYPE_V8HI_V8SF_UQI:
38680 case V4SF_FTYPE_V8HI_V4SF_UQI:
38681 case V8SI_FTYPE_V8SF_V8SI_UQI:
38682 case V4SI_FTYPE_V4SF_V4SI_UQI:
38683 case V4DI_FTYPE_V4SF_V4DI_UQI:
38684 case V2DI_FTYPE_V4SF_V2DI_UQI:
38685 case V4SF_FTYPE_V4DI_V4SF_UQI:
38686 case V4SF_FTYPE_V2DI_V4SF_UQI:
38687 case V4DF_FTYPE_V4DI_V4DF_UQI:
38688 case V2DF_FTYPE_V2DI_V2DF_UQI:
38689 case V16QI_FTYPE_V8HI_V16QI_UQI:
38690 case V16QI_FTYPE_V16HI_V16QI_UHI:
38691 case V16QI_FTYPE_V4SI_V16QI_UQI:
38692 case V16QI_FTYPE_V8SI_V16QI_UQI:
38693 case V8HI_FTYPE_V4SI_V8HI_UQI:
38694 case V8HI_FTYPE_V8SI_V8HI_UQI:
38695 case V16QI_FTYPE_V2DI_V16QI_UQI:
38696 case V16QI_FTYPE_V4DI_V16QI_UQI:
38697 case V8HI_FTYPE_V2DI_V8HI_UQI:
38698 case V8HI_FTYPE_V4DI_V8HI_UQI:
38699 case V4SI_FTYPE_V2DI_V4SI_UQI:
38700 case V4SI_FTYPE_V4DI_V4SI_UQI:
38701 case V32QI_FTYPE_V32HI_V32QI_USI:
38702 case UHI_FTYPE_V16QI_V16QI_UHI:
38703 case USI_FTYPE_V32QI_V32QI_USI:
38704 case UDI_FTYPE_V64QI_V64QI_UDI:
38705 case UQI_FTYPE_V8HI_V8HI_UQI:
38706 case UHI_FTYPE_V16HI_V16HI_UHI:
38707 case USI_FTYPE_V32HI_V32HI_USI:
38708 case UQI_FTYPE_V4SI_V4SI_UQI:
38709 case UQI_FTYPE_V8SI_V8SI_UQI:
38710 case UQI_FTYPE_V2DI_V2DI_UQI:
38711 case UQI_FTYPE_V4DI_V4DI_UQI:
38712 case V4SF_FTYPE_V2DF_V4SF_UQI:
38713 case V4SF_FTYPE_V4DF_V4SF_UQI:
38714 case V16SI_FTYPE_V16SI_V16SI_UHI:
38715 case V16SI_FTYPE_V4SI_V16SI_UHI:
38716 case V2DI_FTYPE_V4SI_V2DI_UQI:
38717 case V2DI_FTYPE_V8HI_V2DI_UQI:
38718 case V2DI_FTYPE_V16QI_V2DI_UQI:
38719 case V4DI_FTYPE_V4DI_V4DI_UQI:
38720 case V4DI_FTYPE_V4SI_V4DI_UQI:
38721 case V4DI_FTYPE_V8HI_V4DI_UQI:
38722 case V4DI_FTYPE_V16QI_V4DI_UQI:
38723 case V4DI_FTYPE_V4DF_V4DI_UQI:
38724 case V2DI_FTYPE_V2DF_V2DI_UQI:
38725 case V4SI_FTYPE_V4DF_V4SI_UQI:
38726 case V4SI_FTYPE_V2DF_V4SI_UQI:
38727 case V4SI_FTYPE_V8HI_V4SI_UQI:
38728 case V4SI_FTYPE_V16QI_V4SI_UQI:
38729 case V4DI_FTYPE_V4DI_V4DI_V4DI:
38730 case V8DF_FTYPE_V2DF_V8DF_UQI:
38731 case V8DF_FTYPE_V4DF_V8DF_UQI:
38732 case V8DF_FTYPE_V8DF_V8DF_UQI:
38733 case V8SF_FTYPE_V8SF_V8SF_UQI:
38734 case V8SF_FTYPE_V8SI_V8SF_UQI:
38735 case V4DF_FTYPE_V4DF_V4DF_UQI:
38736 case V4SF_FTYPE_V4SF_V4SF_UQI:
38737 case V2DF_FTYPE_V2DF_V2DF_UQI:
38738 case V2DF_FTYPE_V4SF_V2DF_UQI:
38739 case V2DF_FTYPE_V4SI_V2DF_UQI:
38740 case V4SF_FTYPE_V4SI_V4SF_UQI:
38741 case V4DF_FTYPE_V4SF_V4DF_UQI:
38742 case V4DF_FTYPE_V4SI_V4DF_UQI:
38743 case V8SI_FTYPE_V8SI_V8SI_UQI:
38744 case V8SI_FTYPE_V8HI_V8SI_UQI:
38745 case V8SI_FTYPE_V16QI_V8SI_UQI:
38746 case V8DF_FTYPE_V8SI_V8DF_UQI:
38747 case V8DI_FTYPE_DI_V8DI_UQI:
38748 case V16SF_FTYPE_V8SF_V16SF_UHI:
38749 case V16SI_FTYPE_V8SI_V16SI_UHI:
38750 case V16HI_FTYPE_V16HI_V16HI_UHI:
38751 case V8HI_FTYPE_V16QI_V8HI_UQI:
38752 case V16HI_FTYPE_V16QI_V16HI_UHI:
38753 case V32HI_FTYPE_V32HI_V32HI_USI:
38754 case V32HI_FTYPE_V32QI_V32HI_USI:
38755 case V8DI_FTYPE_V16QI_V8DI_UQI:
38756 case V8DI_FTYPE_V2DI_V8DI_UQI:
38757 case V8DI_FTYPE_V4DI_V8DI_UQI:
38758 case V8DI_FTYPE_V8DI_V8DI_UQI:
38759 case V8DI_FTYPE_V8HI_V8DI_UQI:
38760 case V8DI_FTYPE_V8SI_V8DI_UQI:
38761 case V8HI_FTYPE_V8DI_V8HI_UQI:
38762 case V8SI_FTYPE_V8DI_V8SI_UQI:
38763 case V4SI_FTYPE_V4SI_V4SI_V4SI:
38766 case V32QI_FTYPE_V32QI_V32QI_INT:
38767 case V16HI_FTYPE_V16HI_V16HI_INT:
38768 case V16QI_FTYPE_V16QI_V16QI_INT:
38769 case V4DI_FTYPE_V4DI_V4DI_INT:
38770 case V8HI_FTYPE_V8HI_V8HI_INT:
38771 case V8SI_FTYPE_V8SI_V8SI_INT:
38772 case V8SI_FTYPE_V8SI_V4SI_INT:
38773 case V8SF_FTYPE_V8SF_V8SF_INT:
38774 case V8SF_FTYPE_V8SF_V4SF_INT:
38775 case V4SI_FTYPE_V4SI_V4SI_INT:
38776 case V4DF_FTYPE_V4DF_V4DF_INT:
38777 case V16SF_FTYPE_V16SF_V16SF_INT:
38778 case V16SF_FTYPE_V16SF_V4SF_INT:
38779 case V16SI_FTYPE_V16SI_V4SI_INT:
38780 case V4DF_FTYPE_V4DF_V2DF_INT:
38781 case V4SF_FTYPE_V4SF_V4SF_INT:
38782 case V2DI_FTYPE_V2DI_V2DI_INT:
38783 case V4DI_FTYPE_V4DI_V2DI_INT:
38784 case V2DF_FTYPE_V2DF_V2DF_INT:
38785 case UQI_FTYPE_V8DI_V8UDI_INT:
38786 case UQI_FTYPE_V8DF_V8DF_INT:
38787 case UQI_FTYPE_V2DF_V2DF_INT:
38788 case UQI_FTYPE_V4SF_V4SF_INT:
38789 case UHI_FTYPE_V16SI_V16SI_INT:
38790 case UHI_FTYPE_V16SF_V16SF_INT:
38792 nargs_constant = 1;
38794 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
38797 nargs_constant = 1;
38799 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
38802 nargs_constant = 1;
38804 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
38807 nargs_constant = 1;
38809 case V2DI_FTYPE_V2DI_UINT_UINT:
38811 nargs_constant = 2;
38813 case V8DI_FTYPE_V8DI_V8DI_INT_CONVERT:
38816 nargs_constant = 1;
38818 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UDI_CONVERT:
38822 nargs_constant = 1;
38824 case QI_FTYPE_V8DF_INT_UQI:
38825 case QI_FTYPE_V4DF_INT_UQI:
38826 case QI_FTYPE_V2DF_INT_UQI:
38827 case HI_FTYPE_V16SF_INT_UHI:
38828 case QI_FTYPE_V8SF_INT_UQI:
38829 case QI_FTYPE_V4SF_INT_UQI:
38832 nargs_constant = 1;
38834 case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_USI_CONVERT:
38838 nargs_constant = 1;
38840 case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_UHI_CONVERT:
38844 nargs_constant = 1;
38846 case V32QI_FTYPE_V32QI_V32QI_V32QI_USI:
38847 case V32HI_FTYPE_V32HI_V32HI_V32HI_USI:
38848 case V32HI_FTYPE_V64QI_V64QI_V32HI_USI:
38849 case V16SI_FTYPE_V32HI_V32HI_V16SI_UHI:
38850 case V64QI_FTYPE_V64QI_V64QI_V64QI_UDI:
38851 case V32HI_FTYPE_V32HI_V8HI_V32HI_USI:
38852 case V16HI_FTYPE_V16HI_V8HI_V16HI_UHI:
38853 case V8SI_FTYPE_V8SI_V4SI_V8SI_UQI:
38854 case V4DI_FTYPE_V4DI_V2DI_V4DI_UQI:
38855 case V64QI_FTYPE_V32HI_V32HI_V64QI_UDI:
38856 case V32QI_FTYPE_V16HI_V16HI_V32QI_USI:
38857 case V16QI_FTYPE_V8HI_V8HI_V16QI_UHI:
38858 case V32HI_FTYPE_V16SI_V16SI_V32HI_USI:
38859 case V16HI_FTYPE_V8SI_V8SI_V16HI_UHI:
38860 case V8HI_FTYPE_V4SI_V4SI_V8HI_UQI:
38861 case V4DF_FTYPE_V4DF_V4DI_V4DF_UQI:
38862 case V8SF_FTYPE_V8SF_V8SI_V8SF_UQI:
38863 case V4SF_FTYPE_V4SF_V4SI_V4SF_UQI:
38864 case V2DF_FTYPE_V2DF_V2DI_V2DF_UQI:
38865 case V2DI_FTYPE_V4SI_V4SI_V2DI_UQI:
38866 case V4DI_FTYPE_V8SI_V8SI_V4DI_UQI:
38867 case V4DF_FTYPE_V4DI_V4DF_V4DF_UQI:
38868 case V8SF_FTYPE_V8SI_V8SF_V8SF_UQI:
38869 case V2DF_FTYPE_V2DI_V2DF_V2DF_UQI:
38870 case V4SF_FTYPE_V4SI_V4SF_V4SF_UQI:
38871 case V16SF_FTYPE_V16SF_V16SF_V16SF_UHI:
38872 case V16SF_FTYPE_V16SF_V16SI_V16SF_UHI:
38873 case V16SF_FTYPE_V16SI_V16SF_V16SF_UHI:
38874 case V16SI_FTYPE_V16SI_V16SI_V16SI_UHI:
38875 case V16SI_FTYPE_V16SI_V4SI_V16SI_UHI:
38876 case V8HI_FTYPE_V8HI_V8HI_V8HI_UQI:
38877 case V8SI_FTYPE_V8SI_V8SI_V8SI_UQI:
38878 case V4SI_FTYPE_V4SI_V4SI_V4SI_UQI:
38879 case V8SF_FTYPE_V8SF_V8SF_V8SF_UQI:
38880 case V16QI_FTYPE_V16QI_V16QI_V16QI_UHI:
38881 case V16HI_FTYPE_V16HI_V16HI_V16HI_UHI:
38882 case V2DI_FTYPE_V2DI_V2DI_V2DI_UQI:
38883 case V2DF_FTYPE_V2DF_V2DF_V2DF_UQI:
38884 case V4DI_FTYPE_V4DI_V4DI_V4DI_UQI:
38885 case V4DF_FTYPE_V4DF_V4DF_V4DF_UQI:
38886 case V4SF_FTYPE_V4SF_V4SF_V4SF_UQI:
38887 case V8DF_FTYPE_V8DF_V8DF_V8DF_UQI:
38888 case V8DF_FTYPE_V8DF_V8DI_V8DF_UQI:
38889 case V8DF_FTYPE_V8DI_V8DF_V8DF_UQI:
38890 case V8DI_FTYPE_V16SI_V16SI_V8DI_UQI:
38891 case V8DI_FTYPE_V8DI_V2DI_V8DI_UQI:
38892 case V8DI_FTYPE_V8DI_V8DI_V8DI_UQI:
38893 case V8HI_FTYPE_V16QI_V16QI_V8HI_UQI:
38894 case V16HI_FTYPE_V32QI_V32QI_V16HI_UHI:
38895 case V8SI_FTYPE_V16HI_V16HI_V8SI_UQI:
38896 case V4SI_FTYPE_V8HI_V8HI_V4SI_UQI:
38899 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
38900 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
38901 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
38902 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
38903 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT:
38905 nargs_constant = 1;
38907 case UQI_FTYPE_V4DI_V4DI_INT_UQI:
38908 case UQI_FTYPE_V8SI_V8SI_INT_UQI:
38909 case QI_FTYPE_V4DF_V4DF_INT_UQI:
38910 case QI_FTYPE_V8SF_V8SF_INT_UQI:
38911 case UQI_FTYPE_V2DI_V2DI_INT_UQI:
38912 case UQI_FTYPE_V4SI_V4SI_INT_UQI:
38913 case UQI_FTYPE_V2DF_V2DF_INT_UQI:
38914 case UQI_FTYPE_V4SF_V4SF_INT_UQI:
38915 case UDI_FTYPE_V64QI_V64QI_INT_UDI:
38916 case USI_FTYPE_V32QI_V32QI_INT_USI:
38917 case UHI_FTYPE_V16QI_V16QI_INT_UHI:
38918 case USI_FTYPE_V32HI_V32HI_INT_USI:
38919 case UHI_FTYPE_V16HI_V16HI_INT_UHI:
38920 case UQI_FTYPE_V8HI_V8HI_INT_UQI:
38923 nargs_constant = 1;
38925 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
38927 nargs_constant = 2;
38929 case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED:
38930 case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG:
38933 case UQI_FTYPE_V8DI_V8DI_INT_UQI:
38934 case UHI_FTYPE_V16SI_V16SI_INT_UHI:
38937 nargs_constant = 1;
38939 case V8SF_FTYPE_V8SF_INT_V8SF_UQI:
38940 case V4SF_FTYPE_V4SF_INT_V4SF_UQI:
38941 case V2DF_FTYPE_V4DF_INT_V2DF_UQI:
38942 case V2DI_FTYPE_V4DI_INT_V2DI_UQI:
38943 case V8SF_FTYPE_V16SF_INT_V8SF_UQI:
38944 case V8SI_FTYPE_V16SI_INT_V8SI_UQI:
38945 case V2DF_FTYPE_V8DF_INT_V2DF_UQI:
38946 case V2DI_FTYPE_V8DI_INT_V2DI_UQI:
38947 case V4SF_FTYPE_V8SF_INT_V4SF_UQI:
38948 case V4SI_FTYPE_V8SI_INT_V4SI_UQI:
38949 case V8HI_FTYPE_V8SF_INT_V8HI_UQI:
38950 case V8HI_FTYPE_V4SF_INT_V8HI_UQI:
38951 case V32HI_FTYPE_V32HI_INT_V32HI_USI:
38952 case V16HI_FTYPE_V16HI_INT_V16HI_UHI:
38953 case V8HI_FTYPE_V8HI_INT_V8HI_UQI:
38954 case V4DI_FTYPE_V4DI_INT_V4DI_UQI:
38955 case V2DI_FTYPE_V2DI_INT_V2DI_UQI:
38956 case V8SI_FTYPE_V8SI_INT_V8SI_UQI:
38957 case V4SI_FTYPE_V4SI_INT_V4SI_UQI:
38958 case V4DF_FTYPE_V4DF_INT_V4DF_UQI:
38959 case V2DF_FTYPE_V2DF_INT_V2DF_UQI:
38960 case V8DF_FTYPE_V8DF_INT_V8DF_UQI:
38961 case V16SF_FTYPE_V16SF_INT_V16SF_UHI:
38962 case V16HI_FTYPE_V16SF_INT_V16HI_UHI:
38963 case V16SI_FTYPE_V16SI_INT_V16SI_UHI:
38964 case V4SI_FTYPE_V16SI_INT_V4SI_UQI:
38965 case V4DI_FTYPE_V8DI_INT_V4DI_UQI:
38966 case V4DF_FTYPE_V8DF_INT_V4DF_UQI:
38967 case V4SF_FTYPE_V16SF_INT_V4SF_UQI:
38968 case V8DI_FTYPE_V8DI_INT_V8DI_UQI:
38971 nargs_constant = 1;
38973 case V16SF_FTYPE_V16SF_V4SF_INT_V16SF_UHI:
38974 case V16SI_FTYPE_V16SI_V4SI_INT_V16SI_UHI:
38975 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_UQI:
38976 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UQI:
38977 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_UHI:
38978 case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_UHI:
38979 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI:
38980 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI:
38981 case V8DF_FTYPE_V8DF_V4DF_INT_V8DF_UQI:
38982 case V8DI_FTYPE_V8DI_V4DI_INT_V8DI_UQI:
38983 case V4DF_FTYPE_V4DF_V4DF_INT_V4DF_UQI:
38984 case V8SF_FTYPE_V8SF_V8SF_INT_V8SF_UQI:
38985 case V8DF_FTYPE_V8DF_V2DF_INT_V8DF_UQI:
38986 case V8DI_FTYPE_V8DI_V2DI_INT_V8DI_UQI:
38987 case V8SI_FTYPE_V8SI_V8SI_INT_V8SI_UQI:
38988 case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_UQI:
38989 case V4SI_FTYPE_V4SI_V4SI_INT_V4SI_UQI:
38990 case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_UQI:
38991 case V32HI_FTYPE_V64QI_V64QI_INT_V32HI_USI:
38992 case V16HI_FTYPE_V32QI_V32QI_INT_V16HI_UHI:
38993 case V8HI_FTYPE_V16QI_V16QI_INT_V8HI_UQI:
38994 case V16SF_FTYPE_V16SF_V8SF_INT_V16SF_UHI:
38995 case V16SI_FTYPE_V16SI_V8SI_INT_V16SI_UHI:
38996 case V8SF_FTYPE_V8SF_V4SF_INT_V8SF_UQI:
38997 case V8SI_FTYPE_V8SI_V4SI_INT_V8SI_UQI:
38998 case V4DI_FTYPE_V4DI_V2DI_INT_V4DI_UQI:
38999 case V4DF_FTYPE_V4DF_V2DF_INT_V4DF_UQI:
39002 nargs_constant = 1;
39004 case V8DI_FTYPE_V8DI_V8DI_V8DI_INT_UQI:
39005 case V16SI_FTYPE_V16SI_V16SI_V16SI_INT_UHI:
39006 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_UQI:
39007 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_UQI:
39008 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT_UQI:
39009 case V8SI_FTYPE_V8SI_V8SI_V8SI_INT_UQI:
39010 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT_UQI:
39011 case V4DI_FTYPE_V4DI_V4DI_V4DI_INT_UQI:
39012 case V4SI_FTYPE_V4SI_V4SI_V4SI_INT_UQI:
39013 case V2DI_FTYPE_V2DI_V2DI_V2DI_INT_UQI:
39017 nargs_constant = 1;
39021 gcc_unreachable ();
39024 gcc_assert (nargs <= ARRAY_SIZE (args));
39026 if (comparison != UNKNOWN)
39028 gcc_assert (nargs == 2);
39029 return ix86_expand_sse_compare (d, exp, target, swap);
39032 if (rmode == VOIDmode || rmode == tmode)
39036 || GET_MODE (target) != tmode
39037 || !insn_p->operand[0].predicate (target, tmode))
39038 target = gen_reg_rtx (tmode);
39039 real_target = target;
39043 real_target = gen_reg_rtx (tmode);
39044 target = simplify_gen_subreg (rmode, real_target, tmode, 0);
39047 for (i = 0; i < nargs; i++)
39049 tree arg = CALL_EXPR_ARG (exp, i);
39050 rtx op = expand_normal (arg);
39051 machine_mode mode = insn_p->operand[i + 1].mode;
39052 bool match = insn_p->operand[i + 1].predicate (op, mode);
39054 if (last_arg_count && (i + 1) == nargs)
39056 /* SIMD shift insns take either an 8-bit immediate or
39057 register as count. But builtin functions take int as
39058 count. If count doesn't match, we put it in register. */
39061 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
39062 if (!insn_p->operand[i + 1].predicate (op, mode))
39063 op = copy_to_reg (op);
39066 else if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
39067 (!mask_pos && (nargs - i) <= nargs_constant))
39072 case CODE_FOR_avx_vinsertf128v4di:
39073 case CODE_FOR_avx_vextractf128v4di:
39074 error ("the last argument must be an 1-bit immediate");
39077 case CODE_FOR_avx512f_cmpv8di3_mask:
39078 case CODE_FOR_avx512f_cmpv16si3_mask:
39079 case CODE_FOR_avx512f_ucmpv8di3_mask:
39080 case CODE_FOR_avx512f_ucmpv16si3_mask:
39081 case CODE_FOR_avx512vl_cmpv4di3_mask:
39082 case CODE_FOR_avx512vl_cmpv8si3_mask:
39083 case CODE_FOR_avx512vl_ucmpv4di3_mask:
39084 case CODE_FOR_avx512vl_ucmpv8si3_mask:
39085 case CODE_FOR_avx512vl_cmpv2di3_mask:
39086 case CODE_FOR_avx512vl_cmpv4si3_mask:
39087 case CODE_FOR_avx512vl_ucmpv2di3_mask:
39088 case CODE_FOR_avx512vl_ucmpv4si3_mask:
39089 error ("the last argument must be a 3-bit immediate");
39092 case CODE_FOR_sse4_1_roundsd:
39093 case CODE_FOR_sse4_1_roundss:
39095 case CODE_FOR_sse4_1_roundpd:
39096 case CODE_FOR_sse4_1_roundps:
39097 case CODE_FOR_avx_roundpd256:
39098 case CODE_FOR_avx_roundps256:
39100 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
39101 case CODE_FOR_sse4_1_roundps_sfix:
39102 case CODE_FOR_avx_roundpd_vec_pack_sfix256:
39103 case CODE_FOR_avx_roundps_sfix256:
39105 case CODE_FOR_sse4_1_blendps:
39106 case CODE_FOR_avx_blendpd256:
39107 case CODE_FOR_avx_vpermilv4df:
39108 case CODE_FOR_avx_vpermilv4df_mask:
39109 case CODE_FOR_avx512f_getmantv8df_mask:
39110 case CODE_FOR_avx512f_getmantv16sf_mask:
39111 case CODE_FOR_avx512vl_getmantv8sf_mask:
39112 case CODE_FOR_avx512vl_getmantv4df_mask:
39113 case CODE_FOR_avx512vl_getmantv4sf_mask:
39114 case CODE_FOR_avx512vl_getmantv2df_mask:
39115 case CODE_FOR_avx512dq_rangepv8df_mask_round:
39116 case CODE_FOR_avx512dq_rangepv16sf_mask_round:
39117 case CODE_FOR_avx512dq_rangepv4df_mask:
39118 case CODE_FOR_avx512dq_rangepv8sf_mask:
39119 case CODE_FOR_avx512dq_rangepv2df_mask:
39120 case CODE_FOR_avx512dq_rangepv4sf_mask:
39121 case CODE_FOR_avx_shufpd256_mask:
39122 error ("the last argument must be a 4-bit immediate");
39125 case CODE_FOR_sha1rnds4:
39126 case CODE_FOR_sse4_1_blendpd:
39127 case CODE_FOR_avx_vpermilv2df:
39128 case CODE_FOR_avx_vpermilv2df_mask:
39129 case CODE_FOR_xop_vpermil2v2df3:
39130 case CODE_FOR_xop_vpermil2v4sf3:
39131 case CODE_FOR_xop_vpermil2v4df3:
39132 case CODE_FOR_xop_vpermil2v8sf3:
39133 case CODE_FOR_avx512f_vinsertf32x4_mask:
39134 case CODE_FOR_avx512f_vinserti32x4_mask:
39135 case CODE_FOR_avx512f_vextractf32x4_mask:
39136 case CODE_FOR_avx512f_vextracti32x4_mask:
39137 case CODE_FOR_sse2_shufpd:
39138 case CODE_FOR_sse2_shufpd_mask:
39139 case CODE_FOR_avx512dq_shuf_f64x2_mask:
39140 case CODE_FOR_avx512dq_shuf_i64x2_mask:
39141 case CODE_FOR_avx512vl_shuf_i32x4_mask:
39142 case CODE_FOR_avx512vl_shuf_f32x4_mask:
39143 error ("the last argument must be a 2-bit immediate");
39146 case CODE_FOR_avx_vextractf128v4df:
39147 case CODE_FOR_avx_vextractf128v8sf:
39148 case CODE_FOR_avx_vextractf128v8si:
39149 case CODE_FOR_avx_vinsertf128v4df:
39150 case CODE_FOR_avx_vinsertf128v8sf:
39151 case CODE_FOR_avx_vinsertf128v8si:
39152 case CODE_FOR_avx512f_vinsertf64x4_mask:
39153 case CODE_FOR_avx512f_vinserti64x4_mask:
39154 case CODE_FOR_avx512f_vextractf64x4_mask:
39155 case CODE_FOR_avx512f_vextracti64x4_mask:
39156 case CODE_FOR_avx512dq_vinsertf32x8_mask:
39157 case CODE_FOR_avx512dq_vinserti32x8_mask:
39158 case CODE_FOR_avx512vl_vinsertv4df:
39159 case CODE_FOR_avx512vl_vinsertv4di:
39160 case CODE_FOR_avx512vl_vinsertv8sf:
39161 case CODE_FOR_avx512vl_vinsertv8si:
39162 error ("the last argument must be a 1-bit immediate");
39165 case CODE_FOR_avx_vmcmpv2df3:
39166 case CODE_FOR_avx_vmcmpv4sf3:
39167 case CODE_FOR_avx_cmpv2df3:
39168 case CODE_FOR_avx_cmpv4sf3:
39169 case CODE_FOR_avx_cmpv4df3:
39170 case CODE_FOR_avx_cmpv8sf3:
39171 case CODE_FOR_avx512f_cmpv8df3_mask:
39172 case CODE_FOR_avx512f_cmpv16sf3_mask:
39173 case CODE_FOR_avx512f_vmcmpv2df3_mask:
39174 case CODE_FOR_avx512f_vmcmpv4sf3_mask:
39175 error ("the last argument must be a 5-bit immediate");
39179 switch (nargs_constant)
39182 if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
39183 (!mask_pos && (nargs - i) == nargs_constant))
39185 error ("the next to last argument must be an 8-bit immediate");
39189 error ("the last argument must be an 8-bit immediate");
39192 gcc_unreachable ();
39199 if (VECTOR_MODE_P (mode))
39200 op = safe_vector_operand (op, mode);
39202 /* If we aren't optimizing, only allow one memory operand to
39204 if (memory_operand (op, mode))
39207 op = fixup_modeless_constant (op, mode);
39209 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
39211 if (optimize || !match || num_memory > 1)
39212 op = copy_to_mode_reg (mode, op);
39216 op = copy_to_reg (op);
39217 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
39222 args[i].mode = mode;
39228 pat = GEN_FCN (icode) (real_target, args[0].op);
39231 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
39234 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
39238 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
39239 args[2].op, args[3].op);
39242 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
39243 args[2].op, args[3].op, args[4].op);
39245 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
39246 args[2].op, args[3].op, args[4].op,
39250 gcc_unreachable ();
39260 /* Transform pattern of following layout:
39263 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)])
39271 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)
39275 (parallel [ A B ... ]) */
/* Strip the UNSPEC_EMBEDDED_ROUNDING element out of a PARALLEL insn
   pattern and return the equivalent pattern without it.  Callers use
   this when the rounding immediate turned out to be NO_ROUND, so the
   embedded-rounding marker is redundant.
   NOTE(review): this listing elides some original lines (the embedded
   source line numbers jump), so parts of the body are not visible.  */
39278 ix86_erase_embedded_rounding (rtx pat)
/* Accept either a full INSN or a bare pattern.  */
39280 if (GET_CODE (pat) == INSN)
39281 pat = PATTERN (pat);
39283 gcc_assert (GET_CODE (pat) == PARALLEL);
/* Fast path: a two-element PARALLEL must be the payload SET plus the
   rounding unspec; presumably the elided code returns the bare SET.  */
39285 if (XVECLEN (pat, 0) == 2)
39287 rtx p0 = XVECEXP (pat, 0, 0);
39288 rtx p1 = XVECEXP (pat, 0, 1);
39290 gcc_assert (GET_CODE (p0) == SET
39291 && GET_CODE (p1) == UNSPEC
39292 && XINT (p1, 1) == UNSPEC_EMBEDDED_ROUNDING);
/* General case: copy every element except the rounding unspec into a
   scratch vector, then rebuild the PARALLEL from the survivors.  */
39298 rtx *res = XALLOCAVEC (rtx, XVECLEN (pat, 0));
39302 for (; i < XVECLEN (pat, 0); ++i)
39304 rtx elem = XVECEXP (pat, 0, i);
39305 if (GET_CODE (elem) != UNSPEC
39306 || XINT (elem, 1) != UNSPEC_EMBEDDED_ROUNDING)
39310 /* No more than 1 occurrence was removed. */
39311 gcc_assert (j >= XVECLEN (pat, 0) - 1);
39313 return gen_rtx_PARALLEL (GET_MODE (pat), gen_rtvec_v (j, res));
39317 /* Subroutine of ix86_expand_round_builtin to take care of comi insns
   with embedded rounding (the _mm_comi_round_ss/sd intrinsics).
   Expands a 4-argument call: two vector operands, a comparison
   predicate constant, and a rounding-mode constant; returns a QImode
   comparison result widened through an SImode pseudo.
   NOTE(review): interior lines are elided in this listing.  */
39320 ix86_expand_sse_comi_round (const struct builtin_description *d,
39321 tree exp, rtx target)
39324 tree arg0 = CALL_EXPR_ARG (exp, 0);
39325 tree arg1 = CALL_EXPR_ARG (exp, 1);
39326 tree arg2 = CALL_EXPR_ARG (exp, 2);
39327 tree arg3 = CALL_EXPR_ARG (exp, 3);
39328 rtx op0 = expand_normal (arg0);
39329 rtx op1 = expand_normal (arg1);
39330 rtx op2 = expand_normal (arg2);
39331 rtx op3 = expand_normal (arg3);
39332 enum insn_code icode = d->icode;
39333 const struct insn_data_d *insn_p = &insn_data[icode];
39334 machine_mode mode0 = insn_p->operand[0].mode;
39335 machine_mode mode1 = insn_p->operand[1].mode;
39336 enum rtx_code comparison = UNEQ;
39337 bool need_ucomi = false;
39339 /* See avxintrin.h for values. */
/* Map each of the 32 _CMP_* predicate encodings to the RTL comparison
   code and to whether the unordered (ucomi) variant is required.  */
39340 enum rtx_code comi_comparisons[32] =
39342 UNEQ, GT, GE, UNORDERED, LTGT, UNLE, UNLT, ORDERED, UNEQ, UNLT,
39343 UNLE, LT, LTGT, GE, GT, LT, UNEQ, GT, GE, UNORDERED, LTGT, UNLE,
39344 UNLT, ORDERED, UNEQ, UNLT, UNLE, LT, LTGT, GE, GT, LT
39346 bool need_ucomi_values[32] =
39348 true, false, false, true, true, false, false, true,
39349 true, false, false, true, true, false, false, true,
39350 false, true, true, false, false, true, true, false,
39351 false, true, true, false, false, true, true, false
/* Validate the predicate constant: must be a compile-time integer in
   [0, 32), and the rounding operand must satisfy its predicate.  */
39354 if (!CONST_INT_P (op2))
39356 error ("the third argument must be comparison constant");
39359 if (INTVAL (op2) < 0 || INTVAL (op2) >= 32)
39361 error ("incorrect comparison mode");
39365 if (!insn_p->operand[2].predicate (op3, SImode))
39367 error ("incorrect rounding operand");
39371 comparison = comi_comparisons[INTVAL (op2)];
39372 need_ucomi = need_ucomi_values[INTVAL (op2)];
39374 if (VECTOR_MODE_P (mode0))
39375 op0 = safe_vector_operand (op0, mode0);
39376 if (VECTOR_MODE_P (mode1))
39377 op1 = safe_vector_operand (op1, mode1);
/* Build the result as a zeroed SImode pseudo and write only its low
   QImode part, so the upper bits stay zero.  */
39379 target = gen_reg_rtx (SImode);
39380 emit_move_insn (target, const0_rtx);
39381 target = gen_rtx_SUBREG (QImode, target, 0);
/* Force operands into registers where the insn predicates demand.  */
39383 if ((optimize && !register_operand (op0, mode0))
39384 || !insn_p->operand[0].predicate (op0, mode0))
39385 op0 = copy_to_mode_reg (mode0, op0)
39386 if ((optimize && !register_operand (op1, mode1))
39387 || !insn_p->operand[1].predicate (op1, mode1))
39388 op1 = copy_to_mode_reg (mode1, op1);
/* Switch to the unordered-compare (ucomi) icode when the predicate
   requires quiet-NaN semantics.  */
39391 icode = icode == CODE_FOR_sse_comi_round
39392 ? CODE_FOR_sse_ucomi_round
39393 : CODE_FOR_sse2_ucomi_round;
39395 pat = GEN_FCN (icode) (op0, op1, op3);
39399 /* Rounding operand can be either NO_ROUND or ROUND_SAE at this point. */
39400 if (INTVAL (op3) == NO_ROUND)
39402 pat = ix86_erase_embedded_rounding (pat);
39406 set_dst = SET_DEST (pat);
39410 gcc_assert (GET_CODE (XVECEXP (pat, 0, 0)) == SET);
39411 set_dst = SET_DEST (XVECEXP (pat, 0, 0));
/* Store the flag comparison into the low byte of the SImode pseudo
   via STRICT_LOW_PART, then return the full SImode register.  */
39415 emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
39416 gen_rtx_fmt_ee (comparison, QImode,
39420 return SUBREG_REG (target);
/* Subroutine of ix86_expand_builtin: expand builtins that carry an
   explicit rounding-mode operand (AVX-512 embedded rounding / SAE).
   D describes the builtin, EXP is the CALL_EXPR, TARGET is a hint for
   the result register.  The function-type switch classifies the call
   by argument count and position of the immediate constants; the
   trailing rounding operand is dropped again when it is NO_ROUND.
   NOTE(review): interior lines are elided in this listing.  */
39424 ix86_expand_round_builtin (const struct builtin_description *d,
39425 tree exp, rtx target)
39428 unsigned int i, nargs;
39434 enum insn_code icode = d->icode;
39435 const struct insn_data_d *insn_p = &insn_data[icode];
39436 machine_mode tmode = insn_p->operand[0].mode;
39437 unsigned int nargs_constant = 0;
39438 unsigned int redundant_embed_rnd = 0;
/* Classify by prototype: set nargs (elided lines, presumably) and
   nargs_constant = how many trailing args must be immediates.  */
39440 switch ((enum ix86_builtin_func_type) d->flag)
39442 case UINT64_FTYPE_V2DF_INT:
39443 case UINT64_FTYPE_V4SF_INT:
39444 case UINT_FTYPE_V2DF_INT:
39445 case UINT_FTYPE_V4SF_INT:
39446 case INT64_FTYPE_V2DF_INT:
39447 case INT64_FTYPE_V4SF_INT:
39448 case INT_FTYPE_V2DF_INT:
39449 case INT_FTYPE_V4SF_INT:
39452 case V4SF_FTYPE_V4SF_UINT_INT:
39453 case V4SF_FTYPE_V4SF_UINT64_INT:
39454 case V2DF_FTYPE_V2DF_UINT64_INT:
39455 case V4SF_FTYPE_V4SF_INT_INT:
39456 case V4SF_FTYPE_V4SF_INT64_INT:
39457 case V2DF_FTYPE_V2DF_INT64_INT:
39458 case V4SF_FTYPE_V4SF_V4SF_INT:
39459 case V2DF_FTYPE_V2DF_V2DF_INT:
39460 case V4SF_FTYPE_V4SF_V2DF_INT:
39461 case V2DF_FTYPE_V2DF_V4SF_INT:
39464 case V8SF_FTYPE_V8DF_V8SF_QI_INT:
39465 case V8DF_FTYPE_V8DF_V8DF_QI_INT:
39466 case V8SI_FTYPE_V8DF_V8SI_QI_INT:
39467 case V8DI_FTYPE_V8DF_V8DI_QI_INT:
39468 case V8SF_FTYPE_V8DI_V8SF_QI_INT:
39469 case V8DF_FTYPE_V8DI_V8DF_QI_INT:
39470 case V16SF_FTYPE_V16SF_V16SF_HI_INT:
39471 case V8DI_FTYPE_V8SF_V8DI_QI_INT:
39472 case V16SF_FTYPE_V16SI_V16SF_HI_INT:
39473 case V16SI_FTYPE_V16SF_V16SI_HI_INT:
39474 case V8DF_FTYPE_V8SF_V8DF_QI_INT:
39475 case V16SF_FTYPE_V16HI_V16SF_HI_INT:
39476 case V2DF_FTYPE_V2DF_V2DF_V2DF_INT:
39477 case V4SF_FTYPE_V4SF_V4SF_V4SF_INT:
39480 case V4SF_FTYPE_V4SF_V4SF_INT_INT:
39481 case V2DF_FTYPE_V2DF_V2DF_INT_INT:
39482 nargs_constant = 2;
/* The comi variants have their own bespoke expander.  */
39485 case INT_FTYPE_V4SF_V4SF_INT_INT:
39486 case INT_FTYPE_V2DF_V2DF_INT_INT:
39487 return ix86_expand_sse_comi_round (d, exp, target);
39488 case V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT:
39489 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT:
39490 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT:
39491 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT:
39492 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT:
39493 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT:
39496 case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT:
39497 case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT:
39498 nargs_constant = 4;
39501 case UQI_FTYPE_V8DF_V8DF_INT_UQI_INT:
39502 case UQI_FTYPE_V2DF_V2DF_INT_UQI_INT:
39503 case UHI_FTYPE_V16SF_V16SF_INT_UHI_INT:
39504 case UQI_FTYPE_V4SF_V4SF_INT_UQI_INT:
39505 nargs_constant = 3;
39508 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT:
39509 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT:
39510 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT:
39511 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT:
39513 nargs_constant = 4;
39515 case V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT:
39516 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT:
39517 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT:
39518 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT:
39520 nargs_constant = 3;
39523 gcc_unreachable ();
39525 gcc_assert (nargs <= ARRAY_SIZE (args));
/* Reuse TARGET only if it is a suitable register of the right mode.  */
39529 || GET_MODE (target) != tmode
39530 || !insn_p->operand[0].predicate (target, tmode))
39531 target = gen_reg_rtx (tmode);
/* Expand each call argument and coerce it to its insn operand.  */
39533 for (i = 0; i < nargs; i++)
39535 tree arg = CALL_EXPR_ARG (exp, i);
39536 rtx op = expand_normal (arg);
39537 machine_mode mode = insn_p->operand[i + 1].mode;
39538 bool match = insn_p->operand[i + 1].predicate (op, mode);
/* First immediate position: diagnose out-of-range immediates with an
   icode-specific bit width in the message.  */
39540 if (i == nargs - nargs_constant)
39546 case CODE_FOR_avx512f_getmantv8df_mask_round:
39547 case CODE_FOR_avx512f_getmantv16sf_mask_round:
39548 case CODE_FOR_avx512f_vgetmantv2df_round:
39549 case CODE_FOR_avx512f_vgetmantv4sf_round:
39550 error ("the immediate argument must be a 4-bit immediate");
39552 case CODE_FOR_avx512f_cmpv8df3_mask_round:
39553 case CODE_FOR_avx512f_cmpv16sf3_mask_round:
39554 case CODE_FOR_avx512f_vmcmpv2df3_mask_round:
39555 case CODE_FOR_avx512f_vmcmpv4sf3_mask_round:
39556 error ("the immediate argument must be a 5-bit immediate");
39559 error ("the immediate argument must be an 8-bit immediate");
/* Last argument is always the rounding-mode operand.  */
39564 else if (i == nargs-1)
39566 if (!insn_p->operand[nargs].predicate (op, SImode))
39568 error ("incorrect rounding operand");
39572 /* If there is no rounding use normal version of the pattern. */
39573 if (INTVAL (op) == NO_ROUND)
39574 redundant_embed_rnd = 1;
39578 if (VECTOR_MODE_P (mode))
39579 op = safe_vector_operand (op, mode);
39581 op = fixup_modeless_constant (op, mode);
39583 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
39585 if (optimize || !match)
39586 op = copy_to_mode_reg (mode, op);
/* Mode mismatch: go through a register and a paradoxical subreg.  */
39590 op = copy_to_reg (op);
39591 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
39596 args[i].mode = mode;
/* Dispatch on argument count (switch header elided in this listing).  */
39602 pat = GEN_FCN (icode) (target, args[0].op);
39605 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
39608 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
39612 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
39613 args[2].op, args[3].op);
39616 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
39617 args[2].op, args[3].op, args[4].op);
39619 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
39620 args[2].op, args[3].op, args[4].op,
39624 gcc_unreachable ();
/* Drop the now-meaningless embedded-rounding unspec for NO_ROUND.  */
39630 if (redundant_embed_rnd)
39631 pat = ix86_erase_embedded_rounding (pat);
39637 /* Subroutine of ix86_expand_builtin to take care of special insns
39638 with variable number of operands. */
/* Handles builtins classified as "special": loads, stores (including
   non-temporal and masked variants), and a few void/scalar intrinsics.
   The prototype switch sets the load/store class, the number of
   operands, and whether the memory operand needs strict alignment.
   NOTE(review): interior lines are elided in this listing.  */
39641 ix86_expand_special_args_builtin (const struct builtin_description *d,
39642 tree exp, rtx target)
39646 unsigned int i, nargs, arg_adjust, memory;
39647 bool aligned_mem = false;
39653 enum insn_code icode = d->icode;
39654 bool last_arg_constant = false;
39655 const struct insn_data_d *insn_p = &insn_data[icode];
39656 machine_mode tmode = insn_p->operand[0].mode;
39657 enum { load, store } klass;
39659 switch ((enum ix86_builtin_func_type) d->flag)
39661 case VOID_FTYPE_VOID:
39662 emit_insn (GEN_FCN (icode) (target));
39664 case VOID_FTYPE_UINT64:
39665 case VOID_FTYPE_UNSIGNED:
39671 case INT_FTYPE_VOID:
39672 case USHORT_FTYPE_VOID:
39673 case UINT64_FTYPE_VOID:
39674 case UNSIGNED_FTYPE_VOID:
/* Plain loads: one pointer argument, result in a register.  */
39679 case UINT64_FTYPE_PUNSIGNED:
39680 case V2DI_FTYPE_PV2DI:
39681 case V4DI_FTYPE_PV4DI:
39682 case V32QI_FTYPE_PCCHAR:
39683 case V16QI_FTYPE_PCCHAR:
39684 case V8SF_FTYPE_PCV4SF:
39685 case V8SF_FTYPE_PCFLOAT:
39686 case V4SF_FTYPE_PCFLOAT:
39687 case V4DF_FTYPE_PCV2DF:
39688 case V4DF_FTYPE_PCDOUBLE:
39689 case V2DF_FTYPE_PCDOUBLE:
39690 case VOID_FTYPE_PVOID:
39691 case V8DI_FTYPE_PV8DI:
/* movntdqa-style streaming loads require an aligned source.  */
39697 case CODE_FOR_sse4_1_movntdqa:
39698 case CODE_FOR_avx2_movntdqa:
39699 case CODE_FOR_avx512f_movntdqa:
39700 aligned_mem = true;
/* Plain stores: pointer destination + value argument.  */
39706 case VOID_FTYPE_PV2SF_V4SF:
39707 case VOID_FTYPE_PV8DI_V8DI:
39708 case VOID_FTYPE_PV4DI_V4DI:
39709 case VOID_FTYPE_PV2DI_V2DI:
39710 case VOID_FTYPE_PCHAR_V32QI:
39711 case VOID_FTYPE_PCHAR_V16QI:
39712 case VOID_FTYPE_PFLOAT_V16SF:
39713 case VOID_FTYPE_PFLOAT_V8SF:
39714 case VOID_FTYPE_PFLOAT_V4SF:
39715 case VOID_FTYPE_PDOUBLE_V8DF:
39716 case VOID_FTYPE_PDOUBLE_V4DF:
39717 case VOID_FTYPE_PDOUBLE_V2DF:
39718 case VOID_FTYPE_PLONGLONG_LONGLONG:
39719 case VOID_FTYPE_PULONGLONG_ULONGLONG:
39720 case VOID_FTYPE_PINT_INT:
39723 /* Reserve memory operand for target. */
39724 memory = ARRAY_SIZE (args);
39727 /* These builtins and instructions require the memory
39728 to be properly aligned. */
39729 case CODE_FOR_avx_movntv4di:
39730 case CODE_FOR_sse2_movntv2di:
39731 case CODE_FOR_avx_movntv8sf:
39732 case CODE_FOR_sse_movntv4sf:
39733 case CODE_FOR_sse4a_vmmovntv4sf:
39734 case CODE_FOR_avx_movntv4df:
39735 case CODE_FOR_sse2_movntv2df:
39736 case CODE_FOR_sse4a_vmmovntv2df:
39737 case CODE_FOR_sse2_movntidi:
39738 case CODE_FOR_sse_movntq:
39739 case CODE_FOR_sse2_movntisi:
39740 case CODE_FOR_avx512f_movntv16sf:
39741 case CODE_FOR_avx512f_movntv8df:
39742 case CODE_FOR_avx512f_movntv8di:
39743 aligned_mem = true;
/* Loads taking a vector first argument plus a pointer.  */
39749 case V4SF_FTYPE_V4SF_PCV2SF:
39750 case V2DF_FTYPE_V2DF_PCDOUBLE:
/* Gather-style masked loads: pointer plus index/mask vector.  */
39755 case V8SF_FTYPE_PCV8SF_V8SI:
39756 case V4DF_FTYPE_PCV4DF_V4DI:
39757 case V4SF_FTYPE_PCV4SF_V4SI:
39758 case V2DF_FTYPE_PCV2DF_V2DI:
39759 case V8SI_FTYPE_PCV8SI_V8SI:
39760 case V4DI_FTYPE_PCV4DI_V4DI:
39761 case V4SI_FTYPE_PCV4SI_V4SI:
39762 case V2DI_FTYPE_PCV2DI_V2DI:
/* AVX-512 masked stores; the storev* icodes need aligned memory.  */
39767 case VOID_FTYPE_PV8DF_V8DF_UQI:
39768 case VOID_FTYPE_PV4DF_V4DF_UQI:
39769 case VOID_FTYPE_PV2DF_V2DF_UQI:
39770 case VOID_FTYPE_PV16SF_V16SF_UHI:
39771 case VOID_FTYPE_PV8SF_V8SF_UQI:
39772 case VOID_FTYPE_PV4SF_V4SF_UQI:
39773 case VOID_FTYPE_PV8DI_V8DI_UQI:
39774 case VOID_FTYPE_PV4DI_V4DI_UQI:
39775 case VOID_FTYPE_PV2DI_V2DI_UQI:
39776 case VOID_FTYPE_PV16SI_V16SI_UHI:
39777 case VOID_FTYPE_PV8SI_V8SI_UQI:
39778 case VOID_FTYPE_PV4SI_V4SI_UQI:
39781 /* These builtins and instructions require the memory
39782 to be properly aligned. */
39783 case CODE_FOR_avx512f_storev16sf_mask:
39784 case CODE_FOR_avx512f_storev16si_mask:
39785 case CODE_FOR_avx512f_storev8df_mask:
39786 case CODE_FOR_avx512f_storev8di_mask:
39787 case CODE_FOR_avx512vl_storev8sf_mask:
39788 case CODE_FOR_avx512vl_storev8si_mask:
39789 case CODE_FOR_avx512vl_storev4df_mask:
39790 case CODE_FOR_avx512vl_storev4di_mask:
39791 case CODE_FOR_avx512vl_storev4sf_mask:
39792 case CODE_FOR_avx512vl_storev4si_mask:
39793 case CODE_FOR_avx512vl_storev2df_mask:
39794 case CODE_FOR_avx512vl_storev2di_mask:
39795 aligned_mem = true;
/* Masked/truncating stores with a mask or value vector argument.  */
39801 case VOID_FTYPE_PV8SF_V8SI_V8SF:
39802 case VOID_FTYPE_PV4DF_V4DI_V4DF:
39803 case VOID_FTYPE_PV4SF_V4SI_V4SF:
39804 case VOID_FTYPE_PV2DF_V2DI_V2DF:
39805 case VOID_FTYPE_PV8SI_V8SI_V8SI:
39806 case VOID_FTYPE_PV4DI_V4DI_V4DI:
39807 case VOID_FTYPE_PV4SI_V4SI_V4SI:
39808 case VOID_FTYPE_PV2DI_V2DI_V2DI:
39809 case VOID_FTYPE_PV8SI_V8DI_UQI:
39810 case VOID_FTYPE_PV8HI_V8DI_UQI:
39811 case VOID_FTYPE_PV16HI_V16SI_UHI:
39812 case VOID_FTYPE_PV16QI_V8DI_UQI:
39813 case VOID_FTYPE_PV16QI_V16SI_UHI:
39814 case VOID_FTYPE_PV4SI_V4DI_UQI:
39815 case VOID_FTYPE_PV4SI_V2DI_UQI:
39816 case VOID_FTYPE_PV8HI_V4DI_UQI:
39817 case VOID_FTYPE_PV8HI_V2DI_UQI:
39818 case VOID_FTYPE_PV8HI_V8SI_UQI:
39819 case VOID_FTYPE_PV8HI_V4SI_UQI:
39820 case VOID_FTYPE_PV16QI_V4DI_UQI:
39821 case VOID_FTYPE_PV16QI_V2DI_UQI:
39822 case VOID_FTYPE_PV16QI_V8SI_UQI:
39823 case VOID_FTYPE_PV16QI_V4SI_UQI:
39824 case VOID_FTYPE_PV8HI_V8HI_UQI:
39825 case VOID_FTYPE_PV16HI_V16HI_UHI:
39826 case VOID_FTYPE_PV32HI_V32HI_USI:
39827 case VOID_FTYPE_PV16QI_V16QI_UHI:
39828 case VOID_FTYPE_PV32QI_V32QI_USI:
39829 case VOID_FTYPE_PV64QI_V64QI_UDI:
39832 /* Reserve memory operand for target. */
39833 memory = ARRAY_SIZE (args);
/* AVX-512 masked loads; the loadv* icodes need aligned memory.  */
39835 case V4SF_FTYPE_PCV4SF_V4SF_UQI:
39836 case V8SF_FTYPE_PCV8SF_V8SF_UQI:
39837 case V16SF_FTYPE_PCV16SF_V16SF_UHI:
39838 case V4SI_FTYPE_PCV4SI_V4SI_UQI:
39839 case V8SI_FTYPE_PCV8SI_V8SI_UQI:
39840 case V16SI_FTYPE_PCV16SI_V16SI_UHI:
39841 case V2DF_FTYPE_PCV2DF_V2DF_UQI:
39842 case V4DF_FTYPE_PCV4DF_V4DF_UQI:
39843 case V8DF_FTYPE_PCV8DF_V8DF_UQI:
39844 case V2DI_FTYPE_PCV2DI_V2DI_UQI:
39845 case V4DI_FTYPE_PCV4DI_V4DI_UQI:
39846 case V8DI_FTYPE_PCV8DI_V8DI_UQI:
39847 case V8HI_FTYPE_PCV8HI_V8HI_UQI:
39848 case V16HI_FTYPE_PCV16HI_V16HI_UHI:
39849 case V32HI_FTYPE_PCV32HI_V32HI_USI:
39850 case V16QI_FTYPE_PCV16QI_V16QI_UHI:
39851 case V32QI_FTYPE_PCV32QI_V32QI_USI:
39852 case V64QI_FTYPE_PCV64QI_V64QI_UDI:
39858 /* These builtins and instructions require the memory
39859 to be properly aligned. */
39860 case CODE_FOR_avx512f_loadv16sf_mask:
39861 case CODE_FOR_avx512f_loadv16si_mask:
39862 case CODE_FOR_avx512f_loadv8df_mask:
39863 case CODE_FOR_avx512f_loadv8di_mask:
39864 case CODE_FOR_avx512vl_loadv8sf_mask:
39865 case CODE_FOR_avx512vl_loadv8si_mask:
39866 case CODE_FOR_avx512vl_loadv4df_mask:
39867 case CODE_FOR_avx512vl_loadv4di_mask:
39868 case CODE_FOR_avx512vl_loadv4sf_mask:
39869 case CODE_FOR_avx512vl_loadv4si_mask:
39870 case CODE_FOR_avx512vl_loadv2df_mask:
39871 case CODE_FOR_avx512vl_loadv2di_mask:
39872 case CODE_FOR_avx512bw_loadv64qi_mask:
39873 case CODE_FOR_avx512vl_loadv32qi_mask:
39874 case CODE_FOR_avx512vl_loadv16qi_mask:
39875 case CODE_FOR_avx512bw_loadv32hi_mask:
39876 case CODE_FOR_avx512vl_loadv16hi_mask:
39877 case CODE_FOR_avx512vl_loadv8hi_mask:
39878 aligned_mem = true;
/* LWP-style builtins: last argument is a required immediate.  */
39884 case VOID_FTYPE_UINT_UINT_UINT:
39885 case VOID_FTYPE_UINT64_UINT_UINT:
39886 case UCHAR_FTYPE_UINT_UINT_UINT:
39887 case UCHAR_FTYPE_UINT64_UINT_UINT:
39890 memory = ARRAY_SIZE (args);
39891 last_arg_constant = true;
39894 gcc_unreachable ();
39897 gcc_assert (nargs <= ARRAY_SIZE (args));
/* Store class: the first call argument is the destination pointer;
   wrap it in a MEM and give it the best alignment we can prove.  */
39899 if (klass == store)
39901 arg = CALL_EXPR_ARG (exp, 0);
39902 op = expand_normal (arg);
39903 gcc_assert (target == 0);
39906 op = ix86_zero_extend_to_Pmode (op);
39907 target = gen_rtx_MEM (tmode, op);
39908 /* target at this point has just BITS_PER_UNIT MEM_ALIGN
39909 on it. Try to improve it using get_pointer_alignment,
39910 and if the special builtin is one that requires strict
39911 mode alignment, also from it's GET_MODE_ALIGNMENT.
39912 Failure to do so could lead to ix86_legitimate_combined_insn
39913 rejecting all changes to such insns. */
39914 unsigned int align = get_pointer_alignment (arg);
39915 if (aligned_mem && align < GET_MODE_ALIGNMENT (tmode))
39916 align = GET_MODE_ALIGNMENT (tmode);
39917 if (MEM_ALIGN (target) < align)
39918 set_mem_align (target, align);
39921 target = force_reg (tmode, op);
/* Load class: reuse TARGET only if it is a matching register.  */
39929 || !register_operand (target, tmode)
39930 || GET_MODE (target) != tmode)
39931 target = gen_reg_rtx (tmode);
/* Expand and coerce each remaining argument.  */
39934 for (i = 0; i < nargs; i++)
39936 machine_mode mode = insn_p->operand[i + 1].mode;
39939 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
39940 op = expand_normal (arg);
39941 match = insn_p->operand[i + 1].predicate (op, mode);
39943 if (last_arg_constant && (i + 1) == nargs)
/* LWP insns take a 32-bit immediate; everything else 8-bit.  */
39947 if (icode == CODE_FOR_lwp_lwpvalsi3
39948 || icode == CODE_FOR_lwp_lwpinssi3
39949 || icode == CODE_FOR_lwp_lwpvaldi3
39950 || icode == CODE_FOR_lwp_lwpinsdi3)
39951 error ("the last argument must be a 32-bit immediate");
39953 error ("the last argument must be an 8-bit immediate");
39961 /* This must be the memory operand. */
39962 op = ix86_zero_extend_to_Pmode (op);
39963 op = gen_rtx_MEM (mode, op);
39964 /* op at this point has just BITS_PER_UNIT MEM_ALIGN
39965 on it. Try to improve it using get_pointer_alignment,
39966 and if the special builtin is one that requires strict
39967 mode alignment, also from it's GET_MODE_ALIGNMENT.
39968 Failure to do so could lead to ix86_legitimate_combined_insn
39969 rejecting all changes to such insns. */
39970 unsigned int align = get_pointer_alignment (arg);
39971 if (aligned_mem && align < GET_MODE_ALIGNMENT (mode))
39972 align = GET_MODE_ALIGNMENT (mode);
39973 if (MEM_ALIGN (op) < align)
39974 set_mem_align (op, align);
39978 /* This must be register. */
39979 if (VECTOR_MODE_P (mode))
39980 op = safe_vector_operand (op, mode);
39982 op = fixup_modeless_constant (op, mode);
39984 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
39985 op = copy_to_mode_reg (mode, op);
39988 op = copy_to_reg (op);
39989 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
39995 args[i].mode = mode;
/* Dispatch on argument count (switch header elided in this listing).  */
40001 pat = GEN_FCN (icode) (target);
40004 pat = GEN_FCN (icode) (target, args[0].op);
40007 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
40010 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
40013 gcc_unreachable ();
/* Stores produce no value; loads return the result register.  */
40019 return klass == store ? 0 : target;
40022 /* Return the integer constant in ARG. Constrain it to be in the range
40023 of the subparts of VEC_TYPE; issue an error if not. */
/* NOTE(review): the tail of this function (the out-of-range return and
   the normal "return elt;" path) is elided in this listing.  */
40026 get_element_number (tree vec_type, tree arg)
40028 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
/* ARG must be a compile-time unsigned integer no larger than the
   highest lane index of VEC_TYPE.  */
40030 if (!tree_fits_uhwi_p (arg)
40031 || (elt = tree_to_uhwi (arg), elt > max))
40033 error ("selector must be an integer constant in the range 0..%wi", max);
40040 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
40041 ix86_expand_vector_init. We DO have language-level syntax for this, in
40042 the form of (type){ init-list }. Except that since we can't place emms
40043 instructions from inside the compiler, we can't allow the use of MMX
40044 registers unless the user explicitly asks for it. So we do *not* define
40045 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
40046 we have builtins invoked by mmintrin.h that gives us license to emit
40047 these sorts of instructions. */
40050 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
/* TMODE is the vector mode of TYPE; INNER_MODE is the mode of one element.  */
40052 machine_mode tmode = TYPE_MODE (type);
40053 machine_mode inner_mode = GET_MODE_INNER (tmode);
40054 int i, n_elt = GET_MODE_NUNITS (tmode);
40055 rtvec v = rtvec_alloc (n_elt);
/* The builtin call must supply exactly one argument per vector element.  */
40057 gcc_assert (VECTOR_MODE_P (tmode));
40058 gcc_assert (call_expr_nargs (exp) == n_elt);
/* Expand each call argument and narrow it to the element mode.  */
40060 for (i = 0; i < n_elt; ++i)
40062 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
40063 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
/* The initializer target must be a register of the vector mode.  */
40066 if (!target || !register_operand (target, tmode))
40067 target = gen_reg_rtx (tmode);
/* Emit the actual vector construction from the PARALLEL of elements.  */
40069 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
40073 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
40074 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
40075 had a language-level syntax for referencing vector elements. */
40078 ix86_expand_vec_ext_builtin (tree exp, rtx target)
40080 machine_mode tmode, mode0;
/* ARG0 is the source vector; ARG1 is the constant element selector.  */
40085 arg0 = CALL_EXPR_ARG (exp, 0);
40086 arg1 = CALL_EXPR_ARG (exp, 1);
40088 op0 = expand_normal (arg0);
/* Validate the selector against the vector's subpart count (errors out
   via get_element_number if it is not a suitable constant).  */
40089 elt = get_element_number (TREE_TYPE (arg0), arg1);
/* TMODE is the element mode; MODE0 is the whole-vector mode.  */
40091 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
40092 mode0 = TYPE_MODE (TREE_TYPE (arg0));
40093 gcc_assert (VECTOR_MODE_P (mode0));
/* The extraction source must be in a register.  */
40095 op0 = force_reg (mode0, op0);
/* Use a fresh pseudo when TARGET is absent or unsuitable (or when
   optimizing, to give the optimizers a clean register).  */
40097 if (optimize || !target || !register_operand (target, tmode))
40098 target = gen_reg_rtx (tmode);
40100 ix86_expand_vector_extract (true, target, op0, elt);
40105 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
40106 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
40107 a language-level syntax for referencing vector elements. */
40110 ix86_expand_vec_set_builtin (tree exp)
40112 machine_mode tmode, mode1;
40113 tree arg0, arg1, arg2;
40115 rtx op0, op1, target;
/* ARG0 is the source vector, ARG1 the new element value, ARG2 the
   constant element selector.  */
40117 arg0 = CALL_EXPR_ARG (exp, 0);
40118 arg1 = CALL_EXPR_ARG (exp, 1);
40119 arg2 = CALL_EXPR_ARG (exp, 2);
/* TMODE is the whole-vector mode; MODE1 is the element mode.  */
40121 tmode = TYPE_MODE (TREE_TYPE (arg0));
40122 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
40123 gcc_assert (VECTOR_MODE_P (tmode));
40125 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
40126 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
/* Validate the selector (errors out if not a suitable constant).  */
40127 elt = get_element_number (TREE_TYPE (arg0), arg2);
/* Convert the element value to the element mode if expansion produced
   it in some other (non-VOID) mode.  */
40129 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
40130 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
/* Both the vector and the element must be in registers.  */
40132 op0 = force_reg (tmode, op0);
40133 op1 = force_reg (mode1, op1);
40135 /* OP0 is the source of these builtin functions and shouldn't be
40136 modified. Create a copy, use it and return it as target. */
40137 target = gen_reg_rtx (tmode);
40138 emit_move_insn (target, op0);
40139 ix86_expand_vector_set (true, target, op1, elt);
40144 /* Emit conditional move of SRC to DST with condition
40147 ix86_emit_cmove (rtx dst, rtx src, enum rtx_code code, rtx op1, rtx op2)
/* Preferred path: emit a genuine conditional move of SRC into DST,
   guarded by the comparison CODE(OP1, OP2).  */
40153 t = ix86_expand_compare (code, op1, op2);
40154 emit_insn (gen_rtx_SET (dst, gen_rtx_IF_THEN_ELSE (GET_MODE (dst), t,
/* Fallback path: synthesize the conditional move with a compare,
   a branch around an unconditional move, and a label.  The branch
   takes the REVERSED condition so the move is skipped when the
   original condition is false.  NOTE(review): presumably this path
   runs when cmov is unavailable -- the guard is not visible here.  */
40159 rtx_code_label *nomove = gen_label_rtx ();
40160 emit_cmp_and_jump_insns (op1, op2, reverse_condition (code),
40161 const0_rtx, GET_MODE (op1), 1, nomove);
40162 emit_move_insn (dst, src);
40163 emit_label (nomove);
40167 /* Choose max of DST and SRC and put it to DST. */
40169 ix86_emit_move_max (rtx dst, rtx src)
/* Unsigned maximum: conditionally move SRC into DST when DST < SRC (LTU),
   otherwise DST already holds the larger value.  */
40171 ix86_emit_cmove (dst, src, LTU, dst, src);
40174 /* Expand an expression EXP that calls a built-in function,
40175 with result going to TARGET if that's convenient
40176 (and in mode MODE if that's convenient).
40177 SUBTARGET may be used as the target for computing one of EXP's operands.
40178 IGNORE is nonzero if the value is to be ignored. */
40181 ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
40182 machine_mode mode, int ignore)
40184 const struct builtin_description *d;
40186 enum insn_code icode;
40187 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
40188 tree arg0, arg1, arg2, arg3, arg4;
40189 rtx op0, op1, op2, op3, op4, pat, insn;
40190 machine_mode mode0, mode1, mode2, mode3, mode4;
40191 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
40193 /* For CPU builtins that can be folded, fold first and expand the fold. */
40196 case IX86_BUILTIN_CPU_INIT:
40198 /* Make it call __cpu_indicator_init in libgcc. */
40199 tree call_expr, fndecl, type;
40200 type = build_function_type_list (integer_type_node, NULL_TREE);
40201 fndecl = build_fn_decl ("__cpu_indicator_init", type);
40202 call_expr = build_call_expr (fndecl, 0);
40203 return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
40205 case IX86_BUILTIN_CPU_IS:
40206 case IX86_BUILTIN_CPU_SUPPORTS:
40208 tree arg0 = CALL_EXPR_ARG (exp, 0);
40209 tree fold_expr = fold_builtin_cpu (fndecl, &arg0);
40210 gcc_assert (fold_expr != NULL_TREE);
40211 return expand_expr (fold_expr, target, mode, EXPAND_NORMAL);
40215 /* Determine whether the builtin function is available under the current ISA.
40216 Originally the builtin was not created if it wasn't applicable to the
40217 current ISA based on the command line switches. With function specific
40218 options, we need to check in the context of the function making the call
40219 whether it is supported. */
40220 if (ix86_builtins_isa[fcode].isa
40221 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
40223 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
40224 NULL, (enum fpmath_unit) 0, false);
40227 error ("%qE needs unknown isa option", fndecl);
40230 gcc_assert (opts != NULL);
40231 error ("%qE needs isa option %s", fndecl, opts);
40239 case IX86_BUILTIN_BNDMK:
40241 || GET_MODE (target) != BNDmode
40242 || !register_operand (target, BNDmode))
40243 target = gen_reg_rtx (BNDmode);
40245 arg0 = CALL_EXPR_ARG (exp, 0);
40246 arg1 = CALL_EXPR_ARG (exp, 1);
40248 op0 = expand_normal (arg0);
40249 op1 = expand_normal (arg1);
40251 if (!register_operand (op0, Pmode))
40252 op0 = ix86_zero_extend_to_Pmode (op0);
40253 if (!register_operand (op1, Pmode))
40254 op1 = ix86_zero_extend_to_Pmode (op1);
40256 /* Builtin arg1 is size of block but instruction op1 should
40258 op1 = expand_simple_binop (Pmode, PLUS, op1, constm1_rtx,
40259 NULL_RTX, 1, OPTAB_DIRECT);
40261 emit_insn (BNDmode == BND64mode
40262 ? gen_bnd64_mk (target, op0, op1)
40263 : gen_bnd32_mk (target, op0, op1));
40266 case IX86_BUILTIN_BNDSTX:
40267 arg0 = CALL_EXPR_ARG (exp, 0);
40268 arg1 = CALL_EXPR_ARG (exp, 1);
40269 arg2 = CALL_EXPR_ARG (exp, 2);
40271 op0 = expand_normal (arg0);
40272 op1 = expand_normal (arg1);
40273 op2 = expand_normal (arg2);
40275 if (!register_operand (op0, Pmode))
40276 op0 = ix86_zero_extend_to_Pmode (op0);
40277 if (!register_operand (op1, BNDmode))
40278 op1 = copy_to_mode_reg (BNDmode, op1);
40279 if (!register_operand (op2, Pmode))
40280 op2 = ix86_zero_extend_to_Pmode (op2);
40282 emit_insn (BNDmode == BND64mode
40283 ? gen_bnd64_stx (op2, op0, op1)
40284 : gen_bnd32_stx (op2, op0, op1));
40287 case IX86_BUILTIN_BNDLDX:
40289 || GET_MODE (target) != BNDmode
40290 || !register_operand (target, BNDmode))
40291 target = gen_reg_rtx (BNDmode);
40293 arg0 = CALL_EXPR_ARG (exp, 0);
40294 arg1 = CALL_EXPR_ARG (exp, 1);
40296 op0 = expand_normal (arg0);
40297 op1 = expand_normal (arg1);
40299 if (!register_operand (op0, Pmode))
40300 op0 = ix86_zero_extend_to_Pmode (op0);
40301 if (!register_operand (op1, Pmode))
40302 op1 = ix86_zero_extend_to_Pmode (op1);
40304 emit_insn (BNDmode == BND64mode
40305 ? gen_bnd64_ldx (target, op0, op1)
40306 : gen_bnd32_ldx (target, op0, op1));
40309 case IX86_BUILTIN_BNDCL:
40310 arg0 = CALL_EXPR_ARG (exp, 0);
40311 arg1 = CALL_EXPR_ARG (exp, 1);
40313 op0 = expand_normal (arg0);
40314 op1 = expand_normal (arg1);
40316 if (!register_operand (op0, Pmode))
40317 op0 = ix86_zero_extend_to_Pmode (op0);
40318 if (!register_operand (op1, BNDmode))
40319 op1 = copy_to_mode_reg (BNDmode, op1);
40321 emit_insn (BNDmode == BND64mode
40322 ? gen_bnd64_cl (op1, op0)
40323 : gen_bnd32_cl (op1, op0));
40326 case IX86_BUILTIN_BNDCU:
40327 arg0 = CALL_EXPR_ARG (exp, 0);
40328 arg1 = CALL_EXPR_ARG (exp, 1);
40330 op0 = expand_normal (arg0);
40331 op1 = expand_normal (arg1);
40333 if (!register_operand (op0, Pmode))
40334 op0 = ix86_zero_extend_to_Pmode (op0);
40335 if (!register_operand (op1, BNDmode))
40336 op1 = copy_to_mode_reg (BNDmode, op1);
40338 emit_insn (BNDmode == BND64mode
40339 ? gen_bnd64_cu (op1, op0)
40340 : gen_bnd32_cu (op1, op0));
40343 case IX86_BUILTIN_BNDRET:
40344 arg0 = CALL_EXPR_ARG (exp, 0);
40345 gcc_assert (TREE_CODE (arg0) == SSA_NAME);
40346 target = chkp_get_rtl_bounds (arg0);
40348 /* If no bounds were specified for returned value,
40349 then use INIT bounds. It usually happens when
40350 some built-in function is expanded. */
40353 rtx t1 = gen_reg_rtx (Pmode);
40354 rtx t2 = gen_reg_rtx (Pmode);
40355 target = gen_reg_rtx (BNDmode);
40356 emit_move_insn (t1, const0_rtx);
40357 emit_move_insn (t2, constm1_rtx);
40358 emit_insn (BNDmode == BND64mode
40359 ? gen_bnd64_mk (target, t1, t2)
40360 : gen_bnd32_mk (target, t1, t2));
40363 gcc_assert (target && REG_P (target));
40366 case IX86_BUILTIN_BNDNARROW:
40368 rtx m1, m1h1, m1h2, lb, ub, t1;
40370 /* Return value and lb. */
40371 arg0 = CALL_EXPR_ARG (exp, 0);
40373 arg1 = CALL_EXPR_ARG (exp, 1);
40375 arg2 = CALL_EXPR_ARG (exp, 2);
40377 lb = expand_normal (arg0);
40378 op1 = expand_normal (arg1);
40379 op2 = expand_normal (arg2);
40381 /* Size was passed but we need to use (size - 1) as for bndmk. */
40382 op2 = expand_simple_binop (Pmode, PLUS, op2, constm1_rtx,
40383 NULL_RTX, 1, OPTAB_DIRECT);
40385 /* Add LB to size and inverse to get UB. */
40386 op2 = expand_simple_binop (Pmode, PLUS, op2, lb,
40387 op2, 1, OPTAB_DIRECT);
40388 ub = expand_simple_unop (Pmode, NOT, op2, op2, 1);
40390 if (!register_operand (lb, Pmode))
40391 lb = ix86_zero_extend_to_Pmode (lb);
40392 if (!register_operand (ub, Pmode))
40393 ub = ix86_zero_extend_to_Pmode (ub);
40395 /* We need to move bounds to memory before any computations. */
40400 m1 = assign_386_stack_local (BNDmode, SLOT_TEMP);
40401 emit_move_insn (m1, op1);
40404 /* Generate mem expression to be used for access to LB and UB. */
40405 m1h1 = adjust_address (m1, Pmode, 0);
40406 m1h2 = adjust_address (m1, Pmode, GET_MODE_SIZE (Pmode));
40408 t1 = gen_reg_rtx (Pmode);
40411 emit_move_insn (t1, m1h1);
40412 ix86_emit_move_max (t1, lb);
40413 emit_move_insn (m1h1, t1);
40415 /* Compute UB. UB is stored in 1's complement form. Therefore
40416 we also use max here. */
40417 emit_move_insn (t1, m1h2);
40418 ix86_emit_move_max (t1, ub);
40419 emit_move_insn (m1h2, t1);
40421 op2 = gen_reg_rtx (BNDmode);
40422 emit_move_insn (op2, m1);
40424 return chkp_join_splitted_slot (lb, op2);
40427 case IX86_BUILTIN_BNDINT:
40429 rtx res, rh1, rh2, lb1, lb2, ub1, ub2;
40432 || GET_MODE (target) != BNDmode
40433 || !register_operand (target, BNDmode))
40434 target = gen_reg_rtx (BNDmode);
40436 arg0 = CALL_EXPR_ARG (exp, 0);
40437 arg1 = CALL_EXPR_ARG (exp, 1);
40439 op0 = expand_normal (arg0);
40440 op1 = expand_normal (arg1);
40442 res = assign_386_stack_local (BNDmode, SLOT_TEMP);
40443 rh1 = adjust_address (res, Pmode, 0);
40444 rh2 = adjust_address (res, Pmode, GET_MODE_SIZE (Pmode));
40446 /* Put first bounds to temporaries. */
40447 lb1 = gen_reg_rtx (Pmode);
40448 ub1 = gen_reg_rtx (Pmode);
40451 emit_move_insn (lb1, adjust_address (op0, Pmode, 0));
40452 emit_move_insn (ub1, adjust_address (op0, Pmode,
40453 GET_MODE_SIZE (Pmode)));
40457 emit_move_insn (res, op0);
40458 emit_move_insn (lb1, rh1);
40459 emit_move_insn (ub1, rh2);
40462 /* Put second bounds to temporaries. */
40463 lb2 = gen_reg_rtx (Pmode);
40464 ub2 = gen_reg_rtx (Pmode);
40467 emit_move_insn (lb2, adjust_address (op1, Pmode, 0));
40468 emit_move_insn (ub2, adjust_address (op1, Pmode,
40469 GET_MODE_SIZE (Pmode)));
40473 emit_move_insn (res, op1);
40474 emit_move_insn (lb2, rh1);
40475 emit_move_insn (ub2, rh2);
40479 ix86_emit_move_max (lb1, lb2);
40480 emit_move_insn (rh1, lb1);
40482 /* Compute UB. UB is stored in 1's complement form. Therefore
40483 we also use max here. */
40484 ix86_emit_move_max (ub1, ub2);
40485 emit_move_insn (rh2, ub1);
40487 emit_move_insn (target, res);
40492 case IX86_BUILTIN_SIZEOF:
40498 || GET_MODE (target) != Pmode
40499 || !register_operand (target, Pmode))
40500 target = gen_reg_rtx (Pmode);
40502 arg0 = CALL_EXPR_ARG (exp, 0);
40503 gcc_assert (TREE_CODE (arg0) == VAR_DECL);
40505 name = DECL_ASSEMBLER_NAME (arg0);
40506 symbol = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (name));
40508 emit_insn (Pmode == SImode
40509 ? gen_move_size_reloc_si (target, symbol)
40510 : gen_move_size_reloc_di (target, symbol));
40515 case IX86_BUILTIN_BNDLOWER:
40520 || GET_MODE (target) != Pmode
40521 || !register_operand (target, Pmode))
40522 target = gen_reg_rtx (Pmode);
40524 arg0 = CALL_EXPR_ARG (exp, 0);
40525 op0 = expand_normal (arg0);
40527 /* We need to move bounds to memory first. */
40532 mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
40533 emit_move_insn (mem, op0);
40536 /* Generate mem expression to access LB and load it. */
40537 hmem = adjust_address (mem, Pmode, 0);
40538 emit_move_insn (target, hmem);
40543 case IX86_BUILTIN_BNDUPPER:
40545 rtx mem, hmem, res;
40548 || GET_MODE (target) != Pmode
40549 || !register_operand (target, Pmode))
40550 target = gen_reg_rtx (Pmode);
40552 arg0 = CALL_EXPR_ARG (exp, 0);
40553 op0 = expand_normal (arg0);
40555 /* We need to move bounds to memory first. */
40560 mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
40561 emit_move_insn (mem, op0);
40564 /* Generate mem expression to access UB. */
40565 hmem = adjust_address (mem, Pmode, GET_MODE_SIZE (Pmode));
40567 /* We need to inverse all bits of UB. */
40568 res = expand_simple_unop (Pmode, NOT, hmem, target, 1);
40571 emit_move_insn (target, res);
40576 case IX86_BUILTIN_MASKMOVQ:
40577 case IX86_BUILTIN_MASKMOVDQU:
40578 icode = (fcode == IX86_BUILTIN_MASKMOVQ
40579 ? CODE_FOR_mmx_maskmovq
40580 : CODE_FOR_sse2_maskmovdqu);
40581 /* Note the arg order is different from the operand order. */
40582 arg1 = CALL_EXPR_ARG (exp, 0);
40583 arg2 = CALL_EXPR_ARG (exp, 1);
40584 arg0 = CALL_EXPR_ARG (exp, 2);
40585 op0 = expand_normal (arg0);
40586 op1 = expand_normal (arg1);
40587 op2 = expand_normal (arg2);
40588 mode0 = insn_data[icode].operand[0].mode;
40589 mode1 = insn_data[icode].operand[1].mode;
40590 mode2 = insn_data[icode].operand[2].mode;
40592 op0 = ix86_zero_extend_to_Pmode (op0);
40593 op0 = gen_rtx_MEM (mode1, op0);
40595 if (!insn_data[icode].operand[0].predicate (op0, mode0))
40596 op0 = copy_to_mode_reg (mode0, op0);
40597 if (!insn_data[icode].operand[1].predicate (op1, mode1))
40598 op1 = copy_to_mode_reg (mode1, op1);
40599 if (!insn_data[icode].operand[2].predicate (op2, mode2))
40600 op2 = copy_to_mode_reg (mode2, op2);
40601 pat = GEN_FCN (icode) (op0, op1, op2);
40607 case IX86_BUILTIN_LDMXCSR:
40608 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
40609 target = assign_386_stack_local (SImode, SLOT_TEMP);
40610 emit_move_insn (target, op0);
40611 emit_insn (gen_sse_ldmxcsr (target));
40614 case IX86_BUILTIN_STMXCSR:
40615 target = assign_386_stack_local (SImode, SLOT_TEMP);
40616 emit_insn (gen_sse_stmxcsr (target));
40617 return copy_to_mode_reg (SImode, target);
40619 case IX86_BUILTIN_CLFLUSH:
40620 arg0 = CALL_EXPR_ARG (exp, 0);
40621 op0 = expand_normal (arg0);
40622 icode = CODE_FOR_sse2_clflush;
40623 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
40624 op0 = ix86_zero_extend_to_Pmode (op0);
40626 emit_insn (gen_sse2_clflush (op0));
40629 case IX86_BUILTIN_CLWB:
40630 arg0 = CALL_EXPR_ARG (exp, 0);
40631 op0 = expand_normal (arg0);
40632 icode = CODE_FOR_clwb;
40633 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
40634 op0 = ix86_zero_extend_to_Pmode (op0);
40636 emit_insn (gen_clwb (op0));
40639 case IX86_BUILTIN_CLFLUSHOPT:
40640 arg0 = CALL_EXPR_ARG (exp, 0);
40641 op0 = expand_normal (arg0);
40642 icode = CODE_FOR_clflushopt;
40643 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
40644 op0 = ix86_zero_extend_to_Pmode (op0);
40646 emit_insn (gen_clflushopt (op0));
40649 case IX86_BUILTIN_MONITOR:
40650 case IX86_BUILTIN_MONITORX:
40651 arg0 = CALL_EXPR_ARG (exp, 0);
40652 arg1 = CALL_EXPR_ARG (exp, 1);
40653 arg2 = CALL_EXPR_ARG (exp, 2);
40654 op0 = expand_normal (arg0);
40655 op1 = expand_normal (arg1);
40656 op2 = expand_normal (arg2);
40658 op0 = ix86_zero_extend_to_Pmode (op0);
40660 op1 = copy_to_mode_reg (SImode, op1);
40662 op2 = copy_to_mode_reg (SImode, op2);
40664 emit_insn (fcode == IX86_BUILTIN_MONITOR
40665 ? ix86_gen_monitor (op0, op1, op2)
40666 : ix86_gen_monitorx (op0, op1, op2));
40669 case IX86_BUILTIN_MWAIT:
40670 arg0 = CALL_EXPR_ARG (exp, 0);
40671 arg1 = CALL_EXPR_ARG (exp, 1);
40672 op0 = expand_normal (arg0);
40673 op1 = expand_normal (arg1);
40675 op0 = copy_to_mode_reg (SImode, op0);
40677 op1 = copy_to_mode_reg (SImode, op1);
40678 emit_insn (gen_sse3_mwait (op0, op1));
40681 case IX86_BUILTIN_MWAITX:
40682 arg0 = CALL_EXPR_ARG (exp, 0);
40683 arg1 = CALL_EXPR_ARG (exp, 1);
40684 arg2 = CALL_EXPR_ARG (exp, 2);
40685 op0 = expand_normal (arg0);
40686 op1 = expand_normal (arg1);
40687 op2 = expand_normal (arg2);
40689 op0 = copy_to_mode_reg (SImode, op0);
40691 op1 = copy_to_mode_reg (SImode, op1);
40693 op2 = copy_to_mode_reg (SImode, op2);
40694 emit_insn (gen_mwaitx (op0, op1, op2));
40697 case IX86_BUILTIN_CLZERO:
40698 arg0 = CALL_EXPR_ARG (exp, 0);
40699 op0 = expand_normal (arg0);
40701 op0 = ix86_zero_extend_to_Pmode (op0);
40702 emit_insn (ix86_gen_clzero (op0));
40705 case IX86_BUILTIN_VEC_INIT_V2SI:
40706 case IX86_BUILTIN_VEC_INIT_V4HI:
40707 case IX86_BUILTIN_VEC_INIT_V8QI:
40708 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
40710 case IX86_BUILTIN_VEC_EXT_V2DF:
40711 case IX86_BUILTIN_VEC_EXT_V2DI:
40712 case IX86_BUILTIN_VEC_EXT_V4SF:
40713 case IX86_BUILTIN_VEC_EXT_V4SI:
40714 case IX86_BUILTIN_VEC_EXT_V8HI:
40715 case IX86_BUILTIN_VEC_EXT_V2SI:
40716 case IX86_BUILTIN_VEC_EXT_V4HI:
40717 case IX86_BUILTIN_VEC_EXT_V16QI:
40718 return ix86_expand_vec_ext_builtin (exp, target);
40720 case IX86_BUILTIN_VEC_SET_V2DI:
40721 case IX86_BUILTIN_VEC_SET_V4SF:
40722 case IX86_BUILTIN_VEC_SET_V4SI:
40723 case IX86_BUILTIN_VEC_SET_V8HI:
40724 case IX86_BUILTIN_VEC_SET_V4HI:
40725 case IX86_BUILTIN_VEC_SET_V16QI:
40726 return ix86_expand_vec_set_builtin (exp);
40728 case IX86_BUILTIN_INFQ:
40729 case IX86_BUILTIN_HUGE_VALQ:
40731 REAL_VALUE_TYPE inf;
40735 tmp = const_double_from_real_value (inf, mode);
40737 tmp = validize_mem (force_const_mem (mode, tmp));
40740 target = gen_reg_rtx (mode);
40742 emit_move_insn (target, tmp);
40746 case IX86_BUILTIN_RDPMC:
40747 case IX86_BUILTIN_RDTSC:
40748 case IX86_BUILTIN_RDTSCP:
40750 op0 = gen_reg_rtx (DImode);
40751 op1 = gen_reg_rtx (DImode);
40753 if (fcode == IX86_BUILTIN_RDPMC)
40755 arg0 = CALL_EXPR_ARG (exp, 0);
40756 op2 = expand_normal (arg0);
40757 if (!register_operand (op2, SImode))
40758 op2 = copy_to_mode_reg (SImode, op2);
40760 insn = (TARGET_64BIT
40761 ? gen_rdpmc_rex64 (op0, op1, op2)
40762 : gen_rdpmc (op0, op2));
40765 else if (fcode == IX86_BUILTIN_RDTSC)
40767 insn = (TARGET_64BIT
40768 ? gen_rdtsc_rex64 (op0, op1)
40769 : gen_rdtsc (op0));
40774 op2 = gen_reg_rtx (SImode);
40776 insn = (TARGET_64BIT
40777 ? gen_rdtscp_rex64 (op0, op1, op2)
40778 : gen_rdtscp (op0, op2));
40781 arg0 = CALL_EXPR_ARG (exp, 0);
40782 op4 = expand_normal (arg0);
40783 if (!address_operand (op4, VOIDmode))
40785 op4 = convert_memory_address (Pmode, op4);
40786 op4 = copy_addr_to_reg (op4);
40788 emit_move_insn (gen_rtx_MEM (SImode, op4), op2);
40793 /* mode is VOIDmode if __builtin_rd* has been called
40795 if (mode == VOIDmode)
40797 target = gen_reg_rtx (mode);
40802 op1 = expand_simple_binop (DImode, ASHIFT, op1, GEN_INT (32),
40803 op1, 1, OPTAB_DIRECT);
40804 op0 = expand_simple_binop (DImode, IOR, op0, op1,
40805 op0, 1, OPTAB_DIRECT);
40808 emit_move_insn (target, op0);
40811 case IX86_BUILTIN_FXSAVE:
40812 case IX86_BUILTIN_FXRSTOR:
40813 case IX86_BUILTIN_FXSAVE64:
40814 case IX86_BUILTIN_FXRSTOR64:
40815 case IX86_BUILTIN_FNSTENV:
40816 case IX86_BUILTIN_FLDENV:
40820 case IX86_BUILTIN_FXSAVE:
40821 icode = CODE_FOR_fxsave;
40823 case IX86_BUILTIN_FXRSTOR:
40824 icode = CODE_FOR_fxrstor;
40826 case IX86_BUILTIN_FXSAVE64:
40827 icode = CODE_FOR_fxsave64;
40829 case IX86_BUILTIN_FXRSTOR64:
40830 icode = CODE_FOR_fxrstor64;
40832 case IX86_BUILTIN_FNSTENV:
40833 icode = CODE_FOR_fnstenv;
40835 case IX86_BUILTIN_FLDENV:
40836 icode = CODE_FOR_fldenv;
40839 gcc_unreachable ();
40842 arg0 = CALL_EXPR_ARG (exp, 0);
40843 op0 = expand_normal (arg0);
40845 if (!address_operand (op0, VOIDmode))
40847 op0 = convert_memory_address (Pmode, op0);
40848 op0 = copy_addr_to_reg (op0);
40850 op0 = gen_rtx_MEM (mode0, op0);
40852 pat = GEN_FCN (icode) (op0);
40857 case IX86_BUILTIN_XSAVE:
40858 case IX86_BUILTIN_XRSTOR:
40859 case IX86_BUILTIN_XSAVE64:
40860 case IX86_BUILTIN_XRSTOR64:
40861 case IX86_BUILTIN_XSAVEOPT:
40862 case IX86_BUILTIN_XSAVEOPT64:
40863 case IX86_BUILTIN_XSAVES:
40864 case IX86_BUILTIN_XRSTORS:
40865 case IX86_BUILTIN_XSAVES64:
40866 case IX86_BUILTIN_XRSTORS64:
40867 case IX86_BUILTIN_XSAVEC:
40868 case IX86_BUILTIN_XSAVEC64:
40869 arg0 = CALL_EXPR_ARG (exp, 0);
40870 arg1 = CALL_EXPR_ARG (exp, 1);
40871 op0 = expand_normal (arg0);
40872 op1 = expand_normal (arg1);
40874 if (!address_operand (op0, VOIDmode))
40876 op0 = convert_memory_address (Pmode, op0);
40877 op0 = copy_addr_to_reg (op0);
40879 op0 = gen_rtx_MEM (BLKmode, op0);
40881 op1 = force_reg (DImode, op1);
40885 op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
40886 NULL, 1, OPTAB_DIRECT);
40889 case IX86_BUILTIN_XSAVE:
40890 icode = CODE_FOR_xsave_rex64;
40892 case IX86_BUILTIN_XRSTOR:
40893 icode = CODE_FOR_xrstor_rex64;
40895 case IX86_BUILTIN_XSAVE64:
40896 icode = CODE_FOR_xsave64;
40898 case IX86_BUILTIN_XRSTOR64:
40899 icode = CODE_FOR_xrstor64;
40901 case IX86_BUILTIN_XSAVEOPT:
40902 icode = CODE_FOR_xsaveopt_rex64;
40904 case IX86_BUILTIN_XSAVEOPT64:
40905 icode = CODE_FOR_xsaveopt64;
40907 case IX86_BUILTIN_XSAVES:
40908 icode = CODE_FOR_xsaves_rex64;
40910 case IX86_BUILTIN_XRSTORS:
40911 icode = CODE_FOR_xrstors_rex64;
40913 case IX86_BUILTIN_XSAVES64:
40914 icode = CODE_FOR_xsaves64;
40916 case IX86_BUILTIN_XRSTORS64:
40917 icode = CODE_FOR_xrstors64;
40919 case IX86_BUILTIN_XSAVEC:
40920 icode = CODE_FOR_xsavec_rex64;
40922 case IX86_BUILTIN_XSAVEC64:
40923 icode = CODE_FOR_xsavec64;
40926 gcc_unreachable ();
40929 op2 = gen_lowpart (SImode, op2);
40930 op1 = gen_lowpart (SImode, op1);
40931 pat = GEN_FCN (icode) (op0, op1, op2);
40937 case IX86_BUILTIN_XSAVE:
40938 icode = CODE_FOR_xsave;
40940 case IX86_BUILTIN_XRSTOR:
40941 icode = CODE_FOR_xrstor;
40943 case IX86_BUILTIN_XSAVEOPT:
40944 icode = CODE_FOR_xsaveopt;
40946 case IX86_BUILTIN_XSAVES:
40947 icode = CODE_FOR_xsaves;
40949 case IX86_BUILTIN_XRSTORS:
40950 icode = CODE_FOR_xrstors;
40952 case IX86_BUILTIN_XSAVEC:
40953 icode = CODE_FOR_xsavec;
40956 gcc_unreachable ();
40958 pat = GEN_FCN (icode) (op0, op1);
40965 case IX86_BUILTIN_LLWPCB:
40966 arg0 = CALL_EXPR_ARG (exp, 0);
40967 op0 = expand_normal (arg0);
40968 icode = CODE_FOR_lwp_llwpcb;
40969 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
40970 op0 = ix86_zero_extend_to_Pmode (op0);
40971 emit_insn (gen_lwp_llwpcb (op0));
40974 case IX86_BUILTIN_SLWPCB:
40975 icode = CODE_FOR_lwp_slwpcb;
40977 || !insn_data[icode].operand[0].predicate (target, Pmode))
40978 target = gen_reg_rtx (Pmode);
40979 emit_insn (gen_lwp_slwpcb (target));
40982 case IX86_BUILTIN_BEXTRI32:
40983 case IX86_BUILTIN_BEXTRI64:
40984 arg0 = CALL_EXPR_ARG (exp, 0);
40985 arg1 = CALL_EXPR_ARG (exp, 1);
40986 op0 = expand_normal (arg0);
40987 op1 = expand_normal (arg1);
40988 icode = (fcode == IX86_BUILTIN_BEXTRI32
40989 ? CODE_FOR_tbm_bextri_si
40990 : CODE_FOR_tbm_bextri_di);
40991 if (!CONST_INT_P (op1))
40993 error ("last argument must be an immediate");
40998 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
40999 unsigned char lsb_index = INTVAL (op1) & 0xFF;
41000 op1 = GEN_INT (length);
41001 op2 = GEN_INT (lsb_index);
41002 pat = GEN_FCN (icode) (target, op0, op1, op2);
41008 case IX86_BUILTIN_RDRAND16_STEP:
41009 icode = CODE_FOR_rdrandhi_1;
41013 case IX86_BUILTIN_RDRAND32_STEP:
41014 icode = CODE_FOR_rdrandsi_1;
41018 case IX86_BUILTIN_RDRAND64_STEP:
41019 icode = CODE_FOR_rdranddi_1;
41023 op0 = gen_reg_rtx (mode0);
41024 emit_insn (GEN_FCN (icode) (op0));
41026 arg0 = CALL_EXPR_ARG (exp, 0);
41027 op1 = expand_normal (arg0);
41028 if (!address_operand (op1, VOIDmode))
41030 op1 = convert_memory_address (Pmode, op1);
41031 op1 = copy_addr_to_reg (op1);
41033 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
41035 op1 = gen_reg_rtx (SImode);
41036 emit_move_insn (op1, CONST1_RTX (SImode));
41038 /* Emit SImode conditional move. */
41039 if (mode0 == HImode)
41041 op2 = gen_reg_rtx (SImode);
41042 emit_insn (gen_zero_extendhisi2 (op2, op0));
41044 else if (mode0 == SImode)
41047 op2 = gen_rtx_SUBREG (SImode, op0, 0);
41050 || !register_operand (target, SImode))
41051 target = gen_reg_rtx (SImode);
41053 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
41055 emit_insn (gen_rtx_SET (target,
41056 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
41059 case IX86_BUILTIN_RDSEED16_STEP:
41060 icode = CODE_FOR_rdseedhi_1;
41064 case IX86_BUILTIN_RDSEED32_STEP:
41065 icode = CODE_FOR_rdseedsi_1;
41069 case IX86_BUILTIN_RDSEED64_STEP:
41070 icode = CODE_FOR_rdseeddi_1;
41074 op0 = gen_reg_rtx (mode0);
41075 emit_insn (GEN_FCN (icode) (op0));
41077 arg0 = CALL_EXPR_ARG (exp, 0);
41078 op1 = expand_normal (arg0);
41079 if (!address_operand (op1, VOIDmode))
41081 op1 = convert_memory_address (Pmode, op1);
41082 op1 = copy_addr_to_reg (op1);
41084 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
41086 op2 = gen_reg_rtx (QImode);
41088 pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG),
41090 emit_insn (gen_rtx_SET (op2, pat));
41093 || !register_operand (target, SImode))
41094 target = gen_reg_rtx (SImode);
41096 emit_insn (gen_zero_extendqisi2 (target, op2));
41099 case IX86_BUILTIN_SBB32:
41100 icode = CODE_FOR_subborrowsi;
41104 case IX86_BUILTIN_SBB64:
41105 icode = CODE_FOR_subborrowdi;
41109 case IX86_BUILTIN_ADDCARRYX32:
41110 icode = CODE_FOR_addcarrysi;
41114 case IX86_BUILTIN_ADDCARRYX64:
41115 icode = CODE_FOR_addcarrydi;
41119 arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in. */
41120 arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1. */
41121 arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2. */
41122 arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out. */
41124 op1 = expand_normal (arg0);
41125 op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1));
41127 op2 = expand_normal (arg1);
41128 if (!register_operand (op2, mode0))
41129 op2 = copy_to_mode_reg (mode0, op2);
41131 op3 = expand_normal (arg2);
41132 if (!register_operand (op3, mode0))
41133 op3 = copy_to_mode_reg (mode0, op3);
41135 op4 = expand_normal (arg3);
41136 if (!address_operand (op4, VOIDmode))
41138 op4 = convert_memory_address (Pmode, op4);
41139 op4 = copy_addr_to_reg (op4);
41142 /* Generate CF from input operand. */
41143 emit_insn (gen_addqi3_cconly_overflow (op1, constm1_rtx));
41145 /* Generate instruction that consumes CF. */
41146 op0 = gen_reg_rtx (mode0);
41148 op1 = gen_rtx_REG (CCCmode, FLAGS_REG);
41149 pat = gen_rtx_LTU (mode0, op1, const0_rtx);
41150 emit_insn (GEN_FCN (icode) (op0, op2, op3, op1, pat));
41152 /* Return current CF value. */
41154 target = gen_reg_rtx (QImode);
41156 PUT_MODE (pat, QImode);
41157 emit_insn (gen_rtx_SET (target, pat));
41159 /* Store the result. */
41160 emit_move_insn (gen_rtx_MEM (mode0, op4), op0);
41164 case IX86_BUILTIN_READ_FLAGS:
41165 emit_insn (gen_push (gen_rtx_REG (word_mode, FLAGS_REG)));
41168 || target == NULL_RTX
41169 || !nonimmediate_operand (target, word_mode)
41170 || GET_MODE (target) != word_mode)
41171 target = gen_reg_rtx (word_mode);
41173 emit_insn (gen_pop (target));
41176 case IX86_BUILTIN_WRITE_FLAGS:
41178 arg0 = CALL_EXPR_ARG (exp, 0);
41179 op0 = expand_normal (arg0);
41180 if (!general_no_elim_operand (op0, word_mode))
41181 op0 = copy_to_mode_reg (word_mode, op0);
41183 emit_insn (gen_push (op0));
41184 emit_insn (gen_pop (gen_rtx_REG (word_mode, FLAGS_REG)));
41187 case IX86_BUILTIN_KORTESTC16:
41188 icode = CODE_FOR_kortestchi;
41193 case IX86_BUILTIN_KORTESTZ16:
41194 icode = CODE_FOR_kortestzhi;
41199 arg0 = CALL_EXPR_ARG (exp, 0); /* Mask reg src1. */
41200 arg1 = CALL_EXPR_ARG (exp, 1); /* Mask reg src2. */
41201 op0 = expand_normal (arg0);
41202 op1 = expand_normal (arg1);
41204 op0 = copy_to_reg (op0);
41205 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
41206 op1 = copy_to_reg (op1);
41207 op1 = simplify_gen_subreg (mode0, op1, GET_MODE (op1), 0);
41209 target = gen_reg_rtx (QImode);
41210 emit_insn (gen_rtx_SET (target, const0_rtx));
41212 /* Emit kortest. */
41213 emit_insn (GEN_FCN (icode) (op0, op1));
41214 /* And use setcc to return result from flags. */
41215 ix86_expand_setcc (target, EQ,
41216 gen_rtx_REG (mode1, FLAGS_REG), const0_rtx);
41219 case IX86_BUILTIN_GATHERSIV2DF:
41220 icode = CODE_FOR_avx2_gathersiv2df;
41222 case IX86_BUILTIN_GATHERSIV4DF:
41223 icode = CODE_FOR_avx2_gathersiv4df;
41225 case IX86_BUILTIN_GATHERDIV2DF:
41226 icode = CODE_FOR_avx2_gatherdiv2df;
41228 case IX86_BUILTIN_GATHERDIV4DF:
41229 icode = CODE_FOR_avx2_gatherdiv4df;
41231 case IX86_BUILTIN_GATHERSIV4SF:
41232 icode = CODE_FOR_avx2_gathersiv4sf;
41234 case IX86_BUILTIN_GATHERSIV8SF:
41235 icode = CODE_FOR_avx2_gathersiv8sf;
41237 case IX86_BUILTIN_GATHERDIV4SF:
41238 icode = CODE_FOR_avx2_gatherdiv4sf;
41240 case IX86_BUILTIN_GATHERDIV8SF:
41241 icode = CODE_FOR_avx2_gatherdiv8sf;
41243 case IX86_BUILTIN_GATHERSIV2DI:
41244 icode = CODE_FOR_avx2_gathersiv2di;
41246 case IX86_BUILTIN_GATHERSIV4DI:
41247 icode = CODE_FOR_avx2_gathersiv4di;
41249 case IX86_BUILTIN_GATHERDIV2DI:
41250 icode = CODE_FOR_avx2_gatherdiv2di;
41252 case IX86_BUILTIN_GATHERDIV4DI:
41253 icode = CODE_FOR_avx2_gatherdiv4di;
41255 case IX86_BUILTIN_GATHERSIV4SI:
41256 icode = CODE_FOR_avx2_gathersiv4si;
41258 case IX86_BUILTIN_GATHERSIV8SI:
41259 icode = CODE_FOR_avx2_gathersiv8si;
41261 case IX86_BUILTIN_GATHERDIV4SI:
41262 icode = CODE_FOR_avx2_gatherdiv4si;
41264 case IX86_BUILTIN_GATHERDIV8SI:
41265 icode = CODE_FOR_avx2_gatherdiv8si;
41267 case IX86_BUILTIN_GATHERALTSIV4DF:
41268 icode = CODE_FOR_avx2_gathersiv4df;
41270 case IX86_BUILTIN_GATHERALTDIV8SF:
41271 icode = CODE_FOR_avx2_gatherdiv8sf;
41273 case IX86_BUILTIN_GATHERALTSIV4DI:
41274 icode = CODE_FOR_avx2_gathersiv4di;
41276 case IX86_BUILTIN_GATHERALTDIV8SI:
41277 icode = CODE_FOR_avx2_gatherdiv8si;
41279 case IX86_BUILTIN_GATHER3SIV16SF:
41280 icode = CODE_FOR_avx512f_gathersiv16sf;
41282 case IX86_BUILTIN_GATHER3SIV8DF:
41283 icode = CODE_FOR_avx512f_gathersiv8df;
41285 case IX86_BUILTIN_GATHER3DIV16SF:
41286 icode = CODE_FOR_avx512f_gatherdiv16sf;
41288 case IX86_BUILTIN_GATHER3DIV8DF:
41289 icode = CODE_FOR_avx512f_gatherdiv8df;
41291 case IX86_BUILTIN_GATHER3SIV16SI:
41292 icode = CODE_FOR_avx512f_gathersiv16si;
41294 case IX86_BUILTIN_GATHER3SIV8DI:
41295 icode = CODE_FOR_avx512f_gathersiv8di;
41297 case IX86_BUILTIN_GATHER3DIV16SI:
41298 icode = CODE_FOR_avx512f_gatherdiv16si;
41300 case IX86_BUILTIN_GATHER3DIV8DI:
41301 icode = CODE_FOR_avx512f_gatherdiv8di;
41303 case IX86_BUILTIN_GATHER3ALTSIV8DF:
41304 icode = CODE_FOR_avx512f_gathersiv8df;
41306 case IX86_BUILTIN_GATHER3ALTDIV16SF:
41307 icode = CODE_FOR_avx512f_gatherdiv16sf;
41309 case IX86_BUILTIN_GATHER3ALTSIV8DI:
41310 icode = CODE_FOR_avx512f_gathersiv8di;
41312 case IX86_BUILTIN_GATHER3ALTDIV16SI:
41313 icode = CODE_FOR_avx512f_gatherdiv16si;
41315 case IX86_BUILTIN_GATHER3SIV2DF:
41316 icode = CODE_FOR_avx512vl_gathersiv2df;
41318 case IX86_BUILTIN_GATHER3SIV4DF:
41319 icode = CODE_FOR_avx512vl_gathersiv4df;
41321 case IX86_BUILTIN_GATHER3DIV2DF:
41322 icode = CODE_FOR_avx512vl_gatherdiv2df;
41324 case IX86_BUILTIN_GATHER3DIV4DF:
41325 icode = CODE_FOR_avx512vl_gatherdiv4df;
41327 case IX86_BUILTIN_GATHER3SIV4SF:
41328 icode = CODE_FOR_avx512vl_gathersiv4sf;
41330 case IX86_BUILTIN_GATHER3SIV8SF:
41331 icode = CODE_FOR_avx512vl_gathersiv8sf;
41333 case IX86_BUILTIN_GATHER3DIV4SF:
41334 icode = CODE_FOR_avx512vl_gatherdiv4sf;
41336 case IX86_BUILTIN_GATHER3DIV8SF:
41337 icode = CODE_FOR_avx512vl_gatherdiv8sf;
41339 case IX86_BUILTIN_GATHER3SIV2DI:
41340 icode = CODE_FOR_avx512vl_gathersiv2di;
41342 case IX86_BUILTIN_GATHER3SIV4DI:
41343 icode = CODE_FOR_avx512vl_gathersiv4di;
41345 case IX86_BUILTIN_GATHER3DIV2DI:
41346 icode = CODE_FOR_avx512vl_gatherdiv2di;
41348 case IX86_BUILTIN_GATHER3DIV4DI:
41349 icode = CODE_FOR_avx512vl_gatherdiv4di;
41351 case IX86_BUILTIN_GATHER3SIV4SI:
41352 icode = CODE_FOR_avx512vl_gathersiv4si;
41354 case IX86_BUILTIN_GATHER3SIV8SI:
41355 icode = CODE_FOR_avx512vl_gathersiv8si;
41357 case IX86_BUILTIN_GATHER3DIV4SI:
41358 icode = CODE_FOR_avx512vl_gatherdiv4si;
41360 case IX86_BUILTIN_GATHER3DIV8SI:
41361 icode = CODE_FOR_avx512vl_gatherdiv8si;
41363 case IX86_BUILTIN_GATHER3ALTSIV4DF:
41364 icode = CODE_FOR_avx512vl_gathersiv4df;
41366 case IX86_BUILTIN_GATHER3ALTDIV8SF:
41367 icode = CODE_FOR_avx512vl_gatherdiv8sf;
41369 case IX86_BUILTIN_GATHER3ALTSIV4DI:
41370 icode = CODE_FOR_avx512vl_gathersiv4di;
41372 case IX86_BUILTIN_GATHER3ALTDIV8SI:
41373 icode = CODE_FOR_avx512vl_gatherdiv8si;
41375 case IX86_BUILTIN_SCATTERSIV16SF:
41376 icode = CODE_FOR_avx512f_scattersiv16sf;
41378 case IX86_BUILTIN_SCATTERSIV8DF:
41379 icode = CODE_FOR_avx512f_scattersiv8df;
41381 case IX86_BUILTIN_SCATTERDIV16SF:
41382 icode = CODE_FOR_avx512f_scatterdiv16sf;
41384 case IX86_BUILTIN_SCATTERDIV8DF:
41385 icode = CODE_FOR_avx512f_scatterdiv8df;
41387 case IX86_BUILTIN_SCATTERSIV16SI:
41388 icode = CODE_FOR_avx512f_scattersiv16si;
41390 case IX86_BUILTIN_SCATTERSIV8DI:
41391 icode = CODE_FOR_avx512f_scattersiv8di;
41393 case IX86_BUILTIN_SCATTERDIV16SI:
41394 icode = CODE_FOR_avx512f_scatterdiv16si;
41396 case IX86_BUILTIN_SCATTERDIV8DI:
41397 icode = CODE_FOR_avx512f_scatterdiv8di;
41399 case IX86_BUILTIN_SCATTERSIV8SF:
41400 icode = CODE_FOR_avx512vl_scattersiv8sf;
41402 case IX86_BUILTIN_SCATTERSIV4SF:
41403 icode = CODE_FOR_avx512vl_scattersiv4sf;
41405 case IX86_BUILTIN_SCATTERSIV4DF:
41406 icode = CODE_FOR_avx512vl_scattersiv4df;
41408 case IX86_BUILTIN_SCATTERSIV2DF:
41409 icode = CODE_FOR_avx512vl_scattersiv2df;
41411 case IX86_BUILTIN_SCATTERDIV8SF:
41412 icode = CODE_FOR_avx512vl_scatterdiv8sf;
41414 case IX86_BUILTIN_SCATTERDIV4SF:
41415 icode = CODE_FOR_avx512vl_scatterdiv4sf;
41417 case IX86_BUILTIN_SCATTERDIV4DF:
41418 icode = CODE_FOR_avx512vl_scatterdiv4df;
41420 case IX86_BUILTIN_SCATTERDIV2DF:
41421 icode = CODE_FOR_avx512vl_scatterdiv2df;
41423 case IX86_BUILTIN_SCATTERSIV8SI:
41424 icode = CODE_FOR_avx512vl_scattersiv8si;
41426 case IX86_BUILTIN_SCATTERSIV4SI:
41427 icode = CODE_FOR_avx512vl_scattersiv4si;
41429 case IX86_BUILTIN_SCATTERSIV4DI:
41430 icode = CODE_FOR_avx512vl_scattersiv4di;
41432 case IX86_BUILTIN_SCATTERSIV2DI:
41433 icode = CODE_FOR_avx512vl_scattersiv2di;
41435 case IX86_BUILTIN_SCATTERDIV8SI:
41436 icode = CODE_FOR_avx512vl_scatterdiv8si;
41438 case IX86_BUILTIN_SCATTERDIV4SI:
41439 icode = CODE_FOR_avx512vl_scatterdiv4si;
41441 case IX86_BUILTIN_SCATTERDIV4DI:
41442 icode = CODE_FOR_avx512vl_scatterdiv4di;
41444 case IX86_BUILTIN_SCATTERDIV2DI:
41445 icode = CODE_FOR_avx512vl_scatterdiv2di;
41447 case IX86_BUILTIN_GATHERPFDPD:
41448 icode = CODE_FOR_avx512pf_gatherpfv8sidf;
41449 goto vec_prefetch_gen;
41450 case IX86_BUILTIN_SCATTERALTSIV8DF:
41451 icode = CODE_FOR_avx512f_scattersiv8df;
41453 case IX86_BUILTIN_SCATTERALTDIV16SF:
41454 icode = CODE_FOR_avx512f_scatterdiv16sf;
41456 case IX86_BUILTIN_SCATTERALTSIV8DI:
41457 icode = CODE_FOR_avx512f_scattersiv8di;
41459 case IX86_BUILTIN_SCATTERALTDIV16SI:
41460 icode = CODE_FOR_avx512f_scatterdiv16si;
41462 case IX86_BUILTIN_GATHERPFDPS:
41463 icode = CODE_FOR_avx512pf_gatherpfv16sisf;
41464 goto vec_prefetch_gen;
41465 case IX86_BUILTIN_GATHERPFQPD:
41466 icode = CODE_FOR_avx512pf_gatherpfv8didf;
41467 goto vec_prefetch_gen;
41468 case IX86_BUILTIN_GATHERPFQPS:
41469 icode = CODE_FOR_avx512pf_gatherpfv8disf;
41470 goto vec_prefetch_gen;
41471 case IX86_BUILTIN_SCATTERPFDPD:
41472 icode = CODE_FOR_avx512pf_scatterpfv8sidf;
41473 goto vec_prefetch_gen;
41474 case IX86_BUILTIN_SCATTERPFDPS:
41475 icode = CODE_FOR_avx512pf_scatterpfv16sisf;
41476 goto vec_prefetch_gen;
41477 case IX86_BUILTIN_SCATTERPFQPD:
41478 icode = CODE_FOR_avx512pf_scatterpfv8didf;
41479 goto vec_prefetch_gen;
41480 case IX86_BUILTIN_SCATTERPFQPS:
41481 icode = CODE_FOR_avx512pf_scatterpfv8disf;
41482 goto vec_prefetch_gen;
41486 rtx (*gen) (rtx, rtx);
41488 arg0 = CALL_EXPR_ARG (exp, 0);
41489 arg1 = CALL_EXPR_ARG (exp, 1);
41490 arg2 = CALL_EXPR_ARG (exp, 2);
41491 arg3 = CALL_EXPR_ARG (exp, 3);
41492 arg4 = CALL_EXPR_ARG (exp, 4);
41493 op0 = expand_normal (arg0);
41494 op1 = expand_normal (arg1);
41495 op2 = expand_normal (arg2);
41496 op3 = expand_normal (arg3);
41497 op4 = expand_normal (arg4);
41498 /* Note the arg order is different from the operand order. */
41499 mode0 = insn_data[icode].operand[1].mode;
41500 mode2 = insn_data[icode].operand[3].mode;
41501 mode3 = insn_data[icode].operand[4].mode;
41502 mode4 = insn_data[icode].operand[5].mode;
41504 if (target == NULL_RTX
41505 || GET_MODE (target) != insn_data[icode].operand[0].mode
41506 || !insn_data[icode].operand[0].predicate (target,
41507 GET_MODE (target)))
41508 subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
41510 subtarget = target;
41514 case IX86_BUILTIN_GATHER3ALTSIV8DF:
41515 case IX86_BUILTIN_GATHER3ALTSIV8DI:
41516 half = gen_reg_rtx (V8SImode);
41517 if (!nonimmediate_operand (op2, V16SImode))
41518 op2 = copy_to_mode_reg (V16SImode, op2);
41519 emit_insn (gen_vec_extract_lo_v16si (half, op2));
41522 case IX86_BUILTIN_GATHER3ALTSIV4DF:
41523 case IX86_BUILTIN_GATHER3ALTSIV4DI:
41524 case IX86_BUILTIN_GATHERALTSIV4DF:
41525 case IX86_BUILTIN_GATHERALTSIV4DI:
41526 half = gen_reg_rtx (V4SImode);
41527 if (!nonimmediate_operand (op2, V8SImode))
41528 op2 = copy_to_mode_reg (V8SImode, op2);
41529 emit_insn (gen_vec_extract_lo_v8si (half, op2));
41532 case IX86_BUILTIN_GATHER3ALTDIV16SF:
41533 case IX86_BUILTIN_GATHER3ALTDIV16SI:
41534 half = gen_reg_rtx (mode0);
41535 if (mode0 == V8SFmode)
41536 gen = gen_vec_extract_lo_v16sf;
41538 gen = gen_vec_extract_lo_v16si;
41539 if (!nonimmediate_operand (op0, GET_MODE (op0)))
41540 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
41541 emit_insn (gen (half, op0));
41543 if (GET_MODE (op3) != VOIDmode)
41545 if (!nonimmediate_operand (op3, GET_MODE (op3)))
41546 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
41547 emit_insn (gen (half, op3));
41551 case IX86_BUILTIN_GATHER3ALTDIV8SF:
41552 case IX86_BUILTIN_GATHER3ALTDIV8SI:
41553 case IX86_BUILTIN_GATHERALTDIV8SF:
41554 case IX86_BUILTIN_GATHERALTDIV8SI:
41555 half = gen_reg_rtx (mode0);
41556 if (mode0 == V4SFmode)
41557 gen = gen_vec_extract_lo_v8sf;
41559 gen = gen_vec_extract_lo_v8si;
41560 if (!nonimmediate_operand (op0, GET_MODE (op0)))
41561 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
41562 emit_insn (gen (half, op0));
41564 if (GET_MODE (op3) != VOIDmode)
41566 if (!nonimmediate_operand (op3, GET_MODE (op3)))
41567 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
41568 emit_insn (gen (half, op3));
41576 /* Force memory operand only with base register here. But we
41577 don't want to do it on memory operand for other builtin
41579 op1 = ix86_zero_extend_to_Pmode (op1);
41581 if (!insn_data[icode].operand[1].predicate (op0, mode0))
41582 op0 = copy_to_mode_reg (mode0, op0);
41583 if (!insn_data[icode].operand[2].predicate (op1, Pmode))
41584 op1 = copy_to_mode_reg (Pmode, op1);
41585 if (!insn_data[icode].operand[3].predicate (op2, mode2))
41586 op2 = copy_to_mode_reg (mode2, op2);
41588 op3 = fixup_modeless_constant (op3, mode3);
41590 if (GET_MODE (op3) == mode3 || GET_MODE (op3) == VOIDmode)
41592 if (!insn_data[icode].operand[4].predicate (op3, mode3))
41593 op3 = copy_to_mode_reg (mode3, op3);
41597 op3 = copy_to_reg (op3);
41598 op3 = simplify_gen_subreg (mode3, op3, GET_MODE (op3), 0);
41600 if (!insn_data[icode].operand[5].predicate (op4, mode4))
41602 error ("the last argument must be scale 1, 2, 4, 8");
41606 /* Optimize. If mask is known to have all high bits set,
41607 replace op0 with pc_rtx to signal that the instruction
41608 overwrites the whole destination and doesn't use its
41609 previous contents. */
41612 if (TREE_CODE (arg3) == INTEGER_CST)
41614 if (integer_all_onesp (arg3))
41617 else if (TREE_CODE (arg3) == VECTOR_CST)
41619 unsigned int negative = 0;
41620 for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
41622 tree cst = VECTOR_CST_ELT (arg3, i);
41623 if (TREE_CODE (cst) == INTEGER_CST
41624 && tree_int_cst_sign_bit (cst))
41626 else if (TREE_CODE (cst) == REAL_CST
41627 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
41630 if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
41633 else if (TREE_CODE (arg3) == SSA_NAME
41634 && TREE_CODE (TREE_TYPE (arg3)) == VECTOR_TYPE)
41636 /* Recognize also when mask is like:
41637 __v2df src = _mm_setzero_pd ();
41638 __v2df mask = _mm_cmpeq_pd (src, src);
41640 __v8sf src = _mm256_setzero_ps ();
41641 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
41642 as that is a cheaper way to load all ones into
41643 a register than having to load a constant from
41645 gimple *def_stmt = SSA_NAME_DEF_STMT (arg3);
41646 if (is_gimple_call (def_stmt))
41648 tree fndecl = gimple_call_fndecl (def_stmt);
41650 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
41651 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
41653 case IX86_BUILTIN_CMPPD:
41654 case IX86_BUILTIN_CMPPS:
41655 case IX86_BUILTIN_CMPPD256:
41656 case IX86_BUILTIN_CMPPS256:
41657 if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
41660 case IX86_BUILTIN_CMPEQPD:
41661 case IX86_BUILTIN_CMPEQPS:
41662 if (initializer_zerop (gimple_call_arg (def_stmt, 0))
41663 && initializer_zerop (gimple_call_arg (def_stmt,
41674 pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
41681 case IX86_BUILTIN_GATHER3DIV16SF:
41682 if (target == NULL_RTX)
41683 target = gen_reg_rtx (V8SFmode);
41684 emit_insn (gen_vec_extract_lo_v16sf (target, subtarget));
41686 case IX86_BUILTIN_GATHER3DIV16SI:
41687 if (target == NULL_RTX)
41688 target = gen_reg_rtx (V8SImode);
41689 emit_insn (gen_vec_extract_lo_v16si (target, subtarget));
41691 case IX86_BUILTIN_GATHER3DIV8SF:
41692 case IX86_BUILTIN_GATHERDIV8SF:
41693 if (target == NULL_RTX)
41694 target = gen_reg_rtx (V4SFmode);
41695 emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
41697 case IX86_BUILTIN_GATHER3DIV8SI:
41698 case IX86_BUILTIN_GATHERDIV8SI:
41699 if (target == NULL_RTX)
41700 target = gen_reg_rtx (V4SImode);
41701 emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
41704 target = subtarget;
41710 arg0 = CALL_EXPR_ARG (exp, 0);
41711 arg1 = CALL_EXPR_ARG (exp, 1);
41712 arg2 = CALL_EXPR_ARG (exp, 2);
41713 arg3 = CALL_EXPR_ARG (exp, 3);
41714 arg4 = CALL_EXPR_ARG (exp, 4);
41715 op0 = expand_normal (arg0);
41716 op1 = expand_normal (arg1);
41717 op2 = expand_normal (arg2);
41718 op3 = expand_normal (arg3);
41719 op4 = expand_normal (arg4);
41720 mode1 = insn_data[icode].operand[1].mode;
41721 mode2 = insn_data[icode].operand[2].mode;
41722 mode3 = insn_data[icode].operand[3].mode;
41723 mode4 = insn_data[icode].operand[4].mode;
41725 /* Scatter instruction stores operand op3 to memory with
41726 indices from op2 and scale from op4 under writemask op1.
41727 If index operand op2 has more elements then source operand
41728 op3 one need to use only its low half. And vice versa. */
41731 case IX86_BUILTIN_SCATTERALTSIV8DF:
41732 case IX86_BUILTIN_SCATTERALTSIV8DI:
41733 half = gen_reg_rtx (V8SImode);
41734 if (!nonimmediate_operand (op2, V16SImode))
41735 op2 = copy_to_mode_reg (V16SImode, op2);
41736 emit_insn (gen_vec_extract_lo_v16si (half, op2));
41739 case IX86_BUILTIN_SCATTERALTDIV16SF:
41740 case IX86_BUILTIN_SCATTERALTDIV16SI:
41741 half = gen_reg_rtx (mode3);
41742 if (mode3 == V8SFmode)
41743 gen = gen_vec_extract_lo_v16sf;
41745 gen = gen_vec_extract_lo_v16si;
41746 if (!nonimmediate_operand (op3, GET_MODE (op3)))
41747 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
41748 emit_insn (gen (half, op3));
41755 /* Force memory operand only with base register here. But we
41756 don't want to do it on memory operand for other builtin
41758 op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
41760 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
41761 op0 = copy_to_mode_reg (Pmode, op0);
41763 op1 = fixup_modeless_constant (op1, mode1);
41765 if (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
41767 if (!insn_data[icode].operand[1].predicate (op1, mode1))
41768 op1 = copy_to_mode_reg (mode1, op1);
41772 op1 = copy_to_reg (op1);
41773 op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
41776 if (!insn_data[icode].operand[2].predicate (op2, mode2))
41777 op2 = copy_to_mode_reg (mode2, op2);
41779 if (!insn_data[icode].operand[3].predicate (op3, mode3))
41780 op3 = copy_to_mode_reg (mode3, op3);
41782 if (!insn_data[icode].operand[4].predicate (op4, mode4))
41784 error ("the last argument must be scale 1, 2, 4, 8");
41788 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
41796 arg0 = CALL_EXPR_ARG (exp, 0);
41797 arg1 = CALL_EXPR_ARG (exp, 1);
41798 arg2 = CALL_EXPR_ARG (exp, 2);
41799 arg3 = CALL_EXPR_ARG (exp, 3);
41800 arg4 = CALL_EXPR_ARG (exp, 4);
41801 op0 = expand_normal (arg0);
41802 op1 = expand_normal (arg1);
41803 op2 = expand_normal (arg2);
41804 op3 = expand_normal (arg3);
41805 op4 = expand_normal (arg4);
41806 mode0 = insn_data[icode].operand[0].mode;
41807 mode1 = insn_data[icode].operand[1].mode;
41808 mode3 = insn_data[icode].operand[3].mode;
41809 mode4 = insn_data[icode].operand[4].mode;
41811 op0 = fixup_modeless_constant (op0, mode0);
41813 if (GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
41815 if (!insn_data[icode].operand[0].predicate (op0, mode0))
41816 op0 = copy_to_mode_reg (mode0, op0);
41820 op0 = copy_to_reg (op0);
41821 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
41824 if (!insn_data[icode].operand[1].predicate (op1, mode1))
41825 op1 = copy_to_mode_reg (mode1, op1);
41827 /* Force memory operand only with base register here. But we
41828 don't want to do it on memory operand for other builtin
41830 op2 = force_reg (Pmode, convert_to_mode (Pmode, op2, 1));
41832 if (!insn_data[icode].operand[2].predicate (op2, Pmode))
41833 op2 = copy_to_mode_reg (Pmode, op2);
41835 if (!insn_data[icode].operand[3].predicate (op3, mode3))
41837 error ("the forth argument must be scale 1, 2, 4, 8");
41841 if (!insn_data[icode].operand[4].predicate (op4, mode4))
41843 error ("incorrect hint operand");
41847 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
41855 case IX86_BUILTIN_XABORT:
41856 icode = CODE_FOR_xabort;
41857 arg0 = CALL_EXPR_ARG (exp, 0);
41858 op0 = expand_normal (arg0);
41859 mode0 = insn_data[icode].operand[0].mode;
41860 if (!insn_data[icode].operand[0].predicate (op0, mode0))
41862 error ("the xabort's argument must be an 8-bit immediate");
41865 emit_insn (gen_xabort (op0));
41872 for (i = 0, d = bdesc_special_args;
41873 i < ARRAY_SIZE (bdesc_special_args);
41875 if (d->code == fcode)
41876 return ix86_expand_special_args_builtin (d, exp, target);
41878 for (i = 0, d = bdesc_args;
41879 i < ARRAY_SIZE (bdesc_args);
41881 if (d->code == fcode)
41884 case IX86_BUILTIN_FABSQ:
41885 case IX86_BUILTIN_COPYSIGNQ:
41887 /* Emit a normal call if SSE isn't available. */
41888 return expand_call (exp, target, ignore);
41890 return ix86_expand_args_builtin (d, exp, target);
41893 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
41894 if (d->code == fcode)
41895 return ix86_expand_sse_comi (d, exp, target);
41897 for (i = 0, d = bdesc_round_args; i < ARRAY_SIZE (bdesc_round_args); i++, d++)
41898 if (d->code == fcode)
41899 return ix86_expand_round_builtin (d, exp, target);
41901 for (i = 0, d = bdesc_pcmpestr;
41902 i < ARRAY_SIZE (bdesc_pcmpestr);
41904 if (d->code == fcode)
41905 return ix86_expand_sse_pcmpestr (d, exp, target);
41907 for (i = 0, d = bdesc_pcmpistr;
41908 i < ARRAY_SIZE (bdesc_pcmpistr);
41910 if (d->code == fcode)
41911 return ix86_expand_sse_pcmpistr (d, exp, target);
41913 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
41914 if (d->code == fcode)
41915 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
41916 (enum ix86_builtin_func_type)
41917 d->flag, d->comparison);
41919 gcc_unreachable ();
41922 /* This returns the target-specific builtin with code CODE if
41923 current_function_decl has visibility on this builtin, which is checked
41924 using isa flags. Returns NULL_TREE otherwise. */
/* NOTE(review): this listing elides physical lines (braces, the final
   NULL_TREE return, blank lines); the code below is kept byte-identical.  */
41926 static tree ix86_get_builtin (enum ix86_builtins code)
41928 struct cl_target_option *opts;
41929 tree target_tree = NULL_TREE;
41931 /* Determine the isa flags of current_function_decl. */
/* Prefer the function's own target attribute; fall back to the
   command-line default target node when none is attached.  */
41933 if (current_function_decl)
41934 target_tree = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl);
41936 if (target_tree == NULL)
41937 target_tree = target_option_default_node;
41939 opts = TREE_TARGET_OPTION (target_tree);
/* The builtin is visible only if the ISA bits it was registered with
   intersect the ISA flags in effect for the current function.  */
41941 if (ix86_builtins_isa[(int) code].isa & opts->x_ix86_isa_flags)
41942 return ix86_builtin_decl (code, true);
41947 /* Return function decl for target specific builtin
41948 for given MPX builtin passed in FCODE. */
/* NOTE(review): maps generic CHKP (Pointer Bounds Checker) builtin codes to
   the corresponding x86 MPX builtin decls.  The enclosing `switch (fcode)`
   line and closing braces are elided from this listing; code below is
   byte-identical.  */
41950 ix86_builtin_mpx_function (unsigned fcode)
41954 case BUILT_IN_CHKP_BNDMK:
41955 return ix86_builtins[IX86_BUILTIN_BNDMK];
41957 case BUILT_IN_CHKP_BNDSTX:
41958 return ix86_builtins[IX86_BUILTIN_BNDSTX];
41960 case BUILT_IN_CHKP_BNDLDX:
41961 return ix86_builtins[IX86_BUILTIN_BNDLDX];
41963 case BUILT_IN_CHKP_BNDCL:
41964 return ix86_builtins[IX86_BUILTIN_BNDCL];
41966 case BUILT_IN_CHKP_BNDCU:
41967 return ix86_builtins[IX86_BUILTIN_BNDCU];
41969 case BUILT_IN_CHKP_BNDRET:
41970 return ix86_builtins[IX86_BUILTIN_BNDRET];
41972 case BUILT_IN_CHKP_INTERSECT:
41973 return ix86_builtins[IX86_BUILTIN_BNDINT];
41975 case BUILT_IN_CHKP_NARROW:
41976 return ix86_builtins[IX86_BUILTIN_BNDNARROW];
41978 case BUILT_IN_CHKP_SIZEOF:
41979 return ix86_builtins[IX86_BUILTIN_SIZEOF];
41981 case BUILT_IN_CHKP_EXTRACT_LOWER:
41982 return ix86_builtins[IX86_BUILTIN_BNDLOWER];
41984 case BUILT_IN_CHKP_EXTRACT_UPPER:
41985 return ix86_builtins[IX86_BUILTIN_BNDUPPER];
/* Any other FCODE is a caller bug: every CHKP builtin must be mapped.  */
41991 gcc_unreachable ();
41994 /* Helper function for ix86_load_bounds and ix86_store_bounds.
41996 Return an address to be used to load/store bounds for pointer
41999 SLOT_NO is an integer constant holding number of a target
42000 dependent special slot to be used in case SLOT is not a memory.
42002 SPECIAL_BASE is a pointer to be used as a base of fake address
42003 to access special slots in Bounds Table. SPECIAL_BASE[-1],
42004 SPECIAL_BASE[-2] etc. will be used as fake pointer locations. */
/* NOTE(review): the `static rtx` line, braces, the `rtx addr;` declaration
   and the trailing `return addr;` are elided from this listing.  */
42007 ix86_get_arg_address_for_bt (rtx slot, rtx slot_no, rtx special_base)
42011 /* NULL slot means we pass bounds for pointer not passed to the
42012 function at all. Register slot means we pass pointer in a
42013 register. In both these cases bounds are passed via Bounds
42014 Table. Since we do not have actual pointer stored in memory,
42015 we have to use fake addresses to access Bounds Table. We
42016 start with (special_base - sizeof (void*)) and decrease this
42017 address by pointer size to get addresses for other slots. */
42018 if (!slot || REG_P (slot))
42020 gcc_assert (CONST_INT_P (slot_no));
/* Fake slot N lives at special_base - (N + 1) * sizeof (void *).  */
42021 addr = plus_constant (Pmode, special_base,
42022 -(INTVAL (slot_no) + 1) * GET_MODE_SIZE (Pmode));
42024 /* If pointer is passed in a memory then its address is used to
42025 access Bounds Table. */
42026 else if (MEM_P (slot))
42028 addr = XEXP (slot, 0);
/* BNDLDX/BNDSTX want the address in a register.  */
42029 if (!register_operand (addr, Pmode))
42030 addr = copy_addr_to_reg (addr);
/* SLOT must be NULL, a REG, or a MEM — anything else is a bug.  */
42033 gcc_unreachable ();
42038 /* Expand pass uses this hook to load bounds for function parameter
42039 PTR passed in SLOT in case its bounds are not passed in a register.
42041 If SLOT is a memory, then bounds are loaded as for regular pointer
42042 loaded from memory. PTR may be NULL in case SLOT is a memory.
42043 In such case value of PTR (if required) may be loaded from SLOT.
42045 If SLOT is NULL or a register then SLOT_NO is an integer constant
42046 holding number of the target dependent special slot which should be
42047 used to obtain bounds.
42049 Return loaded bounds. */
/* NOTE(review): return-type line, braces, the `rtx addr;` declaration,
   the `if (!ptr)` guard around the copy below, and the final
   `return reg;` are elided from this listing.  */
42052 ix86_load_bounds (rtx slot, rtx ptr, rtx slot_no)
42054 rtx reg = gen_reg_rtx (BNDmode);
42057 /* Get address to be used to access Bounds Table. Special slots start
42058 at the location of return address of the current function. */
42059 addr = ix86_get_arg_address_for_bt (slot, slot_no, arg_pointer_rtx);
42061 /* Load pointer value from a memory if we don't have it. */
42064 gcc_assert (MEM_P (slot));
42065 ptr = copy_addr_to_reg (slot);
/* BNDLDX requires the pointer in Pmode; zero-extend if narrower.  */
42068 if (!register_operand (ptr, Pmode))
42069 ptr = ix86_zero_extend_to_Pmode (ptr);
/* Emit the 64-bit or 32-bit bounds-load depending on target BNDmode.  */
42071 emit_insn (BNDmode == BND64mode
42072 ? gen_bnd64_ldx (reg, addr, ptr)
42073 : gen_bnd32_ldx (reg, addr, ptr));
42078 /* Expand pass uses this hook to store BOUNDS for call argument PTR
42079 passed in SLOT in case BOUNDS are not passed in a register.
42081 If SLOT is a memory, then BOUNDS are stored as for regular pointer
42082 stored in memory. PTR may be NULL in case SLOT is a memory.
42083 In such case value of PTR (if required) may be loaded from SLOT.
42085 If SLOT is NULL or a register then SLOT_NO is an integer constant
42086 holding number of the target dependent special slot which should be
42087 used to store BOUNDS. */
/* NOTE(review): mirror of ix86_load_bounds for the caller side; return-type
   line, braces, `rtx addr;` and the `if (!ptr)` guard are elided from
   this listing.  */
42090 ix86_store_bounds (rtx ptr, rtx slot, rtx bounds, rtx slot_no)
42094 /* Get address to be used to access Bounds Table. Special slots start
42095 at the location of return address of a called function. */
/* Uses stack_pointer_rtx (callee's return-address area), unlike the load
   path which uses arg_pointer_rtx.  */
42096 addr = ix86_get_arg_address_for_bt (slot, slot_no, stack_pointer_rtx);
42098 /* Load pointer value from a memory if we don't have it. */
42101 gcc_assert (MEM_P (slot));
42102 ptr = copy_addr_to_reg (slot);
42105 if (!register_operand (ptr, Pmode))
42106 ptr = ix86_zero_extend_to_Pmode (ptr);
/* BNDSTX takes its bounds operand in a BNDmode register.  */
42108 gcc_assert (POINTER_BOUNDS_MODE_P (GET_MODE (bounds)));
42109 if (!register_operand (bounds, BNDmode))
42110 bounds = copy_to_mode_reg (BNDmode, bounds);
42112 emit_insn (BNDmode == BND64mode
42113 ? gen_bnd64_stx (addr, ptr, bounds)
42114 : gen_bnd32_stx (addr, ptr, bounds));
42117 /* Load and return bounds returned by function in SLOT. */
/* NOTE(review): SLOT must be the bound register the callee returned in;
   the declaration of `res` and `return res;` are elided from this listing.  */
42120 ix86_load_returned_bounds (rtx slot)
42124 gcc_assert (REG_P (slot));
42125 res = gen_reg_rtx (BNDmode);
42126 emit_move_insn (res, slot);
42131 /* Store BOUNDS returned by function into SLOT. */
/* NOTE(review): simple move into the return-bounds register; braces and
   return-type line are elided from this listing.  */
42134 ix86_store_returned_bounds (rtx slot, rtx bounds)
42136 gcc_assert (REG_P (slot));
42137 emit_move_insn (slot, bounds);
42140 /* Returns a function decl for a vectorized version of the combined function
42141 with combined_fn code FN and the result vector type TYPE, or NULL_TREE
42142 if it is not available. */
/* NOTE(review): this listing elides the `switch (fn)` statement, its
   CASE_CFN_* labels (presumably EXP2, IFLOOR, ICEIL, IFIX/LRINT, IROUND,
   FLOOR, CEIL, TRUNC/BTRUNC, NEARBYINT/RINT, FMA — TODO confirm against the
   full source), all `break`s and closing braces.  Each visible group of
   mode/lane tests selects the x86 builtin matching the requested element
   mode and vector width.  Code below is byte-identical to the listing.  */
42145 ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
42148 machine_mode in_mode, out_mode;
/* Only vector-to-vector mappings are supported.  */
42151 if (TREE_CODE (type_out) != VECTOR_TYPE
42152 || TREE_CODE (type_in) != VECTOR_TYPE)
42155 out_mode = TYPE_MODE (TREE_TYPE (type_out));
42156 out_n = TYPE_VECTOR_SUBPARTS (type_out);
42157 in_mode = TYPE_MODE (TREE_TYPE (type_in));
42158 in_n = TYPE_VECTOR_SUBPARTS (type_in);
/* exp2f group: only the 512-bit (16 x SF) form exists.  */
42163 if (out_mode == SFmode && in_mode == SFmode)
42165 if (out_n == 16 && in_n == 16)
42166 return ix86_get_builtin (IX86_BUILTIN_EXP2PS);
42173 /* The round insn does not trap on denormals. */
/* SSE4.1 ROUND* is required, and -ftrapping-math forbids the replacement
   since the rounding insns never raise inexact/denormal traps.  */
42174 if (flag_trapping_math || !TARGET_ROUND)
42177 if (out_mode == SImode && in_mode == DFmode)
42179 if (out_n == 4 && in_n == 2)
42180 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX);
42181 else if (out_n == 8 && in_n == 4)
42182 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256);
42183 else if (out_n == 16 && in_n == 8)
42184 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512);
42186 if (out_mode == SImode && in_mode == SFmode)
42188 if (out_n == 4 && in_n == 4)
42189 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX);
42190 else if (out_n == 8 && in_n == 8)
42191 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX256);
42198 /* The round insn does not trap on denormals. */
42199 if (flag_trapping_math || !TARGET_ROUND)
42202 if (out_mode == SImode && in_mode == DFmode)
42204 if (out_n == 4 && in_n == 2)
42205 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX);
42206 else if (out_n == 8 && in_n == 4)
42207 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256);
42208 else if (out_n == 16 && in_n == 8)
42209 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512);
42211 if (out_mode == SImode && in_mode == SFmode)
42213 if (out_n == 4 && in_n == 4)
42214 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX);
42215 else if (out_n == 8 && in_n == 8)
42216 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX256);
/* Plain float->int conversions (cvtps2dq family); no TARGET_ROUND gate.  */
42223 if (out_mode == SImode && in_mode == DFmode)
42225 if (out_n == 4 && in_n == 2)
42226 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX);
42227 else if (out_n == 8 && in_n == 4)
42228 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX256);
42230 if (out_mode == SImode && in_mode == SFmode)
42232 if (out_n == 4 && in_n == 4)
42233 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ);
42234 else if (out_n == 8 && in_n == 8)
42235 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ256);
42242 /* The round insn does not trap on denormals. */
42243 if (flag_trapping_math || !TARGET_ROUND)
42246 if (out_mode == SImode && in_mode == DFmode)
42248 if (out_n == 4 && in_n == 2)
42249 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX);
42250 else if (out_n == 8 && in_n == 4)
42251 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256);
42252 else if (out_n == 16 && in_n == 8)
42253 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512);
42255 if (out_mode == SImode && in_mode == SFmode)
42257 if (out_n == 4 && in_n == 4)
42258 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX);
42259 else if (out_n == 8 && in_n == 8)
42260 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256);
42265 /* The round insn does not trap on denormals. */
42266 if (flag_trapping_math || !TARGET_ROUND)
/* floor: float-in/float-out forms.  */
42269 if (out_mode == DFmode && in_mode == DFmode)
42271 if (out_n == 2 && in_n == 2)
42272 return ix86_get_builtin (IX86_BUILTIN_FLOORPD);
42273 else if (out_n == 4 && in_n == 4)
42274 return ix86_get_builtin (IX86_BUILTIN_FLOORPD256);
42276 if (out_mode == SFmode && in_mode == SFmode)
42278 if (out_n == 4 && in_n == 4)
42279 return ix86_get_builtin (IX86_BUILTIN_FLOORPS);
42280 else if (out_n == 8 && in_n == 8)
42281 return ix86_get_builtin (IX86_BUILTIN_FLOORPS256);
42286 /* The round insn does not trap on denormals. */
42287 if (flag_trapping_math || !TARGET_ROUND)
42290 if (out_mode == DFmode && in_mode == DFmode)
42292 if (out_n == 2 && in_n == 2)
42293 return ix86_get_builtin (IX86_BUILTIN_CEILPD);
42294 else if (out_n == 4 && in_n == 4)
42295 return ix86_get_builtin (IX86_BUILTIN_CEILPD256);
42297 if (out_mode == SFmode && in_mode == SFmode)
42299 if (out_n == 4 && in_n == 4)
42300 return ix86_get_builtin (IX86_BUILTIN_CEILPS);
42301 else if (out_n == 8 && in_n == 8)
42302 return ix86_get_builtin (IX86_BUILTIN_CEILPS256);
42307 /* The round insn does not trap on denormals. */
42308 if (flag_trapping_math || !TARGET_ROUND)
42311 if (out_mode == DFmode && in_mode == DFmode)
42313 if (out_n == 2 && in_n == 2)
42314 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD);
42315 else if (out_n == 4 && in_n == 4)
42316 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD256);
42318 if (out_mode == SFmode && in_mode == SFmode)
42320 if (out_n == 4 && in_n == 4)
42321 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS);
42322 else if (out_n == 8 && in_n == 8)
42323 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS256);
42328 /* The round insn does not trap on denormals. */
42329 if (flag_trapping_math || !TARGET_ROUND)
42332 if (out_mode == DFmode && in_mode == DFmode)
42334 if (out_n == 2 && in_n == 2)
42335 return ix86_get_builtin (IX86_BUILTIN_RINTPD);
42336 else if (out_n == 4 && in_n == 4)
42337 return ix86_get_builtin (IX86_BUILTIN_RINTPD256);
42339 if (out_mode == SFmode && in_mode == SFmode)
42341 if (out_n == 4 && in_n == 4)
42342 return ix86_get_builtin (IX86_BUILTIN_RINTPS);
42343 else if (out_n == 8 && in_n == 8)
42344 return ix86_get_builtin (IX86_BUILTIN_RINTPS256);
/* Fused multiply-add forms (presumably CASE_CFN_FMA — TODO confirm).  */
42349 if (out_mode == DFmode && in_mode == DFmode)
42351 if (out_n == 2 && in_n == 2)
42352 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD);
42353 if (out_n == 4 && in_n == 4)
42354 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD256);
42356 if (out_mode == SFmode && in_mode == SFmode)
42358 if (out_n == 4 && in_n == 4)
42359 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS);
42360 if (out_n == 8 && in_n == 8)
42361 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS256);
42369 /* Dispatch to a handler for a vectorization library. */
/* Fallback: let -mveclibabi (SVML/ACML) provide a vectorized decl.  */
42370 if (ix86_veclib_handler)
42371 return ix86_veclib_handler (combined_fn (fn), type_out, type_in);
42376 /* Handler for an SVML-style interface to
42377 a library with vectorized intrinsics. */
/* NOTE(review): builds an external FUNCTION_DECL named after Intel SVML's
   "vmls<FN>4" (float, 4 lanes) / "vmld<FN>2" (double, 2 lanes) convention.
   The listing elides the `switch (fn)` whitelist of supported math
   functions, several declarations (n, in_n, bname) and the final
   `return new_fndecl;`.  Code below is byte-identical to the listing.  */
42380 ix86_veclibabi_svml (combined_fn fn, tree type_out, tree type_in)
42383 tree fntype, new_fndecl, args;
42386 machine_mode el_mode, in_mode;
42389 /* The SVML is suitable for unsafe math only. */
42390 if (!flag_unsafe_math_optimizations)
42393 el_mode = TYPE_MODE (TREE_TYPE (type_out));
42394 n = TYPE_VECTOR_SUBPARTS (type_out);
42395 in_mode = TYPE_MODE (TREE_TYPE (type_in));
42396 in_n = TYPE_VECTOR_SUBPARTS (type_in);
/* Element mode and lane count must match between input and output.  */
42397 if (el_mode != in_mode
/* Only V2DF and V4SF vector shapes are supported by this ABI.  */
42421 if ((el_mode != DFmode || n != 2)
42422 && (el_mode != SFmode || n != 4))
42430 tree fndecl = mathfn_built_in (TREE_TYPE (type_in), fn);
/* bname is "__builtin_<fn>"; bname+10 below skips the "__builtin_" prefix.  */
42431 bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
/* SVML spells log as "Ln", so it cannot be derived mechanically.  */
42433 if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOGF)
42434 strcpy (name, "vmlsLn4");
42435 else if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOG)
42436 strcpy (name, "vmldLn2");
/* Float variant: strip the trailing 'f' of e.g. "sinf" and append '4'.  */
42439 sprintf (name, "vmls%s", bname+10);
42440 name[strlen (name)-1] = '4';
42443 sprintf (name, "vmld%s2", bname+10);
42445 /* Convert to uppercase. */
/* SVML takes one or two args depending on the scalar builtin's arity.  */
42449 for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
42453 fntype = build_function_type_list (type_out, type_in, NULL);
42455 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
42457 /* Build a function declaration for the vectorized function. */
42458 new_fndecl = build_decl (BUILTINS_LOCATION,
42459 FUNCTION_DECL, get_identifier (name), fntype);
42460 TREE_PUBLIC (new_fndecl) = 1;
42461 DECL_EXTERNAL (new_fndecl) = 1;
/* Pure math routine: no virtual operands, readonly for alias analysis.  */
42462 DECL_IS_NOVOPS (new_fndecl) = 1;
42463 TREE_READONLY (new_fndecl) = 1;
/* NOTE(review): fragmentary listing -- original lines are missing between the
   numbered lines (e.g. the TARGET_64BIT check at 42484, braces, returns).  */
42468 /* Handler for an ACML-style interface to
42469 a library with vectorized intrinsics. */
42472 ix86_veclibabi_acml (combined_fn fn, tree type_out, tree type_in)
/* Template "__vr.._": the ".." is patched per mode below (at offset 4);
   the suffix is written starting at name + 7.  */
42474 char name[20] = "__vr.._";
42475 tree fntype, new_fndecl, args;
42478 machine_mode el_mode, in_mode;
42481 /* The ACML is 64bits only and suitable for unsafe math only as
42482 it does not correctly support parts of IEEE with the required
42483 precision such as denormals. */
42485 || !flag_unsafe_math_optimizations)
/* Input and output vectors must have matching element mode and lanes.  */
42488 el_mode = TYPE_MODE (TREE_TYPE (type_out));
42489 n = TYPE_VECTOR_SUBPARTS (type_out);
42490 in_mode = TYPE_MODE (TREE_TYPE (type_in));
42491 in_n = TYPE_VECTOR_SUBPARTS (type_in);
42492 if (el_mode != in_mode
/* Only 2xDF and 4xSF entry points exist in ACML.  */
42504 if (el_mode == DFmode && n == 2)
42509 else if (el_mode == SFmode && n == 4)
42522 tree fndecl = mathfn_built_in (TREE_TYPE (type_in), fn);
42523 bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
/* bname+10 skips the "__builtin_" prefix of the scalar builtin's name.  */
42524 sprintf (name + 7, "%s", bname+10);
42527 for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
42531 fntype = build_function_type_list (type_out, type_in, NULL);
42533 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
42535 /* Build a function declaration for the vectorized function. */
42536 new_fndecl = build_decl (BUILTINS_LOCATION,
42537 FUNCTION_DECL, get_identifier (name), fntype);
42538 TREE_PUBLIC (new_fndecl) = 1;
42539 DECL_EXTERNAL (new_fndecl) = 1;
42540 DECL_IS_NOVOPS (new_fndecl) = 1;
42541 TREE_READONLY (new_fndecl) = 1;
/* NOTE(review): fragmentary listing -- switch-case labels, early "return
   NULL_TREE" lines and braces are missing between the numbered lines.  */
42546 /* Returns a decl of a function that implements gather load with
42547 memory type MEM_VECTYPE and index type INDEX_VECTYPE and SCALE.
42548 Return NULL_TREE if it is not available. */
42551 ix86_vectorize_builtin_gather (const_tree mem_vectype,
42552 const_tree index_type, int scale)
42555 enum ix86_builtins code;
/* The index must be a 32- or 64-bit integer (or pointer) type.  */
42560 if ((TREE_CODE (index_type) != INTEGER_TYPE
42561 && !POINTER_TYPE_P (index_type))
42562 || (TYPE_MODE (index_type) != SImode
42563 && TYPE_MODE (index_type) != DImode))
42566 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
42569 /* v*gather* insn sign extends index to pointer mode. */
/* Unsigned sub-pointer-width indexes would be zero-extended by the
   source semantics but sign-extended by the insn -- reject them.  */
42570 if (TYPE_PRECISION (index_type) < POINTER_SIZE
42571 && TYPE_UNSIGNED (index_type))
/* Scale must be a power of two (hardware encodes 1/2/4/8).  */
42576 || (scale & (scale - 1)) != 0)
42579 si = TYPE_MODE (index_type) == SImode;
/* Select the builtin by vector mode, SI-vs-DI index, and whether the
   AVX512VL (gather3) or plain AVX2 form is available.  */
42580 switch (TYPE_MODE (mem_vectype))
42583 if (TARGET_AVX512VL)
42584 code = si ? IX86_BUILTIN_GATHER3SIV2DF : IX86_BUILTIN_GATHER3DIV2DF;
42586 code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
42589 if (TARGET_AVX512VL)
42590 code = si ? IX86_BUILTIN_GATHER3ALTSIV4DF : IX86_BUILTIN_GATHER3DIV4DF;
42592 code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
42595 if (TARGET_AVX512VL)
42596 code = si ? IX86_BUILTIN_GATHER3SIV2DI : IX86_BUILTIN_GATHER3DIV2DI;
42598 code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
42601 if (TARGET_AVX512VL)
42602 code = si ? IX86_BUILTIN_GATHER3ALTSIV4DI : IX86_BUILTIN_GATHER3DIV4DI;
42604 code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
42607 if (TARGET_AVX512VL)
42608 code = si ? IX86_BUILTIN_GATHER3SIV4SF : IX86_BUILTIN_GATHER3DIV4SF;
42610 code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
42613 if (TARGET_AVX512VL)
42614 code = si ? IX86_BUILTIN_GATHER3SIV8SF : IX86_BUILTIN_GATHER3ALTDIV8SF;
42616 code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
42619 if (TARGET_AVX512VL)
42620 code = si ? IX86_BUILTIN_GATHER3SIV4SI : IX86_BUILTIN_GATHER3DIV4SI;
42622 code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
42625 if (TARGET_AVX512VL)
42626 code = si ? IX86_BUILTIN_GATHER3SIV8SI : IX86_BUILTIN_GATHER3ALTDIV8SI;
42628 code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
/* 512-bit modes require full AVX-512F.  */
42631 if (TARGET_AVX512F)
42632 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DF : IX86_BUILTIN_GATHER3DIV8DF;
42637 if (TARGET_AVX512F)
42638 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DI : IX86_BUILTIN_GATHER3DIV8DI;
42643 if (TARGET_AVX512F)
42644 code = si ? IX86_BUILTIN_GATHER3SIV16SF : IX86_BUILTIN_GATHER3ALTDIV16SF;
42649 if (TARGET_AVX512F)
42650 code = si ? IX86_BUILTIN_GATHER3SIV16SI : IX86_BUILTIN_GATHER3ALTDIV16SI;
42658 return ix86_get_builtin (code);
/* NOTE(review): fragmentary listing -- switch-case labels and early returns
   are missing between the numbered lines.  Mirrors the gather routine above.  */
42661 /* Returns a decl of a function that implements scatter store with
42662 register type VECTYPE and index type INDEX_TYPE and SCALE.
42663 Return NULL_TREE if it is not available. */
42666 ix86_vectorize_builtin_scatter (const_tree vectype,
42667 const_tree index_type, int scale)
42670 enum ix86_builtins code;
/* Scatter insns exist only with AVX-512F.  */
42672 if (!TARGET_AVX512F)
42675 if ((TREE_CODE (index_type) != INTEGER_TYPE
42676 && !POINTER_TYPE_P (index_type))
42677 || (TYPE_MODE (index_type) != SImode
42678 && TYPE_MODE (index_type) != DImode))
42681 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
42684 /* v*scatter* insn sign extends index to pointer mode. */
/* Reject unsigned narrow indexes for the same reason as in gather.  */
42685 if (TYPE_PRECISION (index_type) < POINTER_SIZE
42686 && TYPE_UNSIGNED (index_type))
42689 /* Scale can be 1, 2, 4 or 8. */
42692 || (scale & (scale - 1)) != 0)
42695 si = TYPE_MODE (index_type) == SImode;
42696 switch (TYPE_MODE (vectype))
42699 code = si ? IX86_BUILTIN_SCATTERALTSIV8DF : IX86_BUILTIN_SCATTERDIV8DF;
42702 code = si ? IX86_BUILTIN_SCATTERALTSIV8DI : IX86_BUILTIN_SCATTERDIV8DI;
42705 code = si ? IX86_BUILTIN_SCATTERSIV16SF : IX86_BUILTIN_SCATTERALTDIV16SF;
42708 code = si ? IX86_BUILTIN_SCATTERSIV16SI : IX86_BUILTIN_SCATTERALTDIV16SI;
/* NOTE(review): gather above returns ix86_get_builtin (code), but scatter
   indexes ix86_builtins directly -- confirm the asymmetry is intended.  */
42714 return ix86_builtins[code];
/* NOTE(review): the function header line is missing from this fragmentary
   listing; presumably this is the rsqrt-safety predicate (use_rsqrt_p).
   rsqrt approximation is only valid under fast-math-style flags.  */
42717 /* Return true if it is safe to use the rsqrt optabs to optimize
42723 return (TARGET_SSE_MATH
42724 && flag_finite_math_only
42725 && !flag_trapping_math
42726 && flag_unsafe_math_optimizations);
/* NOTE(review): fragmentary listing -- the default case / closing return of
   the switch is missing below.  Maps a sqrt builtin to its rsqrt variant.  */
42729 /* Returns a code for a target-specific builtin that implements
42730 reciprocal of the function, or NULL_TREE if not available. */
42733 ix86_builtin_reciprocal (tree fndecl)
42735 switch (DECL_FUNCTION_CODE (fndecl))
42737 /* Vectorized version of sqrt to rsqrt conversion. */
42738 case IX86_BUILTIN_SQRTPS_NR:
42739 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR);
42741 case IX86_BUILTIN_SQRTPS_NR256:
42742 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256);
/* NOTE(review): fragmentary listing -- the mode-dispatch switch labels,
   several range checks and "return 0" lines are missing in between.  */
42749 /* Helper for avx_vpermilps256_operand et al. This is also used by
42750 the expansion functions to turn the parallel back into a mask.
42751 The return value is 0 for no match and the imm8+1 for a match. */
42754 avx_vpermilp_parallel (rtx par, machine_mode mode)
42756 unsigned i, nelt = GET_MODE_NUNITS (mode);
42758 unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */
/* The PARALLEL must have exactly one element per vector lane.  */
42760 if (XVECLEN (par, 0) != (int) nelt)
42763 /* Validate that all of the elements are constants, and not totally
42764 out of range. Copy the data into an integral array to make the
42765 subsequent checks easier. */
42766 for (i = 0; i < nelt; ++i)
42768 rtx er = XVECEXP (par, 0, i);
42769 unsigned HOST_WIDE_INT ei;
42771 if (!CONST_INT_P (er))
42782 /* In the 512-bit DFmode case, we can only move elements within
42783 a 128-bit lane. First fill the second part of the mask,
/* Each pair of elements must select within its own 128-bit lane;
   the in-lane offset is folded into the imm8 mask bit by bit.  */
42785 for (i = 4; i < 6; ++i)
42787 if (ipar[i] < 4 || ipar[i] >= 6)
42789 mask |= (ipar[i] - 4) << i;
42791 for (i = 6; i < 8; ++i)
42795 mask |= (ipar[i] - 6) << i;
42800 /* In the 256-bit DFmode case, we can only move elements within
42802 for (i = 0; i < 2; ++i)
42806 mask |= ipar[i] << i;
42808 for (i = 2; i < 4; ++i)
42812 mask |= (ipar[i] - 2) << i;
42817 /* In 512 bit SFmode case, permutation in the upper 256 bits
42818 must mirror the permutation in the lower 256-bits. */
42819 for (i = 0; i < 8; ++i)
42820 if (ipar[i] + 8 != ipar[i + 8])
42825 /* In 256 bit SFmode case, we have full freedom of
42826 movement within the low 128-bit lane, but the high 128-bit
42827 lane must mirror the exact same pattern. */
42828 for (i = 0; i < 4; ++i)
42829 if (ipar[i] + 4 != ipar[i + 4])
42836 /* In the 128-bit case, we've full freedom in the placement of
42837 the elements from the source operand. */
42838 for (i = 0; i < nelt; ++i)
42839 mask |= ipar[i] << (i * (nelt / 2));
42843 gcc_unreachable ();
42846 /* Make sure success has a non-zero value by adding one. */
/* NOTE(review): fragmentary listing -- return statements and some checks
   are missing between the numbered lines.  */
42850 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
42851 the expansion functions to turn the parallel back into a mask.
42852 The return value is 0 for no match and the imm8+1 for a match. */
42855 avx_vperm2f128_parallel (rtx par, machine_mode mode)
42857 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
42859 unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
42861 if (XVECLEN (par, 0) != (int) nelt)
42864 /* Validate that all of the elements are constants, and not totally
42865 out of range. Copy the data into an integral array to make the
42866 subsequent checks easier. */
42867 for (i = 0; i < nelt; ++i)
42869 rtx er = XVECEXP (par, 0, i);
42870 unsigned HOST_WIDE_INT ei;
42872 if (!CONST_INT_P (er))
/* Indexes select from the 2*nelt elements of both source operands.  */
42875 if (ei >= 2 * nelt)
42880 /* Validate that the halves of the permute are halves. */
/* Each half must be a run of consecutive indexes (a whole 128-bit lane).  */
42881 for (i = 0; i < nelt2 - 1; ++i)
42882 if (ipar[i] + 1 != ipar[i + 1])
42884 for (i = nelt2; i < nelt - 1; ++i)
42885 if (ipar[i] + 1 != ipar[i + 1])
42888 /* Reconstruct the mask. */
/* Each selected lane contributes a nibble of the vperm2f128 imm8.  */
42889 for (i = 0; i < 2; ++i)
42891 unsigned e = ipar[i * nelt2];
42895 mask |= e << (i * 4);
42898 /* Make sure success has a non-zero value by adding one. */
/* NOTE(review): fragmentary listing -- the priority values returned by each
   branch are on missing lines; only the conditions are visible.  */
42902 /* Return a register priority for hard reg REGNO. */
42904 ix86_register_priority (int hard_regno)
42906 /* ebp and r13 as the base always wants a displacement, r12 as the
42907 base always wants an index. So discourage their usage in an
42909 if (hard_regno == R12_REG || hard_regno == R13_REG)
42911 if (hard_regno == BP_REG)
/* REX-prefixed registers cost an extra encoding byte.  */
42913 /* New x86-64 int registers result in bigger code size. Discourage
42915 if (FIRST_REX_INT_REG <= hard_regno && hard_regno <= LAST_REX_INT_REG)
42917 /* New x86-64 SSE registers result in bigger code size. Discourage
42919 if (FIRST_REX_SSE_REG <= hard_regno && hard_regno <= LAST_REX_SSE_REG)
42921 /* Usage of AX register results in smaller code. Prefer it. */
42922 if (hard_regno == AX_REG)
/* NOTE(review): fragmentary listing -- several returns and conditions
   (e.g. the CONSTANT_P guard before the MMX/SSE/MASK check) are missing.  */
42927 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
42929 Put float CONST_DOUBLE in the constant pool instead of fp regs.
42930 QImode must go into class Q_REGS.
42931 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
42932 movdf to do mem-to-mem moves through integer regs. */
42935 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
42937 machine_mode mode = GET_MODE (x);
42939 /* We're only allowed to return a subclass of CLASS. Many of the
42940 following checks fail for NO_REGS, so eliminate that early. */
42941 if (regclass == NO_REGS)
42944 /* All classes can load zeros. */
42945 if (x == CONST0_RTX (mode))
42948 /* Force constants into memory if we are loading a (nonzero) constant into
42949 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
42950 instructions to load from a constant. */
42952 && (MAYBE_MMX_CLASS_P (regclass)
42953 || MAYBE_SSE_CLASS_P (regclass)
42954 || MAYBE_MASK_CLASS_P (regclass)))
42957 /* Prefer SSE regs only, if we can use them for math. */
42958 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
42959 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
42961 /* Floating-point constants need more complex checks. */
42962 if (CONST_DOUBLE_P (x))
42964 /* General regs can load everything. */
42965 if (reg_class_subset_p (regclass, GENERAL_REGS))
42968 /* Floats can load 0 and 1 plus some others. Note that we eliminated
42969 zero above. We only want to wind up preferring 80387 registers if
42970 we plan on doing computation with them. */
42972 && standard_80387_constant_p (x) > 0)
/* Strip the SSE component from mixed classes for x87-loadable constants.  */
42974 /* Limit class to non-sse. */
42975 if (regclass == FLOAT_SSE_REGS)
42977 if (regclass == FP_TOP_SSE_REGS)
42979 if (regclass == FP_SECOND_SSE_REGS)
42980 return FP_SECOND_REG;
42981 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
42988 /* Generally when we see PLUS here, it's the function invariant
42989 (plus soft-fp const_int). Which can only be computed into general
42991 if (GET_CODE (x) == PLUS)
42992 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
42994 /* QImode constants are easy to load, but non-constant QImode data
42995 must go into Q_REGS. */
42996 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
42998 if (reg_class_subset_p (regclass, Q_REGS))
43000 if (reg_class_subset_p (Q_REGS, regclass))
/* NOTE(review): fragmentary listing -- some return statements are missing
   between the numbered lines.  */
43008 /* Discourage putting floating-point values in SSE registers unless
43009 SSE math is being used, and likewise for the 387 registers. */
43011 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
43013 machine_mode mode = GET_MODE (x);
43015 /* Restrict the output reload class to the register bank that we are doing
43016 math on. If we would like not to return a subset of CLASS, reject this
43017 alternative: if reload cannot do this, it will still use its choice. */
/* NOTE(review): mode is assigned twice (here and at declaration) in the
   visible lines -- the second assignment is redundant as shown.  */
43018 mode = GET_MODE (x);
43019 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
43020 return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
43022 if (X87_FLOAT_MODE_P (mode))
43024 if (regclass == FP_TOP_SSE_REGS)
43026 else if (regclass == FP_SECOND_SSE_REGS)
43027 return FP_SECOND_REG;
43029 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
/* NOTE(review): fragmentary listing -- the static return type, several
   conditions (e.g. the MEM_P/TARGET_64BIT guards) and returns are missing.
   Implements TARGET_SECONDARY_RELOAD for i386.  */
43036 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
43037 machine_mode mode, secondary_reload_info *sri)
43039 /* Double-word spills from general registers to non-offsettable memory
43040 references (zero-extended addresses) require special handling. */
43043 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
43044 && INTEGER_CLASS_P (rclass)
43045 && !offsettable_memref_p (x))
/* Use the dedicated reload_noff patterns for the non-offsettable case.  */
43048 ? CODE_FOR_reload_noff_load
43049 : CODE_FOR_reload_noff_store);
43050 /* Add the cost of moving address to a temporary. */
43051 sri->extra_cost = 1;
43056 /* QImode spills from non-QI registers require
43057 intermediate register on 32bit targets. */
43059 && (MAYBE_MASK_CLASS_P (rclass)
43060 || (!TARGET_64BIT && !in_p
43061 && INTEGER_CLASS_P (rclass)
43062 && MAYBE_NON_Q_CLASS_P (rclass))))
/* Resolve pseudos/subregs to a hard register number where possible.  */
43071 if (regno >= FIRST_PSEUDO_REGISTER || SUBREG_P (x))
43072 regno = true_regnum (x);
43074 /* Return Q_REGS if the operand is in memory. */
43079 /* This condition handles corner case where an expression involving
43080 pointers gets vectorized. We're trying to use the address of a
43081 stack slot as a vector initializer.
43083 (set (reg:V2DI 74 [ vect_cst_.2 ])
43084 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
43086 Eventually frame gets turned into sp+offset like this:
43088 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
43089 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
43090 (const_int 392 [0x188]))))
43092 That later gets turned into:
43094 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
43095 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
43096 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
43098 We'll have the following reload recorded:
43100 Reload 0: reload_in (DI) =
43101 (plus:DI (reg/f:DI 7 sp)
43102 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
43103 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
43104 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
43105 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
43106 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
43107 reload_reg_rtx: (reg:V2DI 22 xmm1)
43109 Which isn't going to work since SSE instructions can't handle scalar
43110 additions. Returning GENERAL_REGS forces the addition into integer
43111 register and reload can handle subsequent reloads without problems. */
43113 if (in_p && GET_CODE (x) == PLUS
43114 && SSE_CLASS_P (rclass)
43115 && SCALAR_INT_MODE_P (mode))
43116 return GENERAL_REGS;
/* NOTE(review): only a sliver of this switch survives in the listing --
   the surrounding case labels and return values are on missing lines.  */
43121 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
43124 ix86_class_likely_spilled_p (reg_class_t rclass)
43135 case SSE_FIRST_REG:
43137 case FP_SECOND_REG:
/* NOTE(review): fragmentary listing -- return statements and the
   "static inline bool" header line are missing between the numbered lines.  */
43148 /* If we are copying between general and FP registers, we need a memory
43149 location. The same is true for SSE and MMX registers.
43151 To optimize register_move_cost performance, allow inline variant.
43153 The macro can't work reliably when one of the CLASSES is class containing
43154 registers from multiple units (SSE, MMX, integer). We avoid this by never
43155 combining those units in single alternative in the machine description.
43156 Ensure that this constraint holds to avoid unexpected surprises.
43158 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
43159 enforce these sanity checks. */
43162 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
43163 machine_mode mode, int strict)
43165 if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
/* Sanity: classes must not mix units (float/SSE/MMX with others).  */
43167 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
43168 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
43169 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
43170 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
43171 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
43172 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
43174 gcc_assert (!strict || lra_in_progress);
/* x87 <-> non-x87 always goes through memory.  */
43178 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
43181 /* Between mask and general, we have moves no larger than word size. */
43182 if ((MAYBE_MASK_CLASS_P (class1) != MAYBE_MASK_CLASS_P (class2))
43183 && (GET_MODE_SIZE (mode) > UNITS_PER_WORD))
43186 /* ??? This is a lie. We do have moves between mmx/general, and for
43187 mmx/sse2. But by saying we need secondary memory we discourage the
43188 register allocator from using the mmx registers unless needed. */
43189 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
43192 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
43194 /* SSE1 doesn't have any direct moves from other classes. */
43198 /* If the target says that inter-unit moves are more expensive
43199 than moving through memory, then don't generate them. */
43200 if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
43201 || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
43204 /* Between SSE and general, we have moves no larger than word size. */
43205 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
/* Out-of-line wrapper for the inline variant above; used where the call
   sites don't need the inlined form.  (Header line missing from listing.)  */
43213 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
43214 machine_mode mode, int strict)
43216 return inline_secondary_memory_needed (class1, class2, mode, strict);
/* NOTE(review): fragmentary listing -- the non-integer branch's returns are
   partly on missing lines.  */
43219 /* Implement the TARGET_CLASS_MAX_NREGS hook.
43221 On the 80386, this is the size of MODE in words,
43222 except in the FP regs, where a single reg is always enough. */
43224 static unsigned char
43225 ix86_class_max_nregs (reg_class_t rclass, machine_mode mode)
43227 if (MAYBE_INTEGER_CLASS_P (rclass))
/* XFmode is 80-bit: 2 x 64-bit words on 64-bit, 3 x 32-bit words on 32-bit.  */
43229 if (mode == XFmode)
43230 return (TARGET_64BIT ? 2 : 3);
43231 else if (mode == XCmode)
43232 return (TARGET_64BIT ? 4 : 6);
43234 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
43238 if (COMPLEX_MODE_P (mode))
/* NOTE(review): fragmentary listing -- several returns and the QI/HI size
   check following the comment at 43265 are on missing lines.  */
43245 /* Return true if the registers in CLASS cannot represent the change from
43246 modes FROM to TO. */
43249 ix86_cannot_change_mode_class (machine_mode from, machine_mode to,
43250 enum reg_class regclass)
43255 /* x87 registers can't do subreg at all, as all values are reformatted
43256 to extended precision. */
43257 if (MAYBE_FLOAT_CLASS_P (regclass))
43260 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
43262 int from_size = GET_MODE_SIZE (from);
43263 int to_size = GET_MODE_SIZE (to);
43265 /* Vector registers do not support QI or HImode loads. If we don't
43266 disallow a change to these modes, reload will assume it's ok to
43267 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
43268 the vec_dupv4hi pattern. */
43272 /* Further, we cannot allow word_mode subregs of full vector modes.
43273 Otherwise the middle-end will assume it's ok to store to
43274 (subreg:DI (reg:TI 100) 0) in order to modify only the low 64 bits
43275 of the 128-bit register. However, after reload the subreg will
43276 be dropped leaving a plain DImode store. This is indistinguishable
43277 from a "normal" DImode move, and so we're justified to use movsd,
43278 which modifies the entire 128-bit register. */
43279 if (to_size == UNITS_PER_WORD && from_size > UNITS_PER_WORD)
/* NOTE(review): fragmentary listing -- the switch case labels, the "index"
   computations and several intermediate lines are missing.  IN semantics:
   0 = store cost, 1 = load cost, 2 = max of both (per the comment below).  */
43286 /* Return the cost of moving data of mode M between a
43287 register and memory. A value of 2 is the default; this cost is
43288 relative to those in `REGISTER_MOVE_COST'.
43290 This function is used extensively by register_move_cost that is used to
43291 build tables at startup. Make it inline in this case.
43292 When IN is 2, return maximum of in and out move cost.
43294 If moving between registers and memory is more expensive than
43295 between two registers, you should define this macro to express the
43298 Model also increased moving costs of QImode registers in non
43302 inline_memory_move_cost (machine_mode mode, enum reg_class regclass,
/* x87 register file: cost indexed by mode.  */
43306 if (FLOAT_CLASS_P (regclass))
43324 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
43325 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
/* SSE register file: cost indexed by GET_MODE_SIZE.  */
43327 if (SSE_CLASS_P (regclass))
43330 switch (GET_MODE_SIZE (mode))
43345 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
43346 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
/* MMX register file.  */
43348 if (MMX_CLASS_P (regclass))
43351 switch (GET_MODE_SIZE (mode))
43363 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
43364 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
/* Integer registers: byte moves are penalized on non-Q regs / partial-reg
   dependency targets, which prefer movzbl loads.  */
43366 switch (GET_MODE_SIZE (mode))
43369 if (Q_CLASS_P (regclass) || TARGET_64BIT)
43372 return ix86_cost->int_store[0];
43373 if (TARGET_PARTIAL_REG_DEPENDENCY
43374 && optimize_function_for_speed_p (cfun))
43375 cost = ix86_cost->movzbl_load;
43377 cost = ix86_cost->int_load[0];
43379 return MAX (cost, ix86_cost->int_store[0]);
43385 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
43387 return ix86_cost->movzbl_load;
43389 return ix86_cost->int_store[0] + 4;
43394 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
43395 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
43397 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
43398 if (mode == TFmode)
43401 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
43403 cost = ix86_cost->int_load[2];
43405 cost = ix86_cost->int_store[2];
43406 return cost * CEIL ((int) GET_MODE_SIZE (mode), UNITS_PER_WORD);
/* Hook wrapper for inline_memory_move_cost; collapses the bool IN into the
   0/1 encoding the inline variant expects.  (Header line missing.)  */
43411 ix86_memory_move_cost (machine_mode mode, reg_class_t regclass,
43414 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
/* NOTE(review): fragmentary listing -- the base-cost initialization and
   several returns are on missing lines.  */
43418 /* Return the cost of moving data from a register in class CLASS1 to
43419 one in class CLASS2.
43421 It is not required that the cost always equal 2 when FROM is the same as TO;
43422 on some machines it is expensive to move between registers if they are not
43423 general registers. */
43426 ix86_register_move_cost (machine_mode mode, reg_class_t class1_i,
43427 reg_class_t class2_i)
43429 enum reg_class class1 = (enum reg_class) class1_i;
43430 enum reg_class class2 = (enum reg_class) class2_i;
43432 /* In case we require secondary memory, compute cost of the store followed
43433 by load. In order to avoid bad register allocation choices, we need
43434 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
43436 if (inline_secondary_memory_needed (class1, class2, mode, 0))
/* in = 2 => max(load, store) for each side of the round trip.  */
43440 cost += inline_memory_move_cost (mode, class1, 2);
43441 cost += inline_memory_move_cost (mode, class2, 2);
43443 /* In case of copying from general_purpose_register we may emit multiple
43444 stores followed by single load causing memory size mismatch stall.
43445 Count this as arbitrarily high cost of 20. */
43446 if (targetm.class_max_nregs (class1, mode)
43447 > targetm.class_max_nregs (class2, mode))
43450 /* In the case of FP/MMX moves, the registers actually overlap, and we
43451 have to switch modes in order to treat them differently. */
43452 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
43453 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
43459 /* Moves between SSE/MMX and integer unit are expensive. */
43460 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
43461 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
43463 /* ??? By keeping returned value relatively high, we limit the number
43464 of moves between integer and MMX/SSE registers for all targets.
43465 Additionally, high value prevents problem with x86_modes_tieable_p(),
43466 where integer modes in MMX/SSE registers are not tieable
43467 because of missing QImode and HImode moves to, from or between
43468 MMX/SSE registers. */
43469 return MAX (8, ix86_cost->mmxsse_to_integer);
43471 if (MAYBE_FLOAT_CLASS_P (class1))
43472 return ix86_cost->fp_move;
43473 if (MAYBE_SSE_CLASS_P (class1))
43474 return ix86_cost->sse_move;
43475 if (MAYBE_MMX_CLASS_P (class1))
43476 return ix86_cost->mmx_move;
/* NOTE(review): fragmentary listing -- several returns and conditions
   (e.g. the TARGET_AVX512F guard near 43512) are on missing lines.  */
43480 /* Return TRUE if hard register REGNO can hold a value of machine-mode
43484 ix86_hard_regno_mode_ok (int regno, machine_mode mode)
43486 /* Flags and only flags can only hold CCmode values. */
43487 if (CC_REGNO_P (regno))
43488 return GET_MODE_CLASS (mode) == MODE_CC;
43489 if (GET_MODE_CLASS (mode) == MODE_CC
43490 || GET_MODE_CLASS (mode) == MODE_RANDOM
43491 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
43493 if (STACK_REGNO_P (regno))
43494 return VALID_FP_MODE_P (mode);
/* Mask (k0-k7) registers: wider mask modes need AVX512BW.  */
43495 if (MASK_REGNO_P (regno))
43496 return (VALID_MASK_REG_MODE (mode)
43497 || (TARGET_AVX512BW
43498 && VALID_MASK_AVX512BW_MODE (mode)));
43499 if (BND_REGNO_P (regno))
43500 return VALID_BND_REG_MODE (mode);
43501 if (SSE_REGNO_P (regno))
43503 /* We implement the move patterns for all vector modes into and
43504 out of SSE registers, even when no operation instructions
43507 /* For AVX-512 we allow, regardless of regno:
43509 - any of 512-bit wide vector mode
43510 - any scalar mode. */
43513 || VALID_AVX512F_REG_MODE (mode)
43514 || VALID_AVX512F_SCALAR_MODE (mode)))
43517 /* TODO check for QI/HI scalars. */
43518 /* AVX512VL allows sse regs16+ for 128/256 bit modes. */
43519 if (TARGET_AVX512VL
43522 || VALID_AVX256_REG_MODE (mode)
43523 || VALID_AVX512VL_128_REG_MODE (mode)))
43526 /* xmm16-xmm31 are only available for AVX-512. */
43527 if (EXT_REX_SSE_REGNO_P (regno))
43530 /* OImode and AVX modes are available only when AVX is enabled. */
43531 return ((TARGET_AVX
43532 && VALID_AVX256_REG_OR_OI_MODE (mode))
43533 || VALID_SSE_REG_MODE (mode)
43534 || VALID_SSE2_REG_MODE (mode)
43535 || VALID_MMX_REG_MODE (mode)
43536 || VALID_MMX_REG_MODE_3DNOW (mode));
43538 if (MMX_REGNO_P (regno))
43540 /* We implement the move patterns for 3DNOW modes even in MMX mode,
43541 so if the register is available at all, then we can move data of
43542 the given mode into or out of it. */
43543 return (VALID_MMX_REG_MODE (mode)
43544 || VALID_MMX_REG_MODE_3DNOW (mode));
/* From here on: general-purpose registers.  */
43547 if (mode == QImode)
43549 /* Take care for QImode values - they can be in non-QI regs,
43550 but then they do cause partial register stalls. */
43551 if (ANY_QI_REGNO_P (regno))
43553 if (!TARGET_PARTIAL_REG_STALL)
43555 /* LRA checks if the hard register is OK for the given mode.
43556 QImode values can live in non-QI regs, so we allow all
43558 if (lra_in_progress)
43560 return !can_create_pseudo_p ();
43562 /* We handle both integer and floats in the general purpose registers. */
43563 else if (VALID_INT_MODE_P (mode))
43565 else if (VALID_FP_MODE_P (mode))
43567 else if (VALID_DFP_MODE_P (mode))
43569 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
43570 on to use that value in smaller contexts, this can easily force a
43571 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
43572 supporting DImode, allow it. */
43573 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
/* NOTE(review): the switch's case labels are on missing lines; only the
   per-group return expressions survive (HI-group gated by partial-reg
   stalls, SI/DI-group by TARGET_64BIT, judging from the values shown).  */
43579 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
43580 tieable integer mode. */
43583 ix86_tieable_integer_mode_p (machine_mode mode)
43592 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
43595 return TARGET_64BIT;
/* NOTE(review): fragmentary listing -- the "return true/false" lines after
   some conditions are missing.  */
43602 /* Return true if MODE1 is accessible in a register that can hold MODE2
43603 without copying. That is, all register classes that can hold MODE2
43604 can also hold MODE1. */
43607 ix86_modes_tieable_p (machine_mode mode1, machine_mode mode2)
43609 if (mode1 == mode2)
43612 if (ix86_tieable_integer_mode_p (mode1)
43613 && ix86_tieable_integer_mode_p (mode2))
43616 /* MODE2 being XFmode implies fp stack or general regs, which means we
43617 can tie any smaller floating point modes to it. Note that we do not
43618 tie this with TFmode. */
43619 if (mode2 == XFmode)
43620 return mode1 == SFmode || mode1 == DFmode;
43622 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
43623 that we can tie it with SFmode. */
43624 if (mode2 == DFmode)
43625 return mode1 == SFmode;
43627 /* If MODE2 is only appropriate for an SSE register, then tie with
43628 any other mode acceptable to SSE registers. */
/* Size buckets: 32 bytes (AVX), 16 bytes (SSE), 8 bytes (MMX).  */
43629 if (GET_MODE_SIZE (mode2) == 32
43630 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
43631 return (GET_MODE_SIZE (mode1) == 32
43632 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
43633 if (GET_MODE_SIZE (mode2) == 16
43634 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
43635 return (GET_MODE_SIZE (mode1) == 16
43636 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
43638 /* If MODE2 is appropriate for an MMX register, then tie
43639 with any other mode acceptable to MMX registers. */
43640 if (GET_MODE_SIZE (mode2) == 8
43641 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
43642 return (GET_MODE_SIZE (mode1) == 8
43643 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
/* NOTE(review): fragmentary listing -- case labels (MODE_CC, MODE_FLOAT,
   defaults) and break statements are on missing lines.  */
43648 /* Return the cost of moving between two registers of mode MODE. */
43651 ix86_set_reg_reg_cost (machine_mode mode)
/* Default chunk size is one machine word; widened below when the target
   can move the whole mode in one register.  */
43653 unsigned int units = UNITS_PER_WORD;
43655 switch (GET_MODE_CLASS (mode))
43661 units = GET_MODE_SIZE (CCmode);
/* Scalar FP moved as a unit when a suitable FP/SSE unit exists.  */
43665 if ((TARGET_SSE && mode == TFmode)
43666 || (TARGET_80387 && mode == XFmode)
43667 || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
43668 || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
43669 units = GET_MODE_SIZE (mode);
43672 case MODE_COMPLEX_FLOAT:
43673 if ((TARGET_SSE && mode == TCmode)
43674 || (TARGET_80387 && mode == XCmode)
43675 || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
43676 || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
43677 units = GET_MODE_SIZE (mode);
43680 case MODE_VECTOR_INT:
43681 case MODE_VECTOR_FLOAT:
/* Vector modes move whole when the widest matching ISA is enabled.  */
43682 if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
43683 || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
43684 || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
43685 || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
43686 || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
43687 units = GET_MODE_SIZE (mode);
43690 /* Return the cost of moving between two registers of mode MODE,
43691 assuming that the move will be in pieces of at most UNITS bytes. */
43692 return COSTS_N_INSNS (CEIL (GET_MODE_SIZE (mode), units));
43695 /* Compute a (partial) cost for rtx X. Return true if the complete
43696 cost has been computed, and false if subexpressions should be
43697 scanned. In either case, *TOTAL contains the cost result. */
/* NOTE(review): this listing is gap-sampled -- the enclosing case labels,
   braces and break/return statements between the visible lines are not
   shown; the comments added below describe only the visible logic.  */
43700 ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
43701 int *total, bool speed)
43704 enum rtx_code code = GET_CODE (x);
43705 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
/* Size-optimized builds use the byte-size cost table instead of the
   per-CPU latency table.  */
43706 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
/* A SET of a register from a register (or from const0) costs only the
   mode-dependent register move cost.  */
43711 if (register_operand (SET_DEST (x), VOIDmode)
43712 && reg_or_0_operand (SET_SRC (x), VOIDmode))
43714 *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
/* Constant costs: immediates that do not fit the x86_64 immediate
   forms are more expensive; local symbols under PIC stay cheap.  */
43723 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
43725 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
43727 else if (flag_pic && SYMBOLIC_CONST (x)
43729 && (GET_CODE (x) == LABEL_REF
43730 || (GET_CODE (x) == SYMBOL_REF
43731 && SYMBOL_REF_LOCAL_P (x))))
43732 /* Use 0 cost for CONST to improve its propagation. */
43733 && (TARGET_64BIT || GET_CODE (x) != CONST))
43739 case CONST_WIDE_INT:
/* FP constants that the x87 can materialize directly (fldz/fld1 etc.)
   are cheaper than ones that must come from memory.  */
43744 switch (standard_80387_constant_p (x))
43749 default: /* Other constants */
43756 if (SSE_FLOAT_MODE_P (mode))
43759 switch (standard_sse_constant_p (x))
43763 case 1: /* 0: xor eliminates false dependency */
43766 default: /* -1: cmp contains false dependency */
43771 /* Fall back to (MEM (SYMBOL_REF)), since that's where
43772 it'll probably end up. Add a penalty for size. */
43773 *total = (COSTS_N_INSNS (1)
43774 + (flag_pic != 0 && !TARGET_64BIT)
43775 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
43779 /* The zero extensions is often completely free on x86_64, so make
43780 it as cheap as possible. */
43781 if (TARGET_64BIT && mode == DImode
43782 && GET_MODE (XEXP (x, 0)) == SImode)
43784 else if (TARGET_ZERO_EXTEND_WITH_AND)
43785 *total = cost->add;
43787 *total = cost->movzx;
43791 *total = cost->movsx;
/* Shift costs: a small constant left shift of a sub-word integer can
   be done with add or lea when that is cheaper than a shift insn.  */
43795 if (SCALAR_INT_MODE_P (mode)
43796 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
43797 && CONST_INT_P (XEXP (x, 1)))
43799 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
43802 *total = cost->add;
43805 if ((value == 2 || value == 3)
43806 && cost->lea <= cost->shift_const)
43808 *total = cost->lea;
43818 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
43820 /* ??? Should be SSE vector operation cost. */
43821 /* At least for published AMD latencies, this really is the same
43822 as the latency for a simple fpu operation like fabs. */
43823 /* V*QImode is emulated with 1-11 insns. */
43824 if (mode == V16QImode || mode == V32QImode)
43827 if (TARGET_XOP && mode == V16QImode)
43829 /* For XOP we use vpshab, which requires a broadcast of the
43830 value to the variable shift insn. For constants this
43831 means a V16Q const in mem; even when we can perform the
43832 shift with one insn set the cost to prefer paddb. */
43833 if (CONSTANT_P (XEXP (x, 1)))
43835 *total = (cost->fabs
43836 + rtx_cost (XEXP (x, 0), mode, code, 0, speed)
43837 + (speed ? 2 : COSTS_N_BYTES (16)));
43842 else if (TARGET_SSSE3)
43844 *total = cost->fabs * count;
43847 *total = cost->fabs;
/* Double-word shifts are emulated with multiple single-word shifts;
   a variable count is substantially more expensive.  */
43849 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
43851 if (CONST_INT_P (XEXP (x, 1)))
43853 if (INTVAL (XEXP (x, 1)) > 32)
43854 *total = cost->shift_const + COSTS_N_INSNS (2);
43856 *total = cost->shift_const * 2;
43860 if (GET_CODE (XEXP (x, 1)) == AND)
43861 *total = cost->shift_var * 2;
43863 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
43868 if (CONST_INT_P (XEXP (x, 1)))
43869 *total = cost->shift_const;
43870 else if (SUBREG_P (XEXP (x, 1))
43871 && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND)
43873 /* Return the cost after shift-and truncation. */
43874 *total = cost->shift_var;
43878 *total = cost->shift_var;
/* FMA: costed like a multiply; a NEG wrapped around operand 0 or 2 is
   folded into the FMS/FNMA/FNMS forms and therefore free.  */
43886 gcc_assert (FLOAT_MODE_P (mode));
43887 gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
43889 /* ??? SSE scalar/vector cost should be used here. */
43890 /* ??? Bald assumption that fma has the same cost as fmul. */
43891 *total = cost->fmul;
43892 *total += rtx_cost (XEXP (x, 1), mode, FMA, 1, speed);
43894 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
43896 if (GET_CODE (sub) == NEG)
43897 sub = XEXP (sub, 0);
43898 *total += rtx_cost (sub, mode, FMA, 0, speed);
43901 if (GET_CODE (sub) == NEG)
43902 sub = XEXP (sub, 0);
43903 *total += rtx_cost (sub, mode, FMA, 2, speed);
43908 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
43910 /* ??? SSE scalar cost should be used here. */
43911 *total = cost->fmul;
43914 else if (X87_FLOAT_MODE_P (mode))
43916 *total = cost->fmul;
43919 else if (FLOAT_MODE_P (mode))
43921 /* ??? SSE vector cost should be used here. */
43922 *total = cost->fmul;
/* Integer vector multiply: several modes have no single insn and are
   costed as the emulation sequences described below.  */
43925 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
43927 /* V*QImode is emulated with 7-13 insns. */
43928 if (mode == V16QImode || mode == V32QImode)
43931 if (TARGET_XOP && mode == V16QImode)
43933 else if (TARGET_SSSE3)
43935 *total = cost->fmul * 2 + cost->fabs * extra;
43937 /* V*DImode is emulated with 5-8 insns. */
43938 else if (mode == V2DImode || mode == V4DImode)
43940 if (TARGET_XOP && mode == V2DImode)
43941 *total = cost->fmul * 2 + cost->fabs * 3;
43943 *total = cost->fmul * 3 + cost->fabs * 5;
43945 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
43946 insns, including two PMULUDQ. */
43947 else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
43948 *total = cost->fmul * 2 + cost->fabs * 5;
43950 *total = cost->fmul;
/* Scalar integer multiply: cost depends on the popcount of a constant
   multiplier (nbits) and recognizes widening-multiply patterns.  */
43955 rtx op0 = XEXP (x, 0);
43956 rtx op1 = XEXP (x, 1);
43958 if (CONST_INT_P (XEXP (x, 1)))
43960 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
/* Kernighan bit-count loop: one iteration per set bit.  */
43961 for (nbits = 0; value != 0; value &= value - 1)
43965 /* This is arbitrary. */
43968 /* Compute costs correctly for widening multiplication. */
43969 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
43970 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
43971 == GET_MODE_SIZE (mode))
43973 int is_mulwiden = 0;
43974 machine_mode inner_mode = GET_MODE (op0);
43976 if (GET_CODE (op0) == GET_CODE (op1))
43977 is_mulwiden = 1, op1 = XEXP (op1, 0);
43978 else if (CONST_INT_P (op1))
43980 if (GET_CODE (op0) == SIGN_EXTEND)
43981 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
43984 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
43988 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
43991 *total = (cost->mult_init[MODE_INDEX (mode)]
43992 + nbits * cost->mult_bit
43993 + rtx_cost (op0, mode, outer_code, opno, speed)
43994 + rtx_cost (op1, mode, outer_code, opno, speed));
44003 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
44004 /* ??? SSE cost should be used here. */
44005 *total = cost->fdiv;
44006 else if (X87_FLOAT_MODE_P (mode))
44007 *total = cost->fdiv;
44008 else if (FLOAT_MODE_P (mode))
44009 /* ??? SSE vector cost should be used here. */
44010 *total = cost->fdiv;
44012 *total = cost->divide[MODE_INDEX (mode)];
/* PLUS: recognize the address forms LEA can encode
   (base + index*scale + displacement, scale in {2,4,8}) and charge
   a single lea plus the operand costs.  */
44016 if (GET_MODE_CLASS (mode) == MODE_INT
44017 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
44019 if (GET_CODE (XEXP (x, 0)) == PLUS
44020 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
44021 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
44022 && CONSTANT_P (XEXP (x, 1)))
44024 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
44025 if (val == 2 || val == 4 || val == 8)
44027 *total = cost->lea;
44028 *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
44029 outer_code, opno, speed);
44030 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
44031 outer_code, opno, speed);
44032 *total += rtx_cost (XEXP (x, 1), mode,
44033 outer_code, opno, speed);
44037 else if (GET_CODE (XEXP (x, 0)) == MULT
44038 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
44040 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
44041 if (val == 2 || val == 4 || val == 8)
44043 *total = cost->lea;
44044 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
44045 outer_code, opno, speed);
44046 *total += rtx_cost (XEXP (x, 1), mode,
44047 outer_code, opno, speed);
44051 else if (GET_CODE (XEXP (x, 0)) == PLUS)
44053 *total = cost->lea;
44054 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
44055 outer_code, opno, speed);
44056 *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
44057 outer_code, opno, speed);
44058 *total += rtx_cost (XEXP (x, 1), mode,
44059 outer_code, opno, speed);
44066 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
44068 /* ??? SSE cost should be used here. */
44069 *total = cost->fadd;
44072 else if (X87_FLOAT_MODE_P (mode))
44074 *total = cost->fadd;
44077 else if (FLOAT_MODE_P (mode))
44079 /* ??? SSE vector cost should be used here. */
44080 *total = cost->fadd;
/* Double-word add/sub: two adds, with operands not already in DImode
   charged double (the shift by a boolean doubles the operand cost).  */
44088 if (GET_MODE_CLASS (mode) == MODE_INT
44089 && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
44091 *total = (cost->add * 2
44092 + (rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed)
44093 << (GET_MODE (XEXP (x, 0)) != DImode))
44094 + (rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed)
44095 << (GET_MODE (XEXP (x, 1)) != DImode)));
44101 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
44103 /* ??? SSE cost should be used here. */
44104 *total = cost->fchs;
44107 else if (X87_FLOAT_MODE_P (mode))
44109 *total = cost->fchs;
44112 else if (FLOAT_MODE_P (mode))
44114 /* ??? SSE vector cost should be used here. */
44115 *total = cost->fchs;
44121 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
44123 /* ??? Should be SSE vector operation cost. */
44124 /* At least for published AMD latencies, this really is the same
44125 as the latency for a simple fpu operation like fabs. */
44126 *total = cost->fabs;
44128 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
44129 *total = cost->add * 2;
44131 *total = cost->add;
/* COMPARE of a single-bit ZERO_EXTRACT against 0 maps to test[bwl],
   so cost it like an AND with const1.  */
44135 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
44136 && XEXP (XEXP (x, 0), 1) == const1_rtx
44137 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
44138 && XEXP (x, 1) == const0_rtx)
44140 /* This kind of construct is implemented using test[bwl].
44141 Treat it as if we had an AND. */
44142 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
44143 *total = (cost->add
44144 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, outer_code,
44146 + rtx_cost (const1_rtx, mode, outer_code, opno, speed));
44150 /* The embedded comparison operand is completely free. */
44151 if (!general_operand (XEXP (x, 0), GET_MODE (XEXP (x, 0)))
44152 && XEXP (x, 1) == const0_rtx)
44158 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
44163 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
44164 /* ??? SSE cost should be used here. */
44165 *total = cost->fabs;
44166 else if (X87_FLOAT_MODE_P (mode))
44167 *total = cost->fabs;
44168 else if (FLOAT_MODE_P (mode))
44169 /* ??? SSE vector cost should be used here. */
44170 *total = cost->fabs;
44174 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
44175 /* ??? SSE cost should be used here. */
44176 *total = cost->fsqrt;
44177 else if (X87_FLOAT_MODE_P (mode))
44178 *total = cost->fsqrt;
44179 else if (FLOAT_MODE_P (mode))
44180 /* ??? SSE vector cost should be used here. */
44181 *total = cost->fsqrt;
/* UNSPEC_TP: the thread-pointer read.  */
44185 if (XINT (x, 1) == UNSPEC_TP)
44191 case VEC_DUPLICATE:
44192 /* ??? Assume all of these vector manipulation patterns are
44193 recognizable. In which case they all pretty much have the
44195 *total = cost->fabs;
44198 mask = XEXP (x, 2);
44199 /* This is masked instruction, assume the same cost,
44200 as nonmasked variant. */
44201 if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
44202 *total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed);
44204 *total = cost->fabs;
/* Monotonically increasing counter used to generate unique per-stub
   local labels (L<n>$lz and LPC$<n>).  */
44214 static int current_machopic_label_num;
44216 /* Given a symbol name and its associated stub, write out the
44217 definition of the stub. */
/* NOTE(review): Darwin/Mach-O only (32-bit); emits one of three stub
   flavors (AT&T-style, PIC "pure", or non-PIC) plus the matching
   lazy-pointer entry and, when needed, the stub-binding-helper glue.  */
44220 machopic_output_stub (FILE *file, const char *symb, const char *stub)
44222 unsigned int length;
44223 char *binder_name, *symbol_name, lazy_ptr_name[32];
44224 int label = ++current_machopic_label_num;
44226 /* For 64-bit we shouldn't get here. */
44227 gcc_assert (!TARGET_64BIT);
44229 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
44230 symb = targetm.strip_name_encoding (symb);
/* Build the binder and symbol names in stack (alloca) buffers.  */
44232 length = strlen (stub);
44233 binder_name = XALLOCAVEC (char, length + 32);
44234 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
44236 length = strlen (symb);
44237 symbol_name = XALLOCAVEC (char, length + 32);
44238 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
44240 sprintf (lazy_ptr_name, "L%d$lz", label);
/* Pick the stub section matching the stub flavor.  */
44242 if (MACHOPIC_ATT_STUB)
44243 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
44244 else if (MACHOPIC_PURE)
44245 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
44247 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
44249 fprintf (file, "%s:\n", stub);
44250 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
44252 if (MACHOPIC_ATT_STUB)
/* AT&T-style stub body: dyld rewrites these hlt slots in place.  */
44254 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
44256 else if (MACHOPIC_PURE)
44259 /* 25-byte PIC stub using "CALL get_pc_thunk". */
44260 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
44261 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
44262 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
44263 label, lazy_ptr_name, label);
44264 fprintf (file, "\tjmp\t*%%ecx\n");
44267 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
44269 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
44270 it needs no stub-binding-helper. */
44271 if (MACHOPIC_ATT_STUB)
44274 fprintf (file, "%s:\n", binder_name);
44278 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
44279 fprintf (file, "\tpushl\t%%ecx\n");
44282 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
44284 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
44286 /* N.B. Keep the correspondence of these
44287 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
44288 old-pic/new-pic/non-pic stubs; altering this will break
44289 compatibility with existing dylibs. */
44292 /* 25-byte PIC stub using "CALL get_pc_thunk". */
44293 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
44296 /* 16-byte -mdynamic-no-pic stub. */
44297 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
/* Emit the lazy pointer, initially pointing at the binder.  */
44299 fprintf (file, "%s:\n", lazy_ptr_name);
44300 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
44301 fprintf (file, ASM_LONG "%s\n", binder_name);
44303 #endif /* TARGET_MACHO */
44305 /* Order the registers for register allocator. */
/* Fills reg_alloc_order[]: caller-saved GPRs first, then callee-saved
   GPRs, then FP/SSE/mask/bound/MMX classes.  x87 stack registers are
   placed before the SSE registers when the x87 is used for FP math,
   and after them otherwise.  */
44308 x86_order_regs_for_local_alloc (void)
44313 /* First allocate the local general purpose registers. */
44314 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
44315 if (GENERAL_REGNO_P (i) && call_used_regs[i])
44316 reg_alloc_order [pos++] = i;
44318 /* Global general purpose registers. */
44319 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
44320 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
44321 reg_alloc_order [pos++] = i;
44323 /* x87 registers come first in case we are doing FP math
44325 if (!TARGET_SSE_MATH)
44326 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
44327 reg_alloc_order [pos++] = i;
44329 /* SSE registers. */
44330 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
44331 reg_alloc_order [pos++] = i;
44332 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
44333 reg_alloc_order [pos++] = i;
44335 /* Extended REX SSE registers. */
44336 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
44337 reg_alloc_order [pos++] = i;
44339 /* Mask register. */
44340 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
44341 reg_alloc_order [pos++] = i;
44343 /* MPX bound registers. */
44344 for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
44345 reg_alloc_order [pos++] = i;
44347 /* x87 registers. */
44348 if (TARGET_SSE_MATH)
44349 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
44350 reg_alloc_order [pos++] = i;
44352 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
44353 reg_alloc_order [pos++] = i;
44355 /* Initialize the rest of array as we do not allocate some registers
/* Pad the remaining slots with register 0 (never allocated from).  */
44357 while (pos < FIRST_PSEUDO_REGISTER)
44358 reg_alloc_order [pos++] = 0;
44361 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
44362 in struct attribute_spec handler. */
/* Validates placement (function types only), rejects 64-bit targets,
   and requires the single argument to be the integer constant 0 or 1.
   On any violation a -Wattributes warning is issued and the attribute
   is dropped via *no_add_attrs.  */
44364 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
44367 bool *no_add_attrs)
44369 if (TREE_CODE (*node) != FUNCTION_TYPE
44370 && TREE_CODE (*node) != METHOD_TYPE
44371 && TREE_CODE (*node) != FIELD_DECL
44372 && TREE_CODE (*node) != TYPE_DECL)
44374 warning (OPT_Wattributes, "%qE attribute only applies to functions",
44376 *no_add_attrs = true;
/* The attribute is meaningful only for the 32-bit calling conventions.  */
44381 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
44383 *no_add_attrs = true;
44386 if (is_attribute_p ("callee_pop_aggregate_return", name))
44390 cst = TREE_VALUE (args);
44391 if (TREE_CODE (cst) != INTEGER_CST)
44393 warning (OPT_Wattributes,
44394 "%qE attribute requires an integer constant argument",
44396 *no_add_attrs = true;
44398 else if (compare_tree_int (cst, 0) != 0
44399 && compare_tree_int (cst, 1) != 0)
44401 warning (OPT_Wattributes,
44402 "argument to %qE attribute is neither zero, nor one",
44404 *no_add_attrs = true;
44413 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
44414 struct attribute_spec.handler. */
/* Ensures the attribute is applied to a function type and that
   ms_abi and sysv_abi are not combined on the same declaration.  */
44416 ix86_handle_abi_attribute (tree *node, tree name, tree, int,
44417 bool *no_add_attrs)
44419 if (TREE_CODE (*node) != FUNCTION_TYPE
44420 && TREE_CODE (*node) != METHOD_TYPE
44421 && TREE_CODE (*node) != FIELD_DECL
44422 && TREE_CODE (*node) != TYPE_DECL)
44424 warning (OPT_Wattributes, "%qE attribute only applies to functions",
44426 *no_add_attrs = true;
44430 /* Can combine regparm with all attributes but fastcall. */
44431 if (is_attribute_p ("ms_abi", name))
44433 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
44435 error ("ms_abi and sysv_abi attributes are not compatible");
44440 else if (is_attribute_p ("sysv_abi", name))
44442 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
44444 error ("ms_abi and sysv_abi attributes are not compatible");
44453 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
44454 struct attribute_spec.handler. */
/* Accepts only record/union types (directly or via a TYPE_DECL) and
   rejects applying ms_struct and gcc_struct to the same type.  */
44456 ix86_handle_struct_attribute (tree *node, tree name, tree, int,
44457 bool *no_add_attrs)
/* For a TYPE_DECL, validate against the declared type itself.  */
44460 if (DECL_P (*node))
44462 if (TREE_CODE (*node) == TYPE_DECL)
44463 type = &TREE_TYPE (*node);
44468 if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
44470 warning (OPT_Wattributes, "%qE attribute ignored",
44472 *no_add_attrs = true;
44475 else if ((is_attribute_p ("ms_struct", name)
44476 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
44477 || ((is_attribute_p ("gcc_struct", name)
44478 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
44480 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
44482 *no_add_attrs = true;
/* Attribute handler restricted to FUNCTION_DECLs: warn and drop the
   attribute when applied to anything else.  */
44489 ix86_handle_fndecl_attribute (tree *node, tree name, tree, int,
44490 bool *no_add_attrs)
44492 if (TREE_CODE (*node) != FUNCTION_DECL)
44494 warning (OPT_Wattributes, "%qE attribute only applies to functions",
44496 *no_add_attrs = true;
/* Return true if RECORD_TYPE should use the MS bitfield layout:
   either the target default is MS layout and the type is not marked
   gcc_struct, or the type is explicitly marked ms_struct.  */
44502 ix86_ms_bitfield_layout_p (const_tree record_type)
44504 return ((TARGET_MS_BITFIELD_LAYOUT
44505 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
44506 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
44509 /* Returns an expression indicating where the this parameter is
44510 located on entry to the FUNCTION. */
/* 64-bit: `this' is in the first (or, for aggregate returns, second)
   integer parameter register of the active ABI.  32-bit: it may be in
   a register (regparm/fastcall/thiscall) or on the stack.  */
44513 x86_this_parameter (tree function)
44515 tree type = TREE_TYPE (function);
/* aggr: a hidden return-slot pointer precedes `this'.  */
44516 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
44521 const int *parm_regs;
44523 if (ix86_function_type_abi (type) == MS_ABI)
44524 parm_regs = x86_64_ms_abi_int_parameter_registers;
44526 parm_regs = x86_64_int_parameter_registers;
44527 return gen_rtx_REG (Pmode, parm_regs[aggr]);
44530 nregs = ix86_function_regparm (type, function);
44532 if (nregs > 0 && !stdarg_p (type))
44535 unsigned int ccvt = ix86_get_callcvt (type);
44537 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
44538 regno = aggr ? DX_REG : CX_REG;
44539 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
/* Register convention exhausted by the return slot: `this' is at
   the first stack slot above the return address.  */
44543 return gen_rtx_MEM (SImode,
44544 plus_constant (Pmode, stack_pointer_rtx, 4));
44553 return gen_rtx_MEM (SImode,
44554 plus_constant (Pmode,
44555 stack_pointer_rtx, 4));
44558 return gen_rtx_REG (SImode, regno);
/* Default: `this' lives on the stack.  */
44561 return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
44565 /* Determine whether x86_output_mi_thunk can succeed. */
/* A thunk needs a scratch register on 32-bit: one for VCALL_OFFSET
   arithmetic and one for GOT references under PIC to a non-local
   target.  64-bit always succeeds.  */
44568 x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
44569 const_tree function)
44571 /* 64-bit can handle anything. */
44575 /* For 32-bit, everything's fine if we have one free register. */
44576 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
44579 /* Need a free register for vcall_offset. */
44583 /* Need a free register for GOT references. */
44584 if (flag_pic && !targetm.binds_local_p (function))
44587 /* Otherwise ok. */
44591 /* Output the assembler code for a thunk function. THUNK_DECL is the
44592 declaration for the thunk function itself, FUNCTION is the decl for
44593 the target function. DELTA is an immediate constant offset to be
44594 added to THIS. If VCALL_OFFSET is nonzero, the word at
44595 *(*this + vcall_offset) should be added to THIS. */
/* NOTE(review): generates RTL directly (no gimple) and runs a minimal
   final pass at the end; the adjusted `this' is passed through to a
   sibcall of FUNCTION.  */
44598 x86_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
44599 HOST_WIDE_INT vcall_offset, tree function)
44601 rtx this_param = x86_this_parameter (function);
44602 rtx this_reg, tmp, fnaddr;
44603 unsigned int tmp_regno;
/* Pick a scratch register that is not used for argument passing:
   r10 on 64-bit; on 32-bit it depends on the calling convention.  */
44607 tmp_regno = R10_REG;
44610 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
44611 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
44612 tmp_regno = AX_REG;
44613 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
44614 tmp_regno = DX_REG;
44616 tmp_regno = CX_REG;
44619 emit_note (NOTE_INSN_PROLOGUE_END);
44621 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
44622 pull it in now and let DELTA benefit. */
44623 if (REG_P (this_param))
44624 this_reg = this_param;
44625 else if (vcall_offset)
44627 /* Put the this parameter into %eax. */
44628 this_reg = gen_rtx_REG (Pmode, AX_REG);
44629 emit_move_insn (this_reg, this_param);
44632 this_reg = NULL_RTX;
44634 /* Adjust the this parameter by a fixed constant. */
44637 rtx delta_rtx = GEN_INT (delta);
44638 rtx delta_dst = this_reg ? this_reg : this_param;
/* DELTA that does not fit a 32-bit immediate must go via the
   scratch register first.  */
44642 if (!x86_64_general_operand (delta_rtx, Pmode))
44644 tmp = gen_rtx_REG (Pmode, tmp_regno);
44645 emit_move_insn (tmp, delta_rtx);
44650 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
44653 /* Adjust the this parameter by a value stored in the vtable. */
44656 rtx vcall_addr, vcall_mem, this_mem;
44658 tmp = gen_rtx_REG (Pmode, tmp_regno);
/* Load the vtable pointer (*this) into the scratch register.  */
44660 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
44661 if (Pmode != ptr_mode)
44662 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
44663 emit_move_insn (tmp, this_mem);
44665 /* Adjust the this parameter. */
44666 vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
44668 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true)
44670 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
44671 emit_move_insn (tmp2, GEN_INT (vcall_offset));
44672 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
44675 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
44676 if (Pmode != ptr_mode)
44677 emit_insn (gen_addsi_1_zext (this_reg,
44678 gen_rtx_REG (ptr_mode,
44682 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
44685 /* If necessary, drop THIS back to its stack slot. */
44686 if (this_reg && this_reg != this_param)
44687 emit_move_insn (this_param, this_reg);
/* Materialize the address of FUNCTION, honoring PIC/GOT and Mach-O
   indirection rules.  */
44689 fnaddr = XEXP (DECL_RTL (function), 0);
44692 if (!flag_pic || targetm.binds_local_p (function)
44697 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
44698 tmp = gen_rtx_CONST (Pmode, tmp);
44699 fnaddr = gen_const_mem (Pmode, tmp);
44704 if (!flag_pic || targetm.binds_local_p (function))
44707 else if (TARGET_MACHO)
44709 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
44710 fnaddr = XEXP (fnaddr, 0);
44712 #endif /* TARGET_MACHO */
44715 tmp = gen_rtx_REG (Pmode, CX_REG);
44716 output_set_got (tmp, NULL_RTX);
44718 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
44719 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
44720 fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
44721 fnaddr = gen_const_mem (Pmode, fnaddr);
44725 /* Our sibling call patterns do not allow memories, because we have no
44726 predicate that can distinguish between frame and non-frame memory.
44727 For our purposes here, we can get away with (ab)using a jump pattern,
44728 because we're going to do no optimization. */
44729 if (MEM_P (fnaddr))
44731 if (sibcall_insn_operand (fnaddr, word_mode))
44733 fnaddr = XEXP (DECL_RTL (function), 0);
44734 tmp = gen_rtx_MEM (QImode, fnaddr);
44735 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
44736 tmp = emit_call_insn (tmp);
44737 SIBLING_CALL_P (tmp) = 1;
44740 emit_jump_insn (gen_indirect_jump (fnaddr));
44744 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
44746 // CM_LARGE_PIC always uses pseudo PIC register which is
44747 // uninitialized. Since FUNCTION is local and calling it
44748 // doesn't go through PLT, we use scratch register %r11 as
44749 // PIC register and initialize it here.
44750 pic_offset_table_rtx = gen_rtx_REG (Pmode, R11_REG);
44751 ix86_init_large_pic_reg (tmp_regno);
44752 fnaddr = legitimize_pic_address (fnaddr,
44753 gen_rtx_REG (Pmode, tmp_regno));
/* Force a non-sibcallable address into the scratch register.  */
44756 if (!sibcall_insn_operand (fnaddr, word_mode))
44758 tmp = gen_rtx_REG (word_mode, tmp_regno);
44759 if (GET_MODE (fnaddr) != word_mode)
44760 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
44761 emit_move_insn (tmp, fnaddr);
44765 tmp = gen_rtx_MEM (QImode, fnaddr);
44766 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
44767 tmp = emit_call_insn (tmp);
44768 SIBLING_CALL_P (tmp) = 1;
44772 /* Emit just enough of rest_of_compilation to get the insns emitted.
44773 Note that use_thunk calls assemble_start_function et al. */
44774 insn = get_insns ();
44775 shorten_branches (insn);
44776 final_start_function (insn, file, 1);
44777 final (insn, file, 1);
44778 final_end_function ();
/* Emit target-specific boilerplate at the start of the assembly file:
   .code16gcc for 16-bit code, Darwin prologue, optional .version and
   __fltused directives, and Intel syntax selection.  */
44782 x86_file_start (void)
44784 default_file_start ();
44786 fputs ("\t.code16gcc\n", asm_out_file);
44788 darwin_file_start ();
44790 if (X86_FILE_START_VERSION_DIRECTIVE)
44791 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
44792 if (X86_FILE_START_FLTUSED)
44793 fputs ("\t.global\t__fltused\n", asm_out_file);
44794 if (ix86_asm_dialect == ASM_INTEL)
44795 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
/* Return the alignment (in bits) to use for FIELD given the alignment
   COMPUTED by the generic code.  On 32-bit non-ALIGN_DOUBLE targets,
   doubles and integer-class modes are capped at 32-bit alignment;
   IAMCU has its own rule.  */
44799 x86_field_alignment (tree field, int computed)
44802 tree type = TREE_TYPE (field);
44804 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
44807 return iamcu_alignment (type, computed);
/* Look through array types to the element mode.  */
44808 mode = TYPE_MODE (strip_array_types (type));
44809 if (mode == DFmode || mode == DCmode
44810 || GET_MODE_CLASS (mode) == MODE_INT
44811 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
44812 return MIN (32, computed);
44816 /* Print call to TARGET to FILE. */
/* With -mnop-mcount, emit a 5-byte nop of the same size as the call so
   the profiling slot can be patched later; the `1:` label marks the
   site for __mcount_loc recording.  */
44819 x86_print_call_or_nop (FILE *file, const char *target)
44821 if (flag_nop_mcount)
44822 fprintf (file, "1:\tnopl 0x00(%%eax,%%eax,1)\n"); /* 5 byte nop. */
44824 fprintf (file, "1:\tcall\t%s\n", target);
44827 /* Output assembler code to FILE to increment profiler label # LABELNO
44828 for profiling a function entry. */
/* Emits the mcount call (before or after the prologue per -mfentry),
   the optional profile counter load, and the __mcount_loc record when
   -mrecord-mcount is enabled.  */
44830 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
44832 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
44836 #ifndef NO_PROFILE_COUNTERS
44837 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
/* PIC (non-PE/COFF): call mcount through the GOT.  */
44840 if (!TARGET_PECOFF && flag_pic)
44841 fprintf (file, "1:\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
44843 x86_print_call_or_nop (file, mcount_name);
44847 #ifndef NO_PROFILE_COUNTERS
44848 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
44851 fprintf (file, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
44855 #ifndef NO_PROFILE_COUNTERS
44856 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
44859 x86_print_call_or_nop (file, mcount_name);
/* Record the call site address in the __mcount_loc section.  */
44862 if (flag_record_mcount)
44864 fprintf (file, "\t.section __mcount_loc, \"a\",@progbits\n");
44865 fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
44866 fprintf (file, "\t.previous\n");
44870 /* We don't have exact information about the insn sizes, but we may assume
44871 quite safely that we are informed about all 1 byte insns and memory
44872 address sizes. This is enough to eliminate unnecessary padding in
/* Return a conservative lower bound (in bytes) on the encoded size of
   INSN, used by the jump-mispredict padding pass below.  */
44876 min_insn_size (rtx_insn *insn)
44880 if (!INSN_P (insn) || !active_insn_p (insn))
44883 /* Discard alignments we've emit and jump instructions. */
44884 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
44885 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
44888 /* Important case - calls are always 5 bytes.
44889 It is common to have many calls in the row. */
44891 && symbolic_reference_mentioned_p (PATTERN (insn))
44892 && !SIBLING_CALL_P (insn))
44894 len = get_attr_length (insn);
44898 /* For normal instructions we rely on get_attr_length being exact,
44899 with a few exceptions. */
44900 if (!JUMP_P (insn))
44902 enum attr_type type = get_attr_type (insn);
/* Inline asm length is only an estimate, so treat it as minimal.  */
44907 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
44908 || asm_noperands (PATTERN (insn)) >= 0)
44915 /* Otherwise trust get_attr_length. */
44919 l = get_attr_length_address (insn);
44920 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
44929 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
44931 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
/* Sliding-window pass: track minimal intervals [START, INSN] containing
   4 jumps; when such an interval fits in under 16 bytes, emit a pad
   insn so the 4th jump cannot share a 16-byte fetch window.  */
44935 ix86_avoid_jump_mispredicts (void)
44937 rtx_insn *insn, *start = get_insns ();
44938 int nbytes = 0, njumps = 0;
44939 bool isjump = false;
44941 /* Look for all minimal intervals of instructions containing 4 jumps.
44942 The intervals are bounded by START and INSN. NBYTES is the total
44943 size of instructions in the interval including INSN and not including
44944 START. When the NBYTES is smaller than 16 bytes, it is possible
44945 that the end of START and INSN ends up in the same 16byte page.
44947 The smallest offset in the page INSN can start is the case where START
44948 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
44949 We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
44951 Don't consider asm goto as jump, while it can contain a jump, it doesn't
44952 have to, control transfer to label(s) can be performed through other
44953 means, and also we estimate minimum length of all asm stmts as 0. */
44954 for (insn = start; insn; insn = NEXT_INSN (insn))
44958 if (LABEL_P (insn))
44960 int align = label_to_alignment (insn);
44961 int max_skip = label_to_max_skip (insn);
44965 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
44966 already in the current 16 byte page, because otherwise
44967 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
44968 bytes to reach 16 byte boundary. */
44970 || (align <= 3 && max_skip != (1 << align) - 1))
44973 fprintf (dump_file, "Label %i with max_skip %i\n",
44974 INSN_UID (insn), max_skip);
/* Shrink the window from the front until it could again share a
   16-byte page with the label's alignment skip.  */
44977 while (nbytes + max_skip >= 16)
44979 start = NEXT_INSN (start);
44980 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
44982 njumps--, isjump = true;
44985 nbytes -= min_insn_size (start);
44991 min_size = min_insn_size (insn);
44992 nbytes += min_size;
44994 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
44995 INSN_UID (insn), min_size);
44996 if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
/* Keep at most 4 jumps in the window by advancing START.  */
45004 start = NEXT_INSN (start);
45005 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
45007 njumps--, isjump = true;
45010 nbytes -= min_insn_size (start);
45012 gcc_assert (njumps >= 0);
45014 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
45015 INSN_UID (start), INSN_UID (insn), nbytes);
45017 if (njumps == 3 && isjump && nbytes < 16)
45019 int padsize = 15 - nbytes + min_insn_size (insn);
45022 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
45023 INSN_UID (insn), padsize);
45024 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
45030 /* AMD Athlon works faster
45031 when RET is not destination of conditional jump or directly preceded
45032 by other jump instruction. We avoid the penalty by inserting NOP just
45033 before the RET instructions in such cases. */
45035 ix86_pad_returns (void)
/* Walk every predecessor edge of the exit block and decide, per
   return insn, whether to replace it with the padded long form.  */
45040 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
45042 basic_block bb = e->src;
45043 rtx_insn *ret = BB_END (bb);
45045 bool replace = false;
45047 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
45048 || optimize_bb_for_size_p (bb))
/* Find the nearest active insn or label before the return.  */
45050 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
45051 if (active_insn_p (prev) || LABEL_P (prev))
45053 if (prev && LABEL_P (prev))
/* RET right after a label: pad unless every incoming edge is a
   fallthrough (i.e. some jump targets the RET directly).  */
45058 FOR_EACH_EDGE (e, ei, bb->preds)
45059 if (EDGE_FREQUENCY (e) && e->src->index >= 0
45060 && !(e->flags & EDGE_FALLTHRU))
45068 prev = prev_active_insn (ret);
45070 && ((JUMP_P (prev) && any_condjump_p (prev))
45073 /* Empty functions get branch mispredict even when
45074 the jump destination is not visible to us. */
45075 if (!prev && !optimize_function_for_size_p (cfun))
45080 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
45086 /* Count the minimum number of instructions in BB. Return 4 if the
45087 number of instructions >= 4. */
/* USE/CLOBBER patterns and debug insns are not counted; the count is
   clamped at 4 because callers only care about "fewer than 4".  */
45090 ix86_count_insn_bb (basic_block bb)
45093 int insn_count = 0;
45095 /* Count number of instructions in this block. Return 4 if the number
45096 of instructions >= 4. */
45097 FOR_BB_INSNS (bb, insn)
45099 /* Only happen in exit blocks. */
45101 && ANY_RETURN_P (PATTERN (insn)))
45104 if (NONDEBUG_INSN_P (insn)
45105 && GET_CODE (PATTERN (insn)) != USE
45106 && GET_CODE (PATTERN (insn)) != CLOBBER)
45109 if (insn_count >= 4)
45118 /* Count the minimum number of instructions in code path in BB.
45119 Return 4 if the number of instructions >= 4. */
/* NOTE(review): several interior lines are elided in this excerpt;
   comments only.  */
45122 ix86_count_insn (basic_block bb)
45126 int min_prev_count;
45128 /* Only bother counting instructions along paths with no
45129 more than 2 basic blocks between entry and exit. Given
45130 that BB has an edge to exit, determine if a predecessor
45131 of BB has an edge from entry. If so, compute the number
45132 of instructions in the predecessor block. If there
45133 happen to be multiple such blocks, compute the minimum. */
45134 min_prev_count = 4;
45135 FOR_EACH_EDGE (e, ei, bb->preds)
45138 edge_iterator prev_ei;
/* BB directly follows the entry block: no predecessor insns at all.  */
45140 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
45142 min_prev_count = 0;
45145 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
45147 if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
45149 int count = ix86_count_insn_bb (e->src);
45150 if (count < min_prev_count)
45151 min_prev_count = count;
/* Only add BB's own count when the path is still below the cap.  */
45157 if (min_prev_count < 4)
45158 min_prev_count += ix86_count_insn_bb (bb);
45160 return min_prev_count;
45163 /* Pad short function to 4 instructions. */
/* NOTE(review): interior lines (return type, braces, loop condition of
   the epilogue search) are elided in this excerpt; comments only.  */
45166 ix86_pad_short_function (void)
/* Examine every return path (predecessors of the exit block).  */
45171 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
45173 rtx_insn *ret = BB_END (e->src);
45174 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
45176 int insn_count = ix86_count_insn (e->src);
45178 /* Pad short function. */
45179 if (insn_count < 4)
45181 rtx_insn *insn = ret;
45183 /* Find epilogue. */
45186 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
45187 insn = PREV_INSN (insn);
45192 /* Two NOPs count as one instruction. */
45193 insn_count = 2 * (4 - insn_count);
45194 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
45200 /* Fix up a Windows system unwinder issue. If an EH region falls through into
45201 the epilogue, the Windows system unwinder will apply epilogue logic and
45202 produce incorrect offsets. This can be avoided by adding a nop between
45203 the last insn that can throw and the first insn of the epilogue. */
/* NOTE(review): interior lines are elided in this excerpt; comments only.  */
45206 ix86_seh_fixup_eh_fallthru (void)
45211 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
45213 rtx_insn *insn, *next;
45215 /* Find the beginning of the epilogue. */
45216 for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
45217 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
45222 /* We only care about preceding insns that can throw. */
45223 insn = prev_active_insn (insn);
45224 if (insn == NULL || !can_throw_internal (insn))
45227 /* Do not separate calls from their debug information. */
45228 for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
45230 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
45231 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION))
/* The actual fix: one nop after the last throwing insn.  */
45236 emit_insn_after (gen_nops (const1_rtx), insn);
45240 /* Given a register number BASE, the lowest of a group of registers, update
45241 regsets IN and OUT with the registers that should be avoided in input
45242 and output operands respectively when trying to avoid generating a modr/m
45243 byte for -fmitigate-rop. */
/* NOTE(review): return type and braces are elided in this excerpt.
   The first two regs of the group are output-risky, the next two
   input-risky.  */
45246 set_rop_modrm_reg_bits (int base, HARD_REG_SET &in, HARD_REG_SET &out)
45248 SET_HARD_REG_BIT (out, base);
45249 SET_HARD_REG_BIT (out, base + 1);
45250 SET_HARD_REG_BIT (in, base + 2);
45251 SET_HARD_REG_BIT (in, base + 3);
45254 /* Called if -fmitigate_rop is in effect. Try to rewrite instructions so
45255 that certain encodings of modr/m bytes do not occur. */
/* NOTE(review): this is a long function and many interior lines are
   elided in this excerpt (braces, declarations, continue statements);
   comments only here.  Two passes are visible: a regrename-based pass
   that renames whole du-chains away from "risky" registers, then a
   per-insn backward-liveness pass that substitutes a fresh register
   and emits a copy where renaming did not help.  */
45257 ix86_mitigate_rop (void)
45259 HARD_REG_SET input_risky;
45260 HARD_REG_SET output_risky;
45261 HARD_REG_SET inout_risky;
/* Build the sets of registers whose encodings produce unwanted
   modr/m bytes, for input and output operand positions.  */
45263 CLEAR_HARD_REG_SET (output_risky);
45264 CLEAR_HARD_REG_SET (input_risky);
45265 SET_HARD_REG_BIT (output_risky, AX_REG);
45266 SET_HARD_REG_BIT (output_risky, CX_REG);
45267 SET_HARD_REG_BIT (input_risky, BX_REG);
45268 SET_HARD_REG_BIT (input_risky, DX_REG);
45269 set_rop_modrm_reg_bits (FIRST_SSE_REG, input_risky, output_risky);
45270 set_rop_modrm_reg_bits (FIRST_REX_INT_REG, input_risky, output_risky);
45271 set_rop_modrm_reg_bits (FIRST_REX_SSE_REG, input_risky, output_risky);
45272 set_rop_modrm_reg_bits (FIRST_EXT_REX_SSE_REG, input_risky, output_risky);
45273 set_rop_modrm_reg_bits (FIRST_MASK_REG, input_risky, output_risky);
45274 set_rop_modrm_reg_bits (FIRST_BND_REG, input_risky, output_risky);
45275 COPY_HARD_REG_SET (inout_risky, input_risky);
45276 IOR_HARD_REG_SET (inout_risky, output_risky);
45278 df_note_add_problem ();
45279 /* Fix up what stack-regs did. */
45280 df_insn_rescan_all ();
/* Pass 1: collect rename candidates via the regrename machinery.  */
45283 regrename_init (true);
45284 regrename_analyze (NULL);
45286 auto_vec<du_head_p> cands;
45288 for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
45290 if (!NONDEBUG_INSN_P (insn))
45293 if (GET_CODE (PATTERN (insn)) == USE
45294 || GET_CODE (PATTERN (insn)) == CLOBBER)
45297 extract_insn (insn);
45300 int modrm = ix86_get_modrm_for_rop (insn, recog_data.operand,
45301 recog_data.n_operands, &opno0,
45304 if (!ix86_rop_should_change_byte_p (modrm))
45307 insn_rr_info *info = &insn_rr[INSN_UID (insn)];
45309 /* This happens when regrename has to fail a block. */
45310 if (!info->op_info)
/* Record the chain feeding operand 0 (and below, operand 1) as a
   candidate the first time it is seen; target_data_1/2 count how
   often it appears as output/input respectively.  */
45313 if (info->op_info[opno0].n_chains != 0)
45315 gcc_assert (info->op_info[opno0].n_chains == 1);
45317 op0c = regrename_chain_from_id (info->op_info[opno0].heads[0]->id);
45318 if (op0c->target_data_1 + op0c->target_data_2 == 0
45319 && !op0c->cannot_rename)
45320 cands.safe_push (op0c);
45322 op0c->target_data_1++;
45324 if (info->op_info[opno1].n_chains != 0)
45326 gcc_assert (info->op_info[opno1].n_chains == 1);
45328 op1c = regrename_chain_from_id (info->op_info[opno1].heads[0]->id);
45329 if (op1c->target_data_1 + op1c->target_data_2 == 0
45330 && !op1c->cannot_rename)
45331 cands.safe_push (op1c);
45333 op1c->target_data_2++;
/* Try to rename each candidate chain away from the risky sets.  */
45339 FOR_EACH_VEC_ELT (cands, i, head)
45341 int old_reg, best_reg;
45342 HARD_REG_SET unavailable;
45344 CLEAR_HARD_REG_SET (unavailable);
45345 if (head->target_data_1)
45346 IOR_HARD_REG_SET (unavailable, output_risky);
45347 if (head->target_data_2)
45348 IOR_HARD_REG_SET (unavailable, input_risky);
45351 reg_class superclass = regrename_find_superclass (head, &n_uses,
45353 old_reg = head->regno;
45354 best_reg = find_rename_reg (head, superclass, &unavailable,
45356 bool ok = regrename_do_replace (head, best_reg);
45359 fprintf (dump_file, "Chain %d renamed as %s in %s\n", head->id,
45360 reg_names[best_reg], reg_class_names[superclass]);
45364 regrename_finish ();
/* Pass 2: walk each block backwards with a live-register simulation
   and patch the remaining risky insns individually.  */
45371 INIT_REG_SET (&live);
45373 FOR_EACH_BB_FN (bb, cfun)
45377 COPY_REG_SET (&live, DF_LR_OUT (bb));
45378 df_simulate_initialize_backwards (bb, &live);
45380 FOR_BB_INSNS_REVERSE (bb, insn)
45382 if (!NONDEBUG_INSN_P (insn))
45385 df_simulate_one_insn_backwards (bb, insn, &live);
45387 if (GET_CODE (PATTERN (insn)) == USE
45388 || GET_CODE (PATTERN (insn)) == CLOBBER)
45391 extract_insn (insn);
45392 constrain_operands_cached (insn, reload_completed);
45394 int modrm = ix86_get_modrm_for_rop (insn, recog_data.operand,
45395 recog_data.n_operands, &opno0,
45398 || !ix86_rop_should_change_byte_p (modrm)
45402 rtx oldreg = recog_data.operand[opno1];
45403 preprocess_constraints (insn);
45404 const operand_alternative *alt = which_op_alt ();
/* Bail out if any earlyclobber output overlaps the register we
   want to replace.  */
45407 for (i = 0; i < recog_data.n_operands; i++)
45409 && alt[i].earlyclobber
45410 && reg_overlap_mentioned_p (recog_data.operand[i],
45414 if (i < recog_data.n_operands)
45418 fprintf (dump_file,
45419 "attempting to fix modrm byte in insn %d:"
45420 " reg %d class %s", INSN_UID (insn), REGNO (oldreg),
45421 reg_class_names[alt[opno1].cl]);
/* Build the set of registers we must NOT pick: live regs, the old
   reg itself, callee-saved and fixed regs, output-risky regs, and
   anything outside the operand's constraint class.  */
45423 HARD_REG_SET unavailable;
45424 REG_SET_TO_HARD_REG_SET (unavailable, &live);
45425 SET_HARD_REG_BIT (unavailable, REGNO (oldreg));
45426 IOR_COMPL_HARD_REG_SET (unavailable, call_used_reg_set);
45427 IOR_HARD_REG_SET (unavailable, fixed_reg_set);
45428 IOR_HARD_REG_SET (unavailable, output_risky);
45429 IOR_COMPL_HARD_REG_SET (unavailable,
45430 reg_class_contents[alt[opno1].cl]);
45432 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
45433 if (!TEST_HARD_REG_BIT (unavailable, i))
45435 if (i == FIRST_PSEUDO_REGISTER)
45438 fprintf (dump_file, ", none available\n");
45442 fprintf (dump_file, " -> %d\n", i);
/* Substitute the fresh register in the insn and emit a copy from
   the old register just before it.  */
45443 rtx newreg = gen_rtx_REG (recog_data.operand_mode[opno1], i);
45444 validate_change (insn, recog_data.operand_loc[opno1], newreg, false);
45445 insn = emit_insn_before (gen_move_insn (newreg, oldreg), insn);
45450 /* Implement machine specific optimizations. We implement padding of returns
45451 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
/* NOTE(review): the function signature line is elided in this excerpt —
   presumably the ix86_reorg machine-dependent-reorg hook; confirm
   against the full source.  Comments only here.  */
45455 /* We are freeing block_for_insn in the toplev to keep compatibility
45456 with old MDEP_REORGS that are not CFG based. Recompute it now. */
45457 compute_bb_for_insn ();
45459 if (flag_mitigate_rop)
45460 ix86_mitigate_rop ();
45462 if (TARGET_SEH && current_function_has_exception_handlers ())
45463 ix86_seh_fixup_eh_fallthru ();
/* The padding transformations only make sense when optimizing this
   function for speed.  */
45465 if (optimize && optimize_function_for_speed_p (cfun))
45467 if (TARGET_PAD_SHORT_FUNCTION)
45468 ix86_pad_short_function ();
45469 else if (TARGET_PAD_RETURNS)
45470 ix86_pad_returns ();
45471 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
45472 if (TARGET_FOUR_JUMP_LIMIT)
45473 ix86_avoid_jump_mispredicts ();
45478 /* Return nonzero when QImode register that must be represented via REX prefix
/* NOTE(review): part of the header comment, the return type and braces
   are elided in this excerpt; comments only.  */
45481 x86_extended_QIreg_mentioned_p (rtx_insn *insn)
45484 extract_insn_cached (insn);
/* Scan all operands for a general register outside the legacy
   QI-addressable set.  */
45485 for (i = 0; i < recog_data.n_operands; i++)
45486 if (GENERAL_REG_P (recog_data.operand[i])
45487 && !QI_REGNO_P (REGNO (recog_data.operand[i])))
45492 /* Return true when INSN mentions register that must be encoded using REX
/* NOTE(review): the rest of the header comment, return type and braces
   are elided in this excerpt; comments only.  */
45495 x86_extended_reg_mentioned_p (rtx insn)
45497 subrtx_iterator::array_type array;
/* Walk every sub-rtx; for a full insn, inspect its PATTERN.  */
45498 FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST)
45500 const_rtx x = *iter;
45502 && (REX_INT_REGNO_P (REGNO (x)) || REX_SSE_REGNO_P (REGNO (x))))
45508 /* If profitable, negate (without causing overflow) integer constant
45509 of mode MODE at location LOC. Return true in this case. */
/* NOTE(review): interior lines (return type, switch over MODE, braces)
   are elided in this excerpt; comments only.  */
45511 x86_maybe_negate_const_int (rtx *loc, machine_mode mode)
45515 if (!CONST_INT_P (*loc))
45521 /* DImode x86_64 constants must fit in 32 bits. */
45522 gcc_assert (x86_64_immediate_operand (*loc, mode));
45533 gcc_unreachable ();
45536 /* Avoid overflows. */
/* Negating the sign-bit value would overflow, so refuse it.  */
45537 if (mode_signbit_p (mode, *loc))
45540 val = INTVAL (*loc);
45542 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
45543 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
45544 if ((val < 0 && val != -128)
45547 *loc = GEN_INT (-val);
45554 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
45555 optabs would emit if we didn't have TFmode patterns. */
/* NOTE(review): a few interior lines are elided in this excerpt;
   comments only.  */
45558 x86_emit_floatuns (rtx operands[2])
45560 rtx_code_label *neglab, *donelab;
45561 rtx i0, i1, f0, in, out;
45562 machine_mode mode, inmode;
45564 inmode = GET_MODE (operands[1]);
45565 gcc_assert (inmode == SImode || inmode == DImode);
45568 in = force_reg (inmode, operands[1]);
45569 mode = GET_MODE (out);
45570 neglab = gen_label_rtx ();
45571 donelab = gen_label_rtx ();
45572 f0 = gen_reg_rtx (mode);
/* Non-negative input: a plain signed conversion is already correct.  */
45574 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
45576 expand_float (out, in, 0);
45578 emit_jump_insn (gen_jump (donelab));
45581 emit_label (neglab);
/* "Negative" (high-bit-set) input: halve it while preserving the low
   bit ((in >> 1) | (in & 1)), convert, then double the result.  */
45583 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
45585 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
45587 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
45589 expand_float (f0, i0, 0);
45591 emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));
45593 emit_label (donelab);
45596 static bool canonicalize_perm (struct expand_vec_perm_d *d);
45597 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
45598 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
45599 static bool expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool);
45601 /* Get a vector mode of the same size as the original but with elements
45602 twice as wide. This is only guaranteed to apply to integral vectors. */
/* NOTE(review): the braces and final return are elided in this excerpt;
   comments only.  */
45604 static inline machine_mode
45605 get_mode_wider_vector (machine_mode o)
45607 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
45608 machine_mode n = GET_MODE_WIDER_MODE (o);
/* Sanity: half the element count, same total byte size.  */
45609 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
45610 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
45614 /* A subroutine of ix86_expand_vector_init_duplicate. Tries to
45615 fill target with val via vec_duplicate. */
/* NOTE(review): interior lines are elided in this excerpt (return type,
   braces, the success return path); comments only.  */
45618 ix86_vector_duplicate_value (machine_mode mode, rtx target, rtx val)
45624 /* First attempt to recognize VAL as-is. */
45625 dup = gen_rtx_VEC_DUPLICATE (mode, val);
45626 insn = emit_insn (gen_rtx_SET (target, dup));
45627 if (recog_memoized (insn) < 0)
45630 /* If that fails, force VAL into a register. */
45633 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
45634 seq = get_insns ();
/* Emit the register-forcing sequence ahead of the dup insn, then
   retry recognition.  */
45637 emit_insn_before (seq, insn);
45639 ok = recog_memoized (insn) >= 0;
45645 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
45646 with all elements equal to VAR. Return true if successful. */
/* NOTE(review): this function is a large switch over MODE; most of the
   case labels, braces and early returns are elided in this excerpt.
   Comments only here.  */
45649 ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode,
45650 rtx target, rtx val)
45674 return ix86_vector_duplicate_value (mode, target, val);
45679 if (TARGET_SSE || TARGET_3DNOW_A)
/* Broadcast via TRUNCATE of an SImode value wrapped in VEC_DUPLICATE.  */
45683 val = gen_lowpart (SImode, val);
45684 x = gen_rtx_TRUNCATE (HImode, val);
45685 x = gen_rtx_VEC_DUPLICATE (mode, x);
45686 emit_insn (gen_rtx_SET (target, x));
45698 return ix86_vector_duplicate_value (mode, target, val);
/* Broadcast via a one-operand permutation when no direct dup exists.  */
45702 struct expand_vec_perm_d dperm;
45706 memset (&dperm, 0, sizeof (dperm));
45707 dperm.target = target;
45708 dperm.vmode = mode;
45709 dperm.nelt = GET_MODE_NUNITS (mode);
45710 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
45711 dperm.one_operand_p = true;
45713 /* Extend to SImode using a paradoxical SUBREG. */
45714 tmp1 = gen_reg_rtx (SImode);
45715 emit_move_insn (tmp1, gen_lowpart (SImode, val));
45717 /* Insert the SImode value as low element of a V4SImode vector. */
45718 tmp2 = gen_reg_rtx (V4SImode);
45719 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
45720 emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2));
45722 ok = (expand_vec_perm_1 (&dperm)
45723 || expand_vec_perm_broadcast_1 (&dperm));
45731 return ix86_vector_duplicate_value (mode, target, val);
45738 /* Replicate the value once into the next wider mode and recurse. */
45740 machine_mode smode, wsmode, wvmode;
45743 smode = GET_MODE_INNER (mode);
45744 wvmode = get_mode_wider_vector (mode);
45745 wsmode = GET_MODE_INNER (wvmode);
/* Pack two copies of VAL into one wider scalar: (val << bits) | val.  */
45747 val = convert_modes (wsmode, smode, val, true);
45748 x = expand_simple_binop (wsmode, ASHIFT, val,
45749 GEN_INT (GET_MODE_BITSIZE (smode)),
45750 NULL_RTX, 1, OPTAB_LIB_WIDEN);
45751 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
45753 x = gen_reg_rtx (wvmode);
45754 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
45756 emit_move_insn (target, gen_lowpart (GET_MODE (target), x));
45763 return ix86_vector_duplicate_value (mode, target, val);
/* 256-bit QI/HI: duplicate into the half-width mode, then CONCAT.  */
45766 machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
45767 rtx x = gen_reg_rtx (hvmode);
45769 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
45772 x = gen_rtx_VEC_CONCAT (mode, x, x);
45773 emit_insn (gen_rtx_SET (target, x));
45779 if (TARGET_AVX512BW)
45780 return ix86_vector_duplicate_value (mode, target, val);
/* 512-bit QI/HI without AVX512BW: same half-and-CONCAT strategy.  */
45783 machine_mode hvmode = (mode == V32HImode ? V16HImode : V32QImode);
45784 rtx x = gen_reg_rtx (hvmode);
45786 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
45789 x = gen_rtx_VEC_CONCAT (mode, x, x);
45790 emit_insn (gen_rtx_SET (target, x));
45799 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
45800 whose ONE_VAR element is VAR, and other elements are zero. Return true
/* NOTE(review): the rest of the header comment and many interior lines
   (case labels, braces, returns) are elided in this excerpt; comments
   only.  */
45804 ix86_expand_vector_init_one_nonzero (bool mmx_ok, machine_mode mode,
45805 rtx target, rtx var, int one_var)
45807 machine_mode vsimode;
45810 bool use_vector_set = false;
/* Per-mode decision whether a direct vec_set is the best strategy.  */
45815 /* For SSE4.1, we normally use vector set. But if the second
45816 element is zero and inter-unit moves are OK, we use movq
45818 use_vector_set = (TARGET_64BIT && TARGET_SSE4_1
45819 && !(TARGET_INTER_UNIT_MOVES_TO_VEC
45825 use_vector_set = TARGET_SSE4_1;
45828 use_vector_set = TARGET_SSE2;
45831 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
45838 use_vector_set = TARGET_AVX;
45841 /* Use ix86_expand_vector_set in 64bit mode only. */
45842 use_vector_set = TARGET_AVX && TARGET_64BIT;
/* Strategy 1: zero the target, then insert VAR at position ONE_VAR.  */
45848 if (use_vector_set)
45850 emit_insn (gen_rtx_SET (target, CONST0_RTX (mode)));
45851 var = force_reg (GET_MODE_INNER (mode), var);
45852 ix86_expand_vector_set (mmx_ok, target, var, one_var);
/* Strategy 2: CONCAT VAR with a zero scalar (two-element vectors).  */
45868 var = force_reg (GET_MODE_INNER (mode), var);
45869 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
45870 emit_insn (gen_rtx_SET (target, x));
/* Strategy 3: build a vector with VAR in element 0 via VEC_MERGE,
   then shuffle it into position.  */
45875 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
45876 new_target = gen_reg_rtx (mode);
45878 new_target = target;
45879 var = force_reg (GET_MODE_INNER (mode), var);
45880 x = gen_rtx_VEC_DUPLICATE (mode, var);
45881 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
45882 emit_insn (gen_rtx_SET (new_target, x));
45885 /* We need to shuffle the value to the correct position, so
45886 create a new pseudo to store the intermediate result. */
45888 /* With SSE2, we can use the integer shuffle insns. */
45889 if (mode != V4SFmode && TARGET_SSE2)
45891 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
45893 GEN_INT (one_var == 1 ? 0 : 1),
45894 GEN_INT (one_var == 2 ? 0 : 1),
45895 GEN_INT (one_var == 3 ? 0 : 1)));
45896 if (target != new_target)
45897 emit_move_insn (target, new_target);
45901 /* Otherwise convert the intermediate result to V4SFmode and
45902 use the SSE1 shuffle instructions. */
45903 if (mode != V4SFmode)
45905 tmp = gen_reg_rtx (V4SFmode);
45906 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
45911 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
45913 GEN_INT (one_var == 1 ? 0 : 1),
45914 GEN_INT (one_var == 2 ? 0+4 : 1+4),
45915 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
45917 if (mode != V4SFmode)
45918 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
45919 else if (tmp != target)
45920 emit_move_insn (target, tmp);
45922 else if (target != new_target)
45923 emit_move_insn (target, new_target);
/* Strategy 4: narrow element modes — widen to SImode and recurse.  */
45928 vsimode = V4SImode;
45934 vsimode = V2SImode;
45940 /* Zero extend the variable element to SImode and recurse. */
45941 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
45943 x = gen_reg_rtx (vsimode);
45944 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
45946 gcc_unreachable ();
45948 emit_move_insn (target, gen_lowpart (mode, x));
45956 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
45957 consisting of the values in VALS. It is known that all elements
45958 except ONE_VAR are constants. Return true if successful. */
/* NOTE(review): interior lines (switch labels, braces, returns) are
   elided in this excerpt; comments only.  */
45961 ix86_expand_vector_init_one_var (bool mmx_ok, machine_mode mode,
45962 rtx target, rtx vals, int one_var)
45964 rtx var = XVECEXP (vals, 0, one_var);
45965 machine_mode wmode;
/* Build the constant part of the vector with the variable slot zeroed.  */
45968 const_vec = copy_rtx (vals);
45969 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
45970 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
45978 /* For the two element vectors, it's just as easy to use
45979 the general case. */
45983 /* Use ix86_expand_vector_set in 64bit mode only. */
46006 /* There's no way to set one QImode entry easily. Combine
46007 the variable value with its adjacent constant value, and
46008 promote to an HImode set. */
46009 x = XVECEXP (vals, 0, one_var ^ 1);
/* Depending on which half of the HImode pair is variable, shift the
   variable byte high or place the constant byte high.  */
46012 var = convert_modes (HImode, QImode, var, true);
46013 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
46014 NULL_RTX, 1, OPTAB_LIB_WIDEN);
46015 x = GEN_INT (INTVAL (x) & 0xff);
46019 var = convert_modes (HImode, QImode, var, true);
46020 x = gen_int_mode (INTVAL (x) << 8, HImode);
46022 if (x != const0_rtx)
46023 var = expand_simple_binop (HImode, IOR, var, x, var,
46024 1, OPTAB_LIB_WIDEN);
46026 x = gen_reg_rtx (wmode);
46027 emit_move_insn (x, gen_lowpart (wmode, const_vec));
46028 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
46030 emit_move_insn (target, gen_lowpart (mode, x));
/* Default path: load the constant vector, then overwrite ONE_VAR.  */
46037 emit_move_insn (target, const_vec);
46038 ix86_expand_vector_set (mmx_ok, target, var, one_var);
46042 /* A subroutine of ix86_expand_vector_init_general. Use vector
46043 concatenate to handle the most general case: all values variable,
46044 and none identical. */
/* NOTE(review): most of the switch over N and the mode-selection cases
   are elided in this excerpt; comments only.  */
46047 ix86_expand_vector_init_concat (machine_mode mode,
46048 rtx target, rtx *ops, int n)
46050 machine_mode cmode, hmode = VOIDmode, gmode = VOIDmode;
46051 rtx first[16], second[8], third[4];
46103 gcc_unreachable ();
/* n == 2: concatenate the two operands directly.  */
46106 if (!register_operand (ops[1], cmode))
46107 ops[1] = force_reg (cmode, ops[1]);
46108 if (!register_operand (ops[0], cmode))
46109 ops[0] = force_reg (cmode, ops[0]);
46110 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, ops[0],
46130 gcc_unreachable ();
46154 gcc_unreachable ();
46172 gcc_unreachable ();
46177 /* FIXME: We process inputs backward to help RA. PR 36222. */
/* Pairwise-combine operands into half-width vectors, then recurse.  */
46180 for (; i > 0; i -= 2, j--)
46182 first[j] = gen_reg_rtx (cmode);
46183 v = gen_rtvec (2, ops[i - 1], ops[i]);
46184 ix86_expand_vector_init (false, first[j],
46185 gen_rtx_PARALLEL (cmode, v));
/* Three-level reduction: first -> second (hmode) -> third (gmode).  */
46191 gcc_assert (hmode != VOIDmode);
46192 gcc_assert (gmode != VOIDmode);
46193 for (i = j = 0; i < n; i += 2, j++)
46195 second[j] = gen_reg_rtx (hmode);
46196 ix86_expand_vector_init_concat (hmode, second [j],
46200 for (i = j = 0; i < n; i += 2, j++)
46202 third[j] = gen_reg_rtx (gmode);
46203 ix86_expand_vector_init_concat (gmode, third[j],
46207 ix86_expand_vector_init_concat (mode, target, third, n);
/* Two-level reduction: first -> second (hmode) -> target.  */
46211 gcc_assert (hmode != VOIDmode);
46212 for (i = j = 0; i < n; i += 2, j++)
46214 second[j] = gen_reg_rtx (hmode);
46215 ix86_expand_vector_init_concat (hmode, second [j],
46219 ix86_expand_vector_init_concat (mode, target, second, n);
/* One-level reduction: first -> target.  */
46222 ix86_expand_vector_init_concat (mode, target, first, n);
46226 gcc_unreachable ();
46230 /* A subroutine of ix86_expand_vector_init_general. Use vector
46231 interleave to handle the most general case: all values variable,
46232 and none identical. */
/* NOTE(review): case labels, braces and some declarations are elided
   in this excerpt; comments only.  */
46235 ix86_expand_vector_init_interleave (machine_mode mode,
46236 rtx target, rtx *ops, int n)
46238 machine_mode first_imode, second_imode, third_imode, inner_mode;
46241 rtx (*gen_load_even) (rtx, rtx, rtx);
46242 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
46243 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
/* V8HImode configuration.  */
46248 gen_load_even = gen_vec_setv8hi;
46249 gen_interleave_first_low = gen_vec_interleave_lowv4si;
46250 gen_interleave_second_low = gen_vec_interleave_lowv2di;
46251 inner_mode = HImode;
46252 first_imode = V4SImode;
46253 second_imode = V2DImode;
46254 third_imode = VOIDmode;
/* V16QImode configuration.  */
46257 gen_load_even = gen_vec_setv16qi;
46258 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
46259 gen_interleave_second_low = gen_vec_interleave_lowv4si;
46260 inner_mode = QImode;
46261 first_imode = V8HImode;
46262 second_imode = V4SImode;
46263 third_imode = V2DImode;
46266 gcc_unreachable ();
/* Stage 1: load each odd/even element pair into its own vector.  */
46269 for (i = 0; i < n; i++)
46271 /* Extend the odd elment to SImode using a paradoxical SUBREG. */
46272 op0 = gen_reg_rtx (SImode);
46273 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
46275 /* Insert the SImode value as low element of V4SImode vector. */
46276 op1 = gen_reg_rtx (V4SImode);
46277 op0 = gen_rtx_VEC_MERGE (V4SImode,
46278 gen_rtx_VEC_DUPLICATE (V4SImode,
46280 CONST0_RTX (V4SImode),
46282 emit_insn (gen_rtx_SET (op1, op0));
46284 /* Cast the V4SImode vector back to a vector in orignal mode. */
46285 op0 = gen_reg_rtx (mode);
46286 emit_move_insn (op0, gen_lowpart (mode, op1));
46288 /* Load even elements into the second position. */
46289 emit_insn (gen_load_even (op0,
46290 force_reg (inner_mode,
46294 /* Cast vector to FIRST_IMODE vector. */
46295 ops[i] = gen_reg_rtx (first_imode);
46296 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
46299 /* Interleave low FIRST_IMODE vectors. */
46300 for (i = j = 0; i < n; i += 2, j++)
46302 op0 = gen_reg_rtx (first_imode);
46303 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
46305 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
46306 ops[j] = gen_reg_rtx (second_imode);
46307 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
46310 /* Interleave low SECOND_IMODE vectors. */
46311 switch (second_imode)
46314 for (i = j = 0; i < n / 2; i += 2, j++)
46316 op0 = gen_reg_rtx (second_imode);
46317 emit_insn (gen_interleave_second_low (op0, ops[i],
46320 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
46322 ops[j] = gen_reg_rtx (third_imode);
46323 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
/* Fall through to the V2DImode final interleave.  */
46325 second_imode = V2DImode;
46326 gen_interleave_second_low = gen_vec_interleave_lowv2di;
46330 op0 = gen_reg_rtx (second_imode);
46331 emit_insn (gen_interleave_second_low (op0, ops[0],
46334 /* Cast the SECOND_IMODE vector back to a vector on original
46336 emit_insn (gen_rtx_SET (target, gen_lowpart (mode, op0)));
46340 gcc_unreachable ();
46344 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
46345 all values variable, and none identical. */
/* NOTE(review): case labels, braces, returns and some statements are
   elided in this excerpt; comments only.  */
46348 ix86_expand_vector_init_general (bool mmx_ok, machine_mode mode,
46349 rtx target, rtx vals)
46351 rtx ops[64], op0, op1, op2, op3, op4, op5;
46352 machine_mode half_mode = VOIDmode;
46353 machine_mode quarter_mode = VOIDmode;
46360 if (!mmx_ok && !TARGET_SSE)
/* Wide-element modes: gather operands and build via CONCAT.  */
46376 n = GET_MODE_NUNITS (mode);
46377 for (i = 0; i < n; i++)
46378 ops[i] = XVECEXP (vals, 0, i);
46379 ix86_expand_vector_init_concat (mode, target, ops, n);
/* 256-bit QI/HI: interleave each half, then CONCAT the halves.  */
46383 half_mode = V16QImode;
46387 half_mode = V8HImode;
46391 n = GET_MODE_NUNITS (mode);
46392 for (i = 0; i < n; i++)
46393 ops[i] = XVECEXP (vals, 0, i);
46394 op0 = gen_reg_rtx (half_mode);
46395 op1 = gen_reg_rtx (half_mode);
46396 ix86_expand_vector_init_interleave (half_mode, op0, ops,
46398 ix86_expand_vector_init_interleave (half_mode, op1,
46399 &ops [n >> 1], n >> 2);
46400 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, op0, op1)));
/* 512-bit QI/HI: four quarter vectors, two CONCAT levels.  */
46404 quarter_mode = V16QImode;
46405 half_mode = V32QImode;
46409 quarter_mode = V8HImode;
46410 half_mode = V16HImode;
46414 n = GET_MODE_NUNITS (mode);
46415 for (i = 0; i < n; i++)
46416 ops[i] = XVECEXP (vals, 0, i);
46417 op0 = gen_reg_rtx (quarter_mode);
46418 op1 = gen_reg_rtx (quarter_mode);
46419 op2 = gen_reg_rtx (quarter_mode);
46420 op3 = gen_reg_rtx (quarter_mode);
46421 op4 = gen_reg_rtx (half_mode);
46422 op5 = gen_reg_rtx (half_mode);
46423 ix86_expand_vector_init_interleave (quarter_mode, op0, ops,
46425 ix86_expand_vector_init_interleave (quarter_mode, op1,
46426 &ops [n >> 2], n >> 3);
46427 ix86_expand_vector_init_interleave (quarter_mode, op2,
46428 &ops [n >> 1], n >> 3);
46429 ix86_expand_vector_init_interleave (quarter_mode, op3,
46430 &ops [(n >> 1) | (n >> 2)], n >> 3);
46431 emit_insn (gen_rtx_SET (op4, gen_rtx_VEC_CONCAT (half_mode, op0, op1)));
46432 emit_insn (gen_rtx_SET (op5, gen_rtx_VEC_CONCAT (half_mode, op2, op3)));
46433 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, op4, op5)));
46437 if (!TARGET_SSE4_1)
46445 /* Don't use ix86_expand_vector_init_interleave if we can't
46446 move from GPR to SSE register directly. */
46447 if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
46450 n = GET_MODE_NUNITS (mode);
46451 for (i = 0; i < n; i++)
46452 ops[i] = XVECEXP (vals, 0, i);
46453 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
46461 gcc_unreachable ();
/* Fallback: pack elements into word-sized integers, then assemble the
   vector from those words.  */
46465 int i, j, n_elts, n_words, n_elt_per_word;
46466 machine_mode inner_mode;
46467 rtx words[4], shift;
46469 inner_mode = GET_MODE_INNER (mode);
46470 n_elts = GET_MODE_NUNITS (mode);
46471 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
46472 n_elt_per_word = n_elts / n_words;
46473 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
46475 for (i = 0; i < n_words; ++i)
46477 rtx word = NULL_RTX;
/* Build each word high-to-low: shift left, then OR in the next elt.  */
46479 for (j = 0; j < n_elt_per_word; ++j)
46481 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
46482 elt = convert_modes (word_mode, inner_mode, elt, true);
46488 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
46489 word, 1, OPTAB_LIB_WIDEN);
46490 word = expand_simple_binop (word_mode, IOR, word, elt,
46491 word, 1, OPTAB_LIB_WIDEN);
46499 emit_move_insn (target, gen_lowpart (mode, words[0]));
46500 else if (n_words == 2)
/* Write low and high word parts of a clobbered temporary.  */
46502 rtx tmp = gen_reg_rtx (mode);
46503 emit_clobber (tmp);
46504 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
46505 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
46506 emit_move_insn (target, tmp);
46508 else if (n_words == 4)
46510 rtx tmp = gen_reg_rtx (V4SImode);
46511 gcc_assert (word_mode == SImode);
46512 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
46513 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
46514 emit_move_insn (target, gen_lowpart (mode, tmp));
46517 gcc_unreachable ();
46521 /* Initialize vector TARGET via VALS. Suppress the use of MMX
46522 instructions unless MMX_OK is true. */
/* NOTE(review): return type, braces and a few statements are elided in
   this excerpt; comments only.  Dispatcher: classifies VALS and picks
   the cheapest initialization strategy.  */
46525 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
46527 machine_mode mode = GET_MODE (target);
46528 machine_mode inner_mode = GET_MODE_INNER (mode);
46529 int n_elts = GET_MODE_NUNITS (mode);
46530 int n_var = 0, one_var = -1;
46531 bool all_same = true, all_const_zero = true;
/* Classify the elements: count variables, spot all-zero / all-same.  */
46535 for (i = 0; i < n_elts; ++i)
46537 x = XVECEXP (vals, 0, i);
46538 if (!(CONST_SCALAR_INT_P (x)
46539 || CONST_DOUBLE_P (x)
46540 || CONST_FIXED_P (x)))
46541 n_var++, one_var = i;
46542 else if (x != CONST0_RTX (inner_mode))
46543 all_const_zero = false;
46544 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
46548 /* Constants are best loaded from the constant pool. */
46551 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
46555 /* If all values are identical, broadcast the value. */
46557 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
46558 XVECEXP (vals, 0, 0)))
46561 /* Values where only one field is non-constant are best loaded from
46562 the pool and overwritten via move later. */
46566 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
46567 XVECEXP (vals, 0, one_var),
46571 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
/* Fully general case: all elements variable and distinct.  */
46575 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
/* Store VAL into lane ELT of vector register TARGET, in place.
   MMX_OK permits MMX-register strategies for 64-bit modes.
   NOTE(review): this is a non-contiguous excerpt -- the embedded upstream
   line numbers jump, so case labels, braces and some statements are
   elided; comments annotate only the visible lines.  */
46579 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
46581 machine_mode mode = GET_MODE (target);
46582 machine_mode inner_mode = GET_MODE_INNER (mode);
46583 machine_mode half_mode;
46584 bool use_vec_merge = false;
/* Lo/hi 128-bit half extractors for 256-bit modes, indexed
   [mode-row][half].  */
46586 static rtx (*gen_extract[6][2]) (rtx, rtx)
46588 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
46589 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
46590 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
46591 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
46592 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
46593 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
/* Matching lo/hi half inserters for the same mode rows.  */
46595 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
46597 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
46598 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
46599 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
46600 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
46601 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
46602 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
/* When MMODE is set (AVX-512 path below), gen_blendm merges the
   broadcast VAL into TARGET under a one-hot mask.  */
46605 machine_mode mmode = VOIDmode;
46606 rtx (*gen_blendm) (rtx, rtx, rtx, rtx);
/* Two-element vector: rebuild via VEC_CONCAT of VAL with the other,
   extracted element (order depends on ELT; the selecting branch is
   elided here).  */
46614 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
46615 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
46617 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
46619 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
46620 emit_insn (gen_rtx_SET (target, tmp));
46626 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
/* Same VEC_CONCAT scheme for another two-element mode (case label
   elided).  */
46630 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
46631 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
46633 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
46635 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
46636 emit_insn (gen_rtx_SET (target, tmp));
46643 /* For the two element vectors, we implement a VEC_CONCAT with
46644 the extraction of the other element. */
46646 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
46647 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
46650 op0 = val, op1 = tmp;
46652 op0 = tmp, op1 = val;
46654 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
46655 emit_insn (gen_rtx_SET (target, tmp));
46660 use_vec_merge = TARGET_SSE4_1;
46667 use_vec_merge = true;
/* V4SF without SSE4.1: shuffle-based insertion; comments trace the
   lane contents ("A B C D" = original lanes, "X" = VAL).  */
46671 /* tmp = target = A B C D */
46672 tmp = copy_to_reg (target);
46673 /* target = A A B B */
46674 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
46675 /* target = X A B B */
46676 ix86_expand_vector_set (false, target, val, 0);
46677 /* target = A X C D */
46678 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
46679 const1_rtx, const0_rtx,
46680 GEN_INT (2+4), GEN_INT (3+4)));
46684 /* tmp = target = A B C D */
46685 tmp = copy_to_reg (target);
46686 /* tmp = X B C D */
46687 ix86_expand_vector_set (false, tmp, val, 0);
46688 /* target = A B X D */
46689 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
46690 const0_rtx, const1_rtx,
46691 GEN_INT (0+4), GEN_INT (3+4)));
46695 /* tmp = target = A B C D */
46696 tmp = copy_to_reg (target);
46697 /* tmp = X B C D */
46698 ix86_expand_vector_set (false, tmp, val, 0);
46699 /* target = A B X D */
46700 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
46701 const0_rtx, const1_rtx,
46702 GEN_INT (2+4), GEN_INT (0+4)));
46706 gcc_unreachable ();
46711 use_vec_merge = TARGET_SSE4_1;
46715 /* Element 0 handled by vec_merge below. */
46718 use_vec_merge = true;
46724 /* With SSE2, use integer shuffles to swap element 0 and ELT,
46725 store into element 0, then shuffle them back. */
46729 order[0] = GEN_INT (elt);
46730 order[1] = const1_rtx;
46731 order[2] = const2_rtx;
46732 order[3] = GEN_INT (3);
/* Swap lanes 0 and ELT in the shuffle order.  */
46733 order[elt] = const0_rtx;
46735 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
46736 order[1], order[2], order[3]));
46738 ix86_expand_vector_set (false, target, val, 0);
/* The same order permutes the lanes back afterwards.  */
46740 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
46741 order[1], order[2], order[3]));
46745 /* For SSE1, we have to reuse the V4SF code. */
46746 rtx t = gen_reg_rtx (V4SFmode);
46747 emit_move_insn (t, gen_lowpart (V4SFmode, target));
46748 ix86_expand_vector_set (false, t, gen_lowpart (SFmode, val), elt);
46749 emit_move_insn (target, gen_lowpart (mode, t));
46754 use_vec_merge = TARGET_SSE2;
46757 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
46761 use_vec_merge = TARGET_SSE4_1;
/* 256-bit modes: pick the 128-bit half mode (case labels elided),
   then extract / modify / re-insert the affected half below.  */
46768 half_mode = V16QImode;
46774 half_mode = V8HImode;
46780 half_mode = V4SImode;
46786 half_mode = V2DImode;
46792 half_mode = V4SFmode;
46798 half_mode = V2DFmode;
46804 /* Compute offset. */
46808 gcc_assert (i <= 1);
46810 /* Extract the half. */
46811 tmp = gen_reg_rtx (half_mode);
46812 emit_insn (gen_extract[j][i] (tmp, target));
46814 /* Put val in tmp at elt. */
46815 ix86_expand_vector_set (false, tmp, val, elt);
46818 emit_insn (gen_insert[j][i] (target, target, tmp));
/* 512-bit modes: select the masked-blend insn and its mask mode
   (mmode assignments elided between these lines).  */
46822 if (TARGET_AVX512F)
46825 gen_blendm = gen_avx512f_blendmv8df;
46830 if (TARGET_AVX512F)
46833 gen_blendm = gen_avx512f_blendmv8di;
46838 if (TARGET_AVX512F)
46841 gen_blendm = gen_avx512f_blendmv16sf;
46846 if (TARGET_AVX512F)
46849 gen_blendm = gen_avx512f_blendmv16si;
46854 if (TARGET_AVX512F && TARGET_AVX512BW)
46857 gen_blendm = gen_avx512bw_blendmv32hi;
46862 if (TARGET_AVX512F && TARGET_AVX512BW)
46865 gen_blendm = gen_avx512bw_blendmv64qi;
/* AVX-512 path: broadcast VAL, then blend it into TARGET under a
   one-hot mask selecting lane ELT.  */
46873 if (mmode != VOIDmode)
46875 tmp = gen_reg_rtx (mode);
46876 emit_insn (gen_rtx_SET (tmp, gen_rtx_VEC_DUPLICATE (mode, val)));
46877 emit_insn (gen_blendm (target, tmp, target,
46879 gen_int_mode (1 << elt, mmode))));
46881 else if (use_vec_merge)
46883 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
46884 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
46885 emit_insn (gen_rtx_SET (target, tmp));
/* Fallback: spill TARGET to a stack slot, store VAL at the element's
   byte offset, reload the whole vector.  */
46889 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
46891 emit_move_insn (mem, target);
46893 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
46894 emit_move_insn (tmp, val);
46896 emit_move_insn (target, mem);
/* Extract lane ELT of vector VEC into scalar register TARGET.
   MMX_OK permits MMX-register strategies.
   NOTE(review): non-contiguous excerpt -- case labels, braces and some
   statements between the visible lines are elided.  */
46901 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
46903 machine_mode mode = GET_MODE (vec);
46904 machine_mode inner_mode = GET_MODE_INNER (mode);
46905 bool use_vec_extr = false;
46918 use_vec_extr = true;
46922 use_vec_extr = TARGET_SSE4_1;
/* V4SF without SSE4.1: shuffle the wanted lane into position 0
   (dispatch on ELT elided).  */
46934 tmp = gen_reg_rtx (mode);
46935 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
46936 GEN_INT (elt), GEN_INT (elt),
46937 GEN_INT (elt+4), GEN_INT (elt+4)));
46941 tmp = gen_reg_rtx (mode);
46942 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
46946 gcc_unreachable ();
46949 use_vec_extr = true;
46954 use_vec_extr = TARGET_SSE4_1;
/* V4SI: same lane-to-position-0 scheme using pshufd.  */
46968 tmp = gen_reg_rtx (mode);
46969 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
46970 GEN_INT (elt), GEN_INT (elt),
46971 GEN_INT (elt), GEN_INT (elt)));
46975 tmp = gen_reg_rtx (mode);
46976 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
46980 gcc_unreachable ();
46983 use_vec_extr = true;
46988 /* For SSE1, we have to reuse the V4SF code. */
46989 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
46990 gen_lowpart (V4SFmode, vec), elt);
46996 use_vec_extr = TARGET_SSE2;
46999 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
47003 use_vec_extr = TARGET_SSE4_1;
/* 256-/512-bit modes: extract the lo or hi half containing ELT
   (the half-selecting branch is elided before each pair), then recurse
   on the half with the element index masked to the half's width.  */
47009 tmp = gen_reg_rtx (V4SFmode);
47011 emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
47013 emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
47014 ix86_expand_vector_extract (false, target, tmp, elt & 3);
47022 tmp = gen_reg_rtx (V2DFmode);
47024 emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
47026 emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
47027 ix86_expand_vector_extract (false, target, tmp, elt & 1);
47035 tmp = gen_reg_rtx (V16QImode);
47037 emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
47039 emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
47040 ix86_expand_vector_extract (false, target, tmp, elt & 15);
47048 tmp = gen_reg_rtx (V8HImode);
47050 emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
47052 emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
47053 ix86_expand_vector_extract (false, target, tmp, elt & 7);
47061 tmp = gen_reg_rtx (V4SImode);
47063 emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
47065 emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
47066 ix86_expand_vector_extract (false, target, tmp, elt & 3);
47074 tmp = gen_reg_rtx (V2DImode);
47076 emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
47078 emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
47079 ix86_expand_vector_extract (false, target, tmp, elt & 1);
47085 if (TARGET_AVX512BW)
47087 tmp = gen_reg_rtx (V16HImode);
47089 emit_insn (gen_vec_extract_lo_v32hi (tmp, vec));
47091 emit_insn (gen_vec_extract_hi_v32hi (tmp, vec));
47092 ix86_expand_vector_extract (false, target, tmp, elt & 15);
47098 if (TARGET_AVX512BW)
47100 tmp = gen_reg_rtx (V32QImode);
47102 emit_insn (gen_vec_extract_lo_v64qi (tmp, vec));
47104 emit_insn (gen_vec_extract_hi_v64qi (tmp, vec));
47105 ix86_expand_vector_extract (false, target, tmp, elt & 31);
47111 tmp = gen_reg_rtx (V8SFmode);
47113 emit_insn (gen_vec_extract_lo_v16sf (tmp, vec));
47115 emit_insn (gen_vec_extract_hi_v16sf (tmp, vec));
47116 ix86_expand_vector_extract (false, target, tmp, elt & 7);
47120 tmp = gen_reg_rtx (V4DFmode);
47122 emit_insn (gen_vec_extract_lo_v8df (tmp, vec));
47124 emit_insn (gen_vec_extract_hi_v8df (tmp, vec));
47125 ix86_expand_vector_extract (false, target, tmp, elt & 3);
47129 tmp = gen_reg_rtx (V8SImode);
47131 emit_insn (gen_vec_extract_lo_v16si (tmp, vec));
47133 emit_insn (gen_vec_extract_hi_v16si (tmp, vec));
47134 ix86_expand_vector_extract (false, target, tmp, elt & 7);
47138 tmp = gen_reg_rtx (V4DImode);
47140 emit_insn (gen_vec_extract_lo_v8di (tmp, vec));
47142 emit_insn (gen_vec_extract_hi_v8di (tmp, vec));
47143 ix86_expand_vector_extract (false, target, tmp, elt & 3);
47147 /* ??? Could extract the appropriate HImode element and shift. */
/* Generic VEC_SELECT path when use_vec_extr holds.  */
47154 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
47155 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
47157 /* Let the rtl optimizers know about the zero extension performed. */
47158 if (inner_mode == QImode || inner_mode == HImode)
47160 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
47161 target = gen_lowpart (SImode, target);
47164 emit_insn (gen_rtx_SET (target, tmp));
/* Fallback: spill VEC to a stack slot and load the element's bytes.  */
47168 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
47170 emit_move_insn (mem, vec);
47172 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
47173 emit_move_insn (target, tmp);
47177 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
47178 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
47179 The upper bits of DEST are undefined, though they shouldn't cause
47180 exceptions (some bits from src or all zeros are ok). */
/* NOTE(review): non-contiguous excerpt -- case labels and some
   statements between the visible lines are elided.  */
47183 emit_reduc_half (rtx dest, rtx src, int i)
47186 switch (GET_MODE (src))
/* V4SF: movhlps for the 64-bit half, shufps for the 32-bit half
   (I-dependent dispatch elided).  */
47190 tem = gen_sse_movhlps (dest, src, src);
47192 tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
47193 GEN_INT (1 + 4), GEN_INT (1 + 4));
47196 tem = gen_vec_interleave_highv2df (dest, src, src);
/* 128-bit integer modes: shift the whole vector right by I/2 bits
   via V1TImode.  */
47202 d = gen_reg_rtx (V1TImode);
47203 tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src),
/* 256-bit float modes: cross-lane vperm2f128 for the 128-bit step,
   in-lane shuffles for smaller steps.  */
47208 tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
47210 tem = gen_avx_shufps256 (dest, src, src,
47211 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
47215 tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
47217 tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
/* 256-bit integer modes via V4DImode permutes / V2TImode shifts.  */
47225 if (GET_MODE (dest) != V4DImode)
47226 d = gen_reg_rtx (V4DImode);
47227 tem = gen_avx2_permv2ti (d, gen_lowpart (V4DImode, src),
47228 gen_lowpart (V4DImode, src),
47233 d = gen_reg_rtx (V2TImode);
47234 tem = gen_avx2_lshrv2ti3 (d, gen_lowpart (V2TImode, src),
/* 512-bit modes: shuf_i32x4 for the 256/512-bit steps, pshufd for
   the in-lane steps; immediates select the upper half by I.  */
47245 tem = gen_avx512f_shuf_i32x4_1 (gen_lowpart (V16SImode, dest),
47246 gen_lowpart (V16SImode, src),
47247 gen_lowpart (V16SImode, src),
47248 GEN_INT (0x4 + (i == 512 ? 4 : 0)),
47249 GEN_INT (0x5 + (i == 512 ? 4 : 0)),
47250 GEN_INT (0x6 + (i == 512 ? 4 : 0)),
47251 GEN_INT (0x7 + (i == 512 ? 4 : 0)),
47252 GEN_INT (0xC), GEN_INT (0xD),
47253 GEN_INT (0xE), GEN_INT (0xF),
47254 GEN_INT (0x10), GEN_INT (0x11),
47255 GEN_INT (0x12), GEN_INT (0x13),
47256 GEN_INT (0x14), GEN_INT (0x15),
47257 GEN_INT (0x16), GEN_INT (0x17));
47259 tem = gen_avx512f_pshufd_1 (gen_lowpart (V16SImode, dest),
47260 gen_lowpart (V16SImode, src),
47261 GEN_INT (i == 128 ? 0x2 : 0x1),
47265 GEN_INT (i == 128 ? 0x6 : 0x5),
47269 GEN_INT (i == 128 ? 0xA : 0x9),
47273 GEN_INT (i == 128 ? 0xE : 0xD),
47279 gcc_unreachable ();
/* When a temporary D in a different mode was used, copy it back to
   DEST in DEST's mode.  */
47283 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
47286 /* Expand a vector reduction. FN is the binary pattern to reduce;
47287 DEST is the destination; IN is the input vector. */
/* NOTE(review): excerpt -- some lines between the visible ones (loop
   step, final extract) are elided.  */
47290 ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
47292 rtx half, dst, vec = in;
47293 machine_mode mode = GET_MODE (in);
47296 /* SSE4 has a special instruction for V8HImode UMIN reduction. */
47298 && mode == V8HImode
47299 && fn == gen_uminv8hi3
47301 emit_insn (gen_sse4_1_phminposuw (dest, in));
/* Generic log2 reduction: repeatedly fold the high half onto the low
   half with FN, halving the active width each iteration.  */
47305 for (i = GET_MODE_BITSIZE (mode);
47306 i > GET_MODE_UNIT_BITSIZE (mode);
47309 half = gen_reg_rtx (mode);
47310 emit_reduc_half (half, vec, i);
/* On the last iteration the result goes to DEST (branch body
   elided between these lines).  */
47311 if (i == GET_MODE_UNIT_BITSIZE (mode) * 2)
47314 dst = gen_reg_rtx (mode);
47315 emit_insn (fn (dst, half, vec));
47320 /* Target hook for scalar_mode_supported_p. */
47322 ix86_scalar_mode_supported_p (machine_mode mode)
47324 if (DECIMAL_FLOAT_MODE_P (mode))
47325 return default_decimal_float_supported_p ();
/* TFmode (__float128): the return for this branch is elided in this
   excerpt.  */
47326 else if (mode == TFmode)
47329 return default_scalar_mode_supported_p (mode);
47332 /* Implements target hook vector_mode_supported_p.  Each test accepts
   the mode when the corresponding ISA extension is enabled; the
   "return true" lines between the tests are elided in this excerpt. */
47334 ix86_vector_mode_supported_p (machine_mode mode)
47336 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
47338 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
47340 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
47342 if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
47344 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
47346 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
47351 /* Implement target hook libgcc_floating_mode_supported_p.
   NOTE(review): body heavily elided in this excerpt; only the TFmode
   configuration conditionals are visible.  */
47353 ix86_libgcc_floating_mode_supported_p (machine_mode mode)
47363 #ifdef IX86_NO_LIBGCC_TFMODE
47365 #elif defined IX86_MAYBE_NO_LIBGCC_TFMODE
47366 return TARGET_LONG_DOUBLE_128;
47376 /* Target hook for c_mode_for_suffix. */
47377 static machine_mode
47378 ix86_c_mode_for_suffix (char suffix)
/* NOTE(review): the function body (upstream lines 47379-47387) is
   elided from this excerpt.  */
47388 /* Worker function for TARGET_MD_ASM_ADJUST.
47390 We implement asm flag outputs, and maintain source compatibility
47391 with the old cc0-based compiler. */
/* NOTE(review): non-contiguous excerpt -- braces, `continue`s and some
   branches between the visible lines are elided.  */
47394 ix86_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &/*inputs*/,
47395 vec<const char *> &constraints,
47396 vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
/* Inline asm always clobbers the x87 status word on this target.  */
47398 clobbers.safe_push (gen_rtx_REG (CCFPmode, FPSR_REG));
47399 SET_HARD_REG_BIT (clobbered_regs, FPSR_REG);
47401 bool saw_asm_flag = false;
47404 for (unsigned i = 0, n = outputs.length (); i < n; ++i)
47406 const char *con = constraints[i];
/* Only "=@cc<cond>" constraints are flag outputs.  */
47407 if (strncmp (con, "=@cc", 4) != 0)
47410 if (strchr (con, ',') != NULL)
47412 error ("alternatives not allowed in asm flag output");
/* A leading 'n' on the condition inverts it (elided test).  */
47416 bool invert = false;
47418 invert = true, con++;
/* Map the condition mnemonic (a/ae, b/be, c, e/z, g/ge, l/le, o, p,
   s, z) to a CC mode and comparison code; the switch/case structure
   is elided between these assignments.  */
47420 machine_mode mode = CCmode;
47421 rtx_code code = UNKNOWN;
47427 mode = CCAmode, code = EQ;
47428 else if (con[1] == 'e' && con[2] == 0)
47429 mode = CCCmode, code = NE;
47433 mode = CCCmode, code = EQ;
47434 else if (con[1] == 'e' && con[2] == 0)
47435 mode = CCAmode, code = NE;
47439 mode = CCCmode, code = EQ;
47443 mode = CCZmode, code = EQ;
47447 mode = CCGCmode, code = GT;
47448 else if (con[1] == 'e' && con[2] == 0)
47449 mode = CCGCmode, code = GE;
47453 mode = CCGCmode, code = LT;
47454 else if (con[1] == 'e' && con[2] == 0)
47455 mode = CCGCmode, code = LE;
47459 mode = CCOmode, code = EQ;
47463 mode = CCPmode, code = EQ;
47467 mode = CCSmode, code = EQ;
47471 mode = CCZmode, code = EQ;
47474 if (code == UNKNOWN)
47476 error ("unknown asm flag output %qs", constraints[i]);
47480 code = reverse_condition (code);
47482 rtx dest = outputs[i];
47485 /* This is the first asm flag output. Here we put the flags
47486 register in as the real output and adjust the condition to
47488 constraints[i] = "=Bf";
47489 outputs[i] = gen_rtx_REG (CCmode, FLAGS_REG);
47490 saw_asm_flag = true;
47494 /* We don't need the flags register as output twice. */
47495 constraints[i] = "=X";
47496 outputs[i] = gen_rtx_SCRATCH (SImode);
/* Build <code> (flags, 0) as a QImode value for the user's DEST.  */
47499 rtx x = gen_rtx_REG (mode, FLAGS_REG);
47500 x = gen_rtx_fmt_ee (code, QImode, x, const0_rtx);
47502 machine_mode dest_mode = GET_MODE (dest);
47503 if (!SCALAR_INT_MODE_P (dest_mode))
47505 error ("invalid type for asm flag output");
/* A 64-bit DEST on 32-bit targets is widened from SImode below.  */
47509 if (dest_mode == DImode && !TARGET_64BIT)
47510 dest_mode = SImode;
47512 if (dest_mode != QImode)
47514 rtx destqi = gen_reg_rtx (QImode);
47515 emit_insn (gen_rtx_SET (destqi, x));
47517 if (TARGET_ZERO_EXTEND_WITH_AND
47518 && optimize_function_for_speed_p (cfun))
/* Zero the wide reg, then strict-insert the QImode setcc result.  */
47520 x = force_reg (dest_mode, const0_rtx);
47522 emit_insn (gen_movstrictqi
47523 (gen_lowpart (QImode, x), destqi));
47526 x = gen_rtx_ZERO_EXTEND (dest_mode, destqi);
47529 if (dest_mode != GET_MODE (dest))
47531 rtx tmp = gen_reg_rtx (SImode);
47533 emit_insn (gen_rtx_SET (tmp, x));
47534 emit_insn (gen_zero_extendsidi2 (dest, tmp));
47537 emit_insn (gen_rtx_SET (dest, x));
47539 rtx_insn *seq = get_insns ();
47546 /* If we had no asm flag outputs, clobber the flags. */
47547 clobbers.safe_push (gen_rtx_REG (CCmode, FLAGS_REG));
47548 SET_HARD_REG_BIT (clobbered_regs, FLAGS_REG);
47553 /* Implements target vector targetm.asm.encode_section_info. */
47555 static void ATTRIBUTE_UNUSED
47556 ix86_encode_section_info (tree decl, rtx rtl, int first)
47558 default_encode_section_info (decl, rtl, first);
/* Large-model data needs far (64-bit) addressing.  */
47560 if (ix86_in_large_data_p (decl))
47561 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
47564 /* Worker function for REVERSE_CONDITION.  FP compares must use the
   maybe-unordered reversal so NaN operands stay handled correctly. */
47567 ix86_reverse_condition (enum rtx_code code, machine_mode mode)
47569 return (mode != CCFPmode && mode != CCFPUmode
47570 ? reverse_condition (code)
47571 : reverse_condition_maybe_unordered (code));
47574 /* Output code to perform an x87 FP register move, from OPERANDS[1]
   to OPERANDS[0]; returns the assembler template string.  Popping
   (fstp) forms are used when the source register dies in this insn.
   NOTE(review): excerpt; a few lines between the visible ones are
   elided.  */
47578 output_387_reg_move (rtx insn, rtx *operands)
47580 if (REG_P (operands[0]))
47582 if (REG_P (operands[1])
47583 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
/* Source dies: pop it.  ffreep handles the stack-top target.  */
47585 if (REGNO (operands[0]) == FIRST_STACK_REG)
47586 return output_387_ffreep (operands, 0);
47587 return "fstp\t%y0";
47589 if (STACK_TOP_P (operands[0]))
47590 return "fld%Z1\t%y1";
47593 else if (MEM_P (operands[0]))
47595 gcc_assert (REG_P (operands[1]));
47596 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
47597 return "fstp%Z0\t%y0";
47600 /* There is no non-popping store to memory for XFmode.
47601 So if we need one, follow the store with a load. */
47602 if (GET_MODE (operands[0]) == XFmode)
47603 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
47605 return "fst%Z0\t%y0";
47612 /* Output code to perform a conditional jump to LABEL, if C2 flag in
47613 FP status register is set. */
47616 ix86_emit_fp_unordered_jump (rtx label)
47618 rtx reg = gen_reg_rtx (HImode)
/* Read the x87 status word into REG.  */
47621 emit_insn (gen_x86_fnstsw_1 (reg));
47623 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
/* sahf copies AH into EFLAGS; test for unordered there.  */
47625 emit_insn (gen_x86_sahf_1 (reg));
47627 temp = gen_rtx_REG (CCmode, FLAGS_REG);
47628 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
/* No sahf: test the C2 bit (0x04 in the high byte) directly.  */
47632 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
47634 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
47635 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
47638 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
47639 gen_rtx_LABEL_REF (VOIDmode, label),
47641 temp = gen_rtx_SET (pc_rtx, temp);
47643 emit_jump_insn (temp);
/* Unordered operands are unlikely; predict not-taken (10%).  */
47644 predict_jump (REG_BR_PROB_BASE * 10 / 100);
47647 /* Output code to perform a log1p XFmode calculation: OP0 = log1p(OP1).
   Uses fyl2xp1 for small |OP1| (where it is accurate) and falls back
   to fyl2x(1 + OP1) otherwise; threshold is 1 - sqrt(2)/2.  */
47649 void ix86_emit_i387_log1p (rtx op0, rtx op1)
47651 rtx_code_label *label1 = gen_label_rtx ();
47652 rtx_code_label *label2 = gen_label_rtx ();
47654 rtx tmp = gen_reg_rtx (XFmode);
47655 rtx tmp2 = gen_reg_rtx (XFmode);
47658 emit_insn (gen_absxf2 (tmp, op1));
47659 test = gen_rtx_GE (VOIDmode, tmp,
47660 const_double_from_real_value (
47661 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
47663 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
/* Small-argument path: log1p(x) = ln2 * fyl2xp1(x).  */
47665 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
47666 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
47667 emit_jump (label2);
/* Large-argument path: log1p(x) = ln2 * fyl2x(1 + x).  */
47669 emit_label (label1);
47670 emit_move_insn (tmp, CONST1_RTX (XFmode));
47671 emit_insn (gen_addxf3 (tmp, op1, tmp));
47672 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
47673 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
47675 emit_label (label2);
47678 /* Emit code for round calculation: OP0 = round(OP1), i.e.
   sgn(OP1) * floor(|OP1| + 0.5), using x87 insns.  INMODE is a float
   mode, OUTMODE may be float or integer.
   NOTE(review): excerpt -- switch/case labels and some lines between
   the visible ones are elided.  */
47679 void ix86_emit_i387_round (rtx op0, rtx op1)
47681 machine_mode inmode = GET_MODE (op1);
47682 machine_mode outmode = GET_MODE (op0);
47683 rtx e1, e2, res, tmp, tmp1, half;
47684 rtx scratch = gen_reg_rtx (HImode);
47685 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
47686 rtx_code_label *jump_label = gen_label_rtx ();
47688 rtx (*gen_abs) (rtx, rtx);
47689 rtx (*gen_neg) (rtx, rtx);
/* Pick abs by input float mode (SF/DF/XF; case labels elided).  */
47694 gen_abs = gen_abssf2;
47697 gen_abs = gen_absdf2;
47700 gen_abs = gen_absxf2;
47703 gcc_unreachable ();
/* Pick neg by output mode (SF/DF/XF/HI/SI/DI).  */
47709 gen_neg = gen_negsf2;
47712 gen_neg = gen_negdf2;
47715 gen_neg = gen_negxf2;
47718 gen_neg = gen_neghi2;
47721 gen_neg = gen_negsi2;
47724 gen_neg = gen_negdi2;
47727 gcc_unreachable ();
47730 e1 = gen_reg_rtx (inmode);
47731 e2 = gen_reg_rtx (inmode);
47732 res = gen_reg_rtx (outmode);
47734 half = const_double_from_real_value (dconsthalf, inmode);
47736 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
47738 /* scratch = fxam(op1) */
47739 emit_insn (gen_rtx_SET (scratch,
47740 gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
47742 /* e1 = fabs(op1) */
47743 emit_insn (gen_abs (e1, op1));
47745 /* e2 = e1 + 0.5 */
47746 half = force_reg (inmode, half);
47747 emit_insn (gen_rtx_SET (e2, gen_rtx_PLUS (inmode, e1, half)));
47749 /* res = floor(e2) */
47750 if (inmode != XFmode)
/* Widen to XFmode first; x87 rounding insns work on XFmode.  */
47752 tmp1 = gen_reg_rtx (XFmode);
47754 emit_insn (gen_rtx_SET (tmp1, gen_rtx_FLOAT_EXTEND (XFmode, e2)));
/* Dispatch on OUTMODE (case labels elided): frndint for float
   results, lfloor for integer results.  */
47764 rtx tmp0 = gen_reg_rtx (XFmode);
47766 emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
47768 emit_insn (gen_rtx_SET (res,
47769 gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
47770 UNSPEC_TRUNC_NOOP)));
47774 emit_insn (gen_frndintxf2_floor (res, tmp1));
47777 emit_insn (gen_lfloorxfhi2 (res, tmp1));
47780 emit_insn (gen_lfloorxfsi2 (res, tmp1));
47783 emit_insn (gen_lfloorxfdi2 (res, tmp1));
47786 gcc_unreachable ();
47789 /* flags = signbit(a) */
47790 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));
47792 /* if (flags) then res = -res */
47793 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
47794 gen_rtx_EQ (VOIDmode, flags, const0_rtx),
47795 gen_rtx_LABEL_REF (VOIDmode, jump_label),
47797 insn = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
47798 predict_jump (REG_BR_PROB_BASE * 50 / 100);
47799 JUMP_LABEL (insn) = jump_label;
47801 emit_insn (gen_neg (res, res));
47803 emit_label (jump_label);
47804 LABEL_NUSES (jump_label) = 1;
47806 emit_move_insn (op0, res);
47809 /* Output code to perform a Newton-Rhapson approximation of a single precision
47810 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm].
   RES = A / B approximated via one Newton step on rcp(B).  */
47812 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode)
47814 rtx x0, x1, e0, e1;
47816 x0 = gen_reg_rtx (mode);
47817 e0 = gen_reg_rtx (mode);
47818 e1 = gen_reg_rtx (mode);
47819 x1 = gen_reg_rtx (mode);
47821 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
47823 b = force_reg (mode, b);
47825 /* x0 = rcp(b) estimate */
/* 512-bit modes use the AVX-512 rcp14 variant (unspec code on the
   elided continuation lines).  */
47826 if (mode == V16SFmode || mode == V8DFmode)
47827 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
47830 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
/* e0 = b * x0 * x0 (two multiplies).  */
47834 emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, b)));
47837 emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, e0)));
/* e1 = 2 * x0.  */
47840 emit_insn (gen_rtx_SET (e1, gen_rtx_PLUS (mode, x0, x0)));
/* x1 = 2*x0 - b*x0*x0 = refined reciprocal.  */
47843 emit_insn (gen_rtx_SET (x1, gen_rtx_MINUS (mode, e1, e0)));
47846 emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, a, x1)));
47849 /* Output code to perform a Newton-Rhapson approximation of a
47850 single precision floating point [reciprocal] square root.
   RES = sqrt(A) when !RECIP, rsqrt(A) when RECIP, via one Newton
   step on the hardware rsqrt estimate.  */
47852 void ix86_emit_swsqrtsf (rtx res, rtx a, machine_mode mode, bool recip)
47854 rtx x0, e0, e1, e2, e3, mthree, mhalf;
47858 x0 = gen_reg_rtx (mode);
47859 e0 = gen_reg_rtx (mode);
47860 e1 = gen_reg_rtx (mode);
47861 e2 = gen_reg_rtx (mode);
47862 e3 = gen_reg_rtx (mode);
/* Constants -3.0 and -0.5 used by the Newton formula below.  */
47864 real_from_integer (&r, VOIDmode, -3, SIGNED);
47865 mthree = const_double_from_real_value (r, SFmode);
47867 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
47868 mhalf = const_double_from_real_value (r, SFmode);
47869 unspec = UNSPEC_RSQRT;
47871 if (VECTOR_MODE_P (mode))
47873 mthree = ix86_build_const_vector (mode, true, mthree);
47874 mhalf = ix86_build_const_vector (mode, true, mhalf);
47875 /* There is no 512-bit rsqrt. There is however rsqrt14. */
47876 if (GET_MODE_SIZE (mode) == 64)
47877 unspec = UNSPEC_RSQRT14;
47880 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
47881 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
47883 a = force_reg (mode, a);
47885 /* x0 = rsqrt(a) estimate */
47886 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
47889 /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0). */
47892 rtx zero = force_reg (mode, CONST0_RTX(mode));
47895 /* Handle masked compare. */
47896 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
47898 mask = gen_reg_rtx (HImode);
47899 /* Imm value 0x4 corresponds to not-equal comparison. */
47900 emit_insn (gen_avx512f_cmpv16sf3 (mask, zero, a, GEN_INT (0x4)));
47901 emit_insn (gen_avx512f_blendmv16sf (x0, zero, x0, mask));
/* Non-512-bit path: full-width compare mask, AND it into x0 so
   x0 becomes 0 where a == 0.  */
47905 mask = gen_reg_rtx (mode);
47906 emit_insn (gen_rtx_SET (mask, gen_rtx_NE (mode, zero, a)));
47907 emit_insn (gen_rtx_SET (x0, gen_rtx_AND (mode, x0, mask)));
/* e0 = a * x0; e1 = e0 * x0 = a * x0^2.  */
47912 emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, a)));
47914 emit_insn (gen_rtx_SET (e1, gen_rtx_MULT (mode, e0, x0)));
/* e2 = a * x0^2 - 3.  */
47917 mthree = force_reg (mode, mthree);
47918 emit_insn (gen_rtx_SET (e2, gen_rtx_PLUS (mode, e1, mthree)));
47920 mhalf = force_reg (mode, mhalf);
47922 /* e3 = -.5 * x0 */
47923 emit_insn (gen_rtx_SET (e3, gen_rtx_MULT (mode, x0, mhalf)));
47925 /* e3 = -.5 * e0 */
47926 emit_insn (gen_rtx_SET (e3, gen_rtx_MULT (mode, e0, mhalf)));
47927 /* ret = e2 * e3 */
47928 emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, e2, e3)));
47931 #ifdef TARGET_SOLARIS
47932 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
47935 i386_solaris_elf_named_section (const char *name, unsigned int flags,
47938 /* With Binutils 2.15, the "@unwind" marker must be specified on
47939 every occurrence of the ".eh_frame" section, not just the first
/* (condition on the assembler feature elided before this test)  */
47942 && strcmp (name, ".eh_frame") == 0)
47944 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
47945 flags & SECTION_WRITE ? "aw" : "a");
/* COMDAT groups go through the Solaris helper; everything else uses
   the generic ELF output.  */
47950 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
47952 solaris_elf_asm_comdat_section (name, flags, decl);
47957 default_elf_asm_named_section (name, flags, decl);
47959 #endif /* TARGET_SOLARIS */
47961 /* Return the mangling of TYPE if it is an extended fundamental type.
   NOTE(review): excerpt; the returned string literals and default
   case are on elided lines.  */
47963 static const char *
47964 ix86_mangle_type (const_tree type)
47966 type = TYPE_MAIN_VARIANT (type);
/* Only scalar arithmetic-ish types can be extended fundamentals.  */
47968 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
47969 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
47972 switch (TYPE_MODE (type))
47975 /* __float128 is "g". */
47978 /* "long double" or __float80 is "e". */
47985 /* For 32-bit code we can save PIC register setup by using
47986 __stack_chk_fail_local hidden function instead of calling
47987 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
47988 register, so it is better to call __stack_chk_fail directly. */
47990 static tree ATTRIBUTE_UNUSED
47991 ix86_stack_protect_fail (void)
47993 return TARGET_64BIT
47994 ? default_external_stack_protect_fail ()
47995 : default_hidden_stack_protect_fail ();
47998 /* Select a format to encode pointers in exception handling data. CODE
47999 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
48000 true if the symbol may be affected by dynamic relocations.
48002 ??? All x86 object file formats are capable of representing this.
48003 After all, the relocation needed is the same as for the call insn.
48004 Whether or not a particular assembler allows us to enter such, I
48005 guess we'll have to see. */
48007 asm_preferred_eh_data_format (int code, int global)
/* PIC path (enclosing condition elided): pc-relative sdata, 4-byte
   for small/medium code models, 8-byte otherwise.  */
48011 int type = DW_EH_PE_sdata8;
48013 || ix86_cmodel == CM_SMALL_PIC
48014 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code))
48015 type = DW_EH_PE_sdata4;
48016 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
/* Non-PIC: absolute encodings.  */
48018 if (ix86_cmodel == CM_SMALL
48019 || (ix86_cmodel == CM_MEDIUM && code))
48020 return DW_EH_PE_udata4;
48021 return DW_EH_PE_absptr;
48024 /* Expand copysign from SIGN to the positive value ABS_VALUE
48025 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
   the sign bit (the tail of this sentence is on an elided line).  */
48028 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
48030 machine_mode mode = GET_MODE (sign);
48031 rtx sgn = gen_reg_rtx (mode);
48032 if (mask == NULL_RTX)
48034 machine_mode vmode;
/* vmode assignments for SF/DF are on elided lines.  */
48036 if (mode == SFmode)
48038 else if (mode == DFmode)
48043 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
48044 if (!VECTOR_MODE_P (mode))
48046 /* We need to generate a scalar mode mask in this case. */
48047 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
48048 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
48049 mask = gen_reg_rtx (mode);
48050 emit_insn (gen_rtx_SET (mask, tmp));
/* sgn = sign & ~mask (sign bit of SIGN); result = abs_value | sgn.  */
48054 mask = gen_rtx_NOT (mode, mask);
48055 emit_insn (gen_rtx_SET (sgn, gen_rtx_AND (mode, mask, sign)));
48056 emit_insn (gen_rtx_SET (result, gen_rtx_IOR (mode, abs_value, sgn)));
48059 /* Expand fabs (OP0) and return a new rtx that holds the result. The
48060 mask for masking out the sign-bit is stored in *SMASK, if that is
   non-null (continuation on an elided line).  */
48063 ix86_expand_sse_fabs (rtx op0, rtx *smask)
48065 machine_mode vmode, mode = GET_MODE (op0);
48068 xa = gen_reg_rtx (mode);
/* vmode assignments for SF/DF are on elided lines.  */
48069 if (mode == SFmode)
48071 else if (mode == DFmode)
/* invert=true: mask has all bits set EXCEPT the sign bit.  */
48075 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
48076 if (!VECTOR_MODE_P (mode))
48078 /* We need to generate a scalar mode mask in this case. */
48079 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
48080 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
48081 mask = gen_reg_rtx (mode);
48082 emit_insn (gen_rtx_SET (mask, tmp));
/* xa = op0 with sign bit cleared.  */
48084 emit_insn (gen_rtx_SET (xa, gen_rtx_AND (mode, op0, mask)));
48092 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
48093 swapping the operands if SWAP_OPERANDS is true. The expanded
48094 code is a forward jump to a newly created label in case the
48095 comparison is true. The generated label rtx is returned. */
48096 static rtx_code_label *
48097 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
48098 bool swap_operands)
48100 machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
48101 rtx_code_label *label;
48105 std::swap (op0, op1);
48107 label = gen_label_rtx ();
/* Emit the FP compare into the flags, then a conditional branch on
   CODE to LABEL.  */
48108 tmp = gen_rtx_REG (fpcmp_mode, FLAGS_REG);
48109 emit_insn (gen_rtx_SET (tmp, gen_rtx_COMPARE (fpcmp_mode, op0, op1)));
48110 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
48111 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
48112 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
48113 tmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
48114 JUMP_LABEL (tmp) = label;
48119 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
48120 using comparison code CODE. Operands are swapped for the comparison if
48121 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
48123 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
48124 bool swap_operands)
48126 rtx (*insn)(rtx, rtx, rtx, rtx);
48127 machine_mode mode = GET_MODE (op0);
48128 rtx mask = gen_reg_rtx (mode);
48131 std::swap (op0, op1);
/* setcc_{df,sf}_sse produce an all-ones/all-zeros mask.  */
48133 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
48135 emit_insn (insn (mask, op0, op1,
48136 gen_rtx_fmt_ee (code, mode, op0, op1)));
48140 /* Generate and return a rtx of mode MODE for 2**n where n is the number
48141 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
48143 ix86_gen_TWO52 (machine_mode mode)
48145 REAL_VALUE_TYPE TWO52r;
/* 2^52 for DFmode, 2^23 for SFmode -- adding then subtracting this
   constant rounds to integer.  */
48148 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
48149 TWO52 = const_double_from_real_value (TWO52r, mode);
48150 TWO52 = force_reg (mode, TWO52);
48155 /* Expand SSE sequence for computing lround from OP1 storing
   into OP0 (integer mode; continuation on an elided line).  */
48158 ix86_expand_lround (rtx op0, rtx op1)
48160 /* C code for the stuff we're doing below:
48161 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
   (the final truncation line of this sketch is elided)  */
48164 machine_mode mode = GET_MODE (op1);
48165 const struct real_format *fmt;
48166 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
48169 /* load nextafter (0.5, 0.0) */
/* 0.5 - 2^(-p-1) is the largest value below 0.5; using it avoids
   rounding halfway cases the wrong way after the add.  */
48170 fmt = REAL_MODE_FORMAT (mode);
48171 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
48172 real_arithmetic (&pred_half, MINUS_EXPR, &dconsthalf, &half_minus_pred_half);
48174 /* adj = copysign (0.5, op1) */
48175 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
48176 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
48178 /* adj = op1 + adj */
48179 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
48181 /* op0 = (imode)adj */
48182 expand_fix (op0, adj, 0);
48185 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
48188 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
48190 /* C code for the stuff we're doing below (for do_floor):
48192 xi -= (double)xi > op1 ? 1 : 0;
48195 machine_mode fmode = GET_MODE (op1);
48196 machine_mode imode = GET_MODE (op0);
48197 rtx ireg, freg, tmp;
48198 rtx_code_label *label;
48200 /* reg = (long)op1 */
48201 ireg = gen_reg_rtx (imode);
48202 expand_fix (ireg, op1, 0);
48204 /* freg = (double)reg */
48205 freg = gen_reg_rtx (fmode);
48206 expand_float (freg, ireg, 0);
48208 /* ireg = (freg > op1) ? ireg - 1 : ireg */
48209 label = ix86_expand_sse_compare_and_jump (UNLE,
48210 freg, op1, !do_floor);
48211 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
48212 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
48213 emit_move_insn (ireg, tmp);
48215 emit_label (label);
48216 LABEL_NUSES (label) = 1;
48218 emit_move_insn (op0, ireg);
48221 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
48222 result in OPERAND0. */
48224 ix86_expand_rint (rtx operand0, rtx operand1)
48226 /* C code for the stuff we're doing below:
48227 xa = fabs (operand1);
48228 if (!isless (xa, 2**52))
48230 xa = xa + 2**52 - 2**52;
48231 return copysign (xa, operand1);
48233 machine_mode mode = GET_MODE (operand0);
48234 rtx res, xa, TWO52, mask;
48235 rtx_code_label *label;
48237 res = gen_reg_rtx (mode);
48238 emit_move_insn (res, operand1);
48240 /* xa = abs (operand1) */
48241 xa = ix86_expand_sse_fabs (res, &mask);
48243 /* if (!isless (xa, TWO52)) goto label; */
48244 TWO52 = ix86_gen_TWO52 (mode);
48245 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
48247 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
48248 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
48250 ix86_sse_copysign_to_positive (res, xa, res, mask);
48252 emit_label (label);
48253 LABEL_NUSES (label) = 1;
48255 emit_move_insn (operand0, res);
48258 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
48261 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
48263 /* C code for the stuff we expand below.
48264 double xa = fabs (x), x2;
48265 if (!isless (xa, TWO52))
48267 xa = xa + TWO52 - TWO52;
48268 x2 = copysign (xa, x);
48277 machine_mode mode = GET_MODE (operand0);
48278 rtx xa, TWO52, tmp, one, res, mask;
48279 rtx_code_label *label;
48281 TWO52 = ix86_gen_TWO52 (mode);
48283 /* Temporary for holding the result, initialized to the input
48284 operand to ease control flow. */
48285 res = gen_reg_rtx (mode);
48286 emit_move_insn (res, operand1);
48288 /* xa = abs (operand1) */
48289 xa = ix86_expand_sse_fabs (res, &mask);
48291 /* if (!isless (xa, TWO52)) goto label; */
48292 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
48294 /* xa = xa + TWO52 - TWO52; */
48295 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
48296 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
48298 /* xa = copysign (xa, operand1) */
48299 ix86_sse_copysign_to_positive (xa, xa, res, mask);
48301 /* generate 1.0 or -1.0 */
48302 one = force_reg (mode,
48303 const_double_from_real_value (do_floor
48304 ? dconst1 : dconstm1, mode));
48306 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
48307 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
48308 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
48309 /* We always need to subtract here to preserve signed zero. */
48310 tmp = expand_simple_binop (mode, MINUS,
48311 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
48312 emit_move_insn (res, tmp);
48314 emit_label (label);
48315 LABEL_NUSES (label) = 1;
48317 emit_move_insn (operand0, res);
48320 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
48323 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
48325 /* C code for the stuff we expand below.
48326 double xa = fabs (x), x2;
48327 if (!isless (xa, TWO52))
48329 x2 = (double)(long)x;
48336 if (HONOR_SIGNED_ZEROS (mode))
48337 return copysign (x2, x);
48340 machine_mode mode = GET_MODE (operand0);
48341 rtx xa, xi, TWO52, tmp, one, res, mask;
48342 rtx_code_label *label;
48344 TWO52 = ix86_gen_TWO52 (mode);
48346 /* Temporary for holding the result, initialized to the input
48347 operand to ease control flow. */
48348 res = gen_reg_rtx (mode);
48349 emit_move_insn (res, operand1);
48351 /* xa = abs (operand1) */
48352 xa = ix86_expand_sse_fabs (res, &mask);
48354 /* if (!isless (xa, TWO52)) goto label; */
48355 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
48357 /* xa = (double)(long)x */
48358 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
48359 expand_fix (xi, res, 0);
48360 expand_float (xa, xi, 0);
48363 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
48365 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
48366 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
48367 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
48368 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
48369 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
48370 emit_move_insn (res, tmp);
48372 if (HONOR_SIGNED_ZEROS (mode))
48373 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
48375 emit_label (label);
48376 LABEL_NUSES (label) = 1;
48378 emit_move_insn (operand0, res);
48381 /* Expand SSE sequence for computing round from OPERAND1 storing
48382 into OPERAND0. Sequence that works without relying on DImode truncation
48383 via cvttsd2siq that is only available on 64bit targets. */
48385 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
48387 /* C code for the stuff we expand below.
48388 double xa = fabs (x), xa2, x2;
48389 if (!isless (xa, TWO52))
48391 Using the absolute value and copying back sign makes
48392 -0.0 -> -0.0 correct.
48393 xa2 = xa + TWO52 - TWO52;
48398 else if (dxa > 0.5)
48400 x2 = copysign (xa2, x);
48403 machine_mode mode = GET_MODE (operand0);
48404 rtx xa, xa2, dxa, TWO52, tmp, half, mhalf, one, res, mask;
48405 rtx_code_label *label;
48407 TWO52 = ix86_gen_TWO52 (mode);
48409 /* Temporary for holding the result, initialized to the input
48410 operand to ease control flow. */
48411 res = gen_reg_rtx (mode);
48412 emit_move_insn (res, operand1);
48414 /* xa = abs (operand1) */
48415 xa = ix86_expand_sse_fabs (res, &mask);
48417 /* if (!isless (xa, TWO52)) goto label; */
48418 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
48420 /* xa2 = xa + TWO52 - TWO52; */
48421 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
48422 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
48424 /* dxa = xa2 - xa; */
48425 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
48427 /* generate 0.5, 1.0 and -0.5 */
48428 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
48429 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
48430 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
48434 tmp = gen_reg_rtx (mode);
48435 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
48436 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
48437 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
48438 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
48439 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
48440 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
48441 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
48442 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
48444 /* res = copysign (xa2, operand1) */
48445 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
48447 emit_label (label);
48448 LABEL_NUSES (label) = 1;
48450 emit_move_insn (operand0, res);
48453 /* Expand SSE sequence for computing trunc from OPERAND1 storing
48456 ix86_expand_trunc (rtx operand0, rtx operand1)
48458 /* C code for SSE variant we expand below.
48459 double xa = fabs (x), x2;
48460 if (!isless (xa, TWO52))
48462 x2 = (double)(long)x;
48463 if (HONOR_SIGNED_ZEROS (mode))
48464 return copysign (x2, x);
48467 machine_mode mode = GET_MODE (operand0);
48468 rtx xa, xi, TWO52, res, mask;
48469 rtx_code_label *label;
48471 TWO52 = ix86_gen_TWO52 (mode);
48473 /* Temporary for holding the result, initialized to the input
48474 operand to ease control flow. */
48475 res = gen_reg_rtx (mode);
48476 emit_move_insn (res, operand1);
48478 /* xa = abs (operand1) */
48479 xa = ix86_expand_sse_fabs (res, &mask);
48481 /* if (!isless (xa, TWO52)) goto label; */
48482 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
48484 /* x = (double)(long)x */
48485 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
48486 expand_fix (xi, res, 0);
48487 expand_float (res, xi, 0);
48489 if (HONOR_SIGNED_ZEROS (mode))
48490 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
48492 emit_label (label);
48493 LABEL_NUSES (label) = 1;
48495 emit_move_insn (operand0, res);
48498 /* Expand SSE sequence for computing trunc from OPERAND1 storing
48501 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
48503 machine_mode mode = GET_MODE (operand0);
48504 rtx xa, mask, TWO52, one, res, smask, tmp;
48505 rtx_code_label *label;
48507 /* C code for SSE variant we expand below.
48508 double xa = fabs (x), x2;
48509 if (!isless (xa, TWO52))
48511 xa2 = xa + TWO52 - TWO52;
48515 x2 = copysign (xa2, x);
48519 TWO52 = ix86_gen_TWO52 (mode);
48521 /* Temporary for holding the result, initialized to the input
48522 operand to ease control flow. */
48523 res = gen_reg_rtx (mode);
48524 emit_move_insn (res, operand1);
48526 /* xa = abs (operand1) */
48527 xa = ix86_expand_sse_fabs (res, &smask);
48529 /* if (!isless (xa, TWO52)) goto label; */
48530 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
48532 /* res = xa + TWO52 - TWO52; */
48533 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
48534 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
48535 emit_move_insn (res, tmp);
48538 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
48540 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
48541 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
48542 emit_insn (gen_rtx_SET (mask, gen_rtx_AND (mode, mask, one)));
48543 tmp = expand_simple_binop (mode, MINUS,
48544 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
48545 emit_move_insn (res, tmp);
48547 /* res = copysign (res, operand1) */
48548 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
48550 emit_label (label);
48551 LABEL_NUSES (label) = 1;
48553 emit_move_insn (operand0, res);
48556 /* Expand SSE sequence for computing round from OPERAND1 storing
48559 ix86_expand_round (rtx operand0, rtx operand1)
48561 /* C code for the stuff we're doing below:
48562 double xa = fabs (x);
48563 if (!isless (xa, TWO52))
48565 xa = (double)(long)(xa + nextafter (0.5, 0.0));
48566 return copysign (xa, x);
48568 machine_mode mode = GET_MODE (operand0);
48569 rtx res, TWO52, xa, xi, half, mask;
48570 rtx_code_label *label;
48571 const struct real_format *fmt;
48572 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
48574 /* Temporary for holding the result, initialized to the input
48575 operand to ease control flow. */
48576 res = gen_reg_rtx (mode);
48577 emit_move_insn (res, operand1);
48579 TWO52 = ix86_gen_TWO52 (mode);
48580 xa = ix86_expand_sse_fabs (res, &mask);
48581 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
48583 /* load nextafter (0.5, 0.0) */
48584 fmt = REAL_MODE_FORMAT (mode);
48585 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
48586 real_arithmetic (&pred_half, MINUS_EXPR, &dconsthalf, &half_minus_pred_half);
48588 /* xa = xa + 0.5 */
48589 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
48590 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
48592 /* xa = (double)(int64_t)xa */
48593 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
48594 expand_fix (xi, xa, 0);
48595 expand_float (xa, xi, 0);
48597 /* res = copysign (xa, operand1) */
48598 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
48600 emit_label (label);
48601 LABEL_NUSES (label) = 1;
48603 emit_move_insn (operand0, res);
48606 /* Expand SSE sequence for computing round
48607 from OP1 storing into OP0 using sse4 round insn. */
48609 ix86_expand_round_sse4 (rtx op0, rtx op1)
48611 machine_mode mode = GET_MODE (op0);
48612 rtx e1, e2, res, half;
48613 const struct real_format *fmt;
48614 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
48615 rtx (*gen_copysign) (rtx, rtx, rtx);
48616 rtx (*gen_round) (rtx, rtx, rtx);
48621 gen_copysign = gen_copysignsf3;
48622 gen_round = gen_sse4_1_roundsf2;
48625 gen_copysign = gen_copysigndf3;
48626 gen_round = gen_sse4_1_rounddf2;
48629 gcc_unreachable ();
48632 /* round (a) = trunc (a + copysign (0.5, a)) */
48634 /* load nextafter (0.5, 0.0) */
48635 fmt = REAL_MODE_FORMAT (mode);
48636 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
48637 real_arithmetic (&pred_half, MINUS_EXPR, &dconsthalf, &half_minus_pred_half);
48638 half = const_double_from_real_value (pred_half, mode);
48640 /* e1 = copysign (0.5, op1) */
48641 e1 = gen_reg_rtx (mode);
48642 emit_insn (gen_copysign (e1, half, op1));
48644 /* e2 = op1 + e1 */
48645 e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);
48647 /* res = trunc (e2) */
48648 res = gen_reg_rtx (mode);
48649 emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));
48651 emit_move_insn (op0, res);
/* NOTE(review): this span was damaged in extraction -- every line carries a
   stray original-line-number prefix, and several lines were dropped: the
   opening brace of the initializer, the second line of most entries (the
   trailing "true }," / "false }," flag+terminator), the #endif lines for
   TARGET_DLLIMPORT_DECL_ATTRIBUTES and SUBTARGET_ATTRIBUTE_TABLE, and the
   closing "};".  Restore against upstream GCC i386.c before compiling.
   Each entry is an attribute_spec: { name, min_len, max_len, decl_req,
   type_req, fn_type_req, handler, affects_type_identity }.  */
48655 /* Table of valid machine attributes. */
48656 static const struct attribute_spec ix86_attribute_table[] =
48658 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
48659 affects_type_identity } */
48660 /* Stdcall attribute says callee is responsible for popping arguments
48661 if they are not variable. */
48662 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
48664 /* Fastcall attribute says callee is responsible for popping arguments
48665 if they are not variable. */
48666 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
48668 /* Thiscall attribute says callee is responsible for popping arguments
48669 if they are not variable. */
48670 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
48672 /* Cdecl attribute says the callee is a normal C declaration */
48673 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
48675 /* Regparm attribute specifies how many integer arguments are to be
48676 passed in registers. */
48677 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
48679 /* Sseregparm attribute says we are using x86_64 calling conventions
48680 for FP arguments. */
48681 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
48683 /* The transactional memory builtins are implicitly regparm or fastcall
48684 depending on the ABI. Override the generic do-nothing attribute that
48685 these builtins were declared with. */
48686 { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
48688 /* force_align_arg_pointer says this function realigns the stack at entry. */
48689 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
48690 false, true, true, ix86_handle_force_align_arg_pointer_attribute, false },
48691 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
48692 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
48693 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
48694 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
48697 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
48699 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
48701 #ifdef SUBTARGET_ATTRIBUTE_TABLE
48702 SUBTARGET_ATTRIBUTE_TABLE,
48704 /* ms_abi and sysv_abi calling convention function attributes. */
48705 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
48706 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
48707 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
48709 { "callee_pop_aggregate_return", 1, 1, false, true, true,
48710 ix86_handle_callee_pop_aggregate_return, true },
/* Sentinel entry terminating the table.  */
48712 { NULL, 0, 0, false, false, false, NULL, false }
48715 /* Implement targetm.vectorize.builtin_vectorization_cost. */
48717 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
48722 switch (type_of_cost)
48725 return ix86_cost->scalar_stmt_cost;
48728 return ix86_cost->scalar_load_cost;
48731 return ix86_cost->scalar_store_cost;
48734 return ix86_cost->vec_stmt_cost;
48737 return ix86_cost->vec_align_load_cost;
48740 return ix86_cost->vec_store_cost;
48742 case vec_to_scalar:
48743 return ix86_cost->vec_to_scalar_cost;
48745 case scalar_to_vec:
48746 return ix86_cost->scalar_to_vec_cost;
48748 case unaligned_load:
48749 case unaligned_store:
48750 return ix86_cost->vec_unalign_load_cost;
48752 case cond_branch_taken:
48753 return ix86_cost->cond_taken_branch_cost;
48755 case cond_branch_not_taken:
48756 return ix86_cost->cond_not_taken_branch_cost;
48759 case vec_promote_demote:
48760 return ix86_cost->vec_stmt_cost;
48762 case vec_construct:
48763 elements = TYPE_VECTOR_SUBPARTS (vectype);
48764 return ix86_cost->vec_stmt_cost * (elements / 2 + 1);
48767 gcc_unreachable ();
48771 /* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
48772 insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
48773 insn every time. */
48775 static GTY(()) rtx_insn *vselect_insn;
48777 /* Initialize vselect_insn. */
48780 init_vselect_insn (void)
48785 x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN));
48786 for (i = 0; i < MAX_VECT_LEN; ++i)
48787 XVECEXP (x, 0, i) = const0_rtx;
48788 x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx,
48790 x = gen_rtx_SET (const0_rtx, x);
48792 vselect_insn = emit_insn (x);
48796 /* Construct (set target (vec_select op0 (parallel perm))) and
48797 return true if that's a valid instruction in the active ISA. */
48800 expand_vselect (rtx target, rtx op0, const unsigned char *perm,
48801 unsigned nelt, bool testing_p)
48804 rtx x, save_vconcat;
48807 if (vselect_insn == NULL_RTX)
48808 init_vselect_insn ();
48810 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1);
48811 PUT_NUM_ELEM (XVEC (x, 0), nelt);
48812 for (i = 0; i < nelt; ++i)
48813 XVECEXP (x, 0, i) = GEN_INT (perm[i]);
48814 save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
48815 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0;
48816 PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target));
48817 SET_DEST (PATTERN (vselect_insn)) = target;
48818 icode = recog_memoized (vselect_insn);
48820 if (icode >= 0 && !testing_p)
48821 emit_insn (copy_rtx (PATTERN (vselect_insn)));
48823 SET_DEST (PATTERN (vselect_insn)) = const0_rtx;
48824 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat;
48825 INSN_CODE (vselect_insn) = -1;
48830 /* Similar, but generate a vec_concat from op0 and op1 as well. */
48833 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
48834 const unsigned char *perm, unsigned nelt,
48837 machine_mode v2mode;
48841 if (vselect_insn == NULL_RTX)
48842 init_vselect_insn ();
48844 v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
48845 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
48846 PUT_MODE (x, v2mode);
48849 ok = expand_vselect (target, x, perm, nelt, testing_p);
48850 XEXP (x, 0) = const0_rtx;
48851 XEXP (x, 1) = const0_rtx;
/* NOTE(review): this span was damaged in extraction -- every line carries a
   stray original-line-number prefix, and whole runs of lines were dropped:
   the "static bool" return-type line, braces, "return false;"/"return true;"
   statements, the "switch (vmode)" header with its first case labels
   (the jump 48896->48915 suggests the V8DF/V16SF/.../V8DI case-label run),
   the per-case "vmode = ...; goto do_subreg;" tails, the use_pblendvb /
   finish_pblendvb / do_subreg labels, and the whole mmode-selection switch
   (gap 49041->49064).  Restore against upstream GCC i386.c before
   compiling.  */
48855 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
48856 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
48859 expand_vec_perm_blend (struct expand_vec_perm_d *d)
48861 machine_mode mmode, vmode = d->vmode;
48862 unsigned i, mask, nelt = d->nelt;
48863 rtx target, op0, op1, maskop, x;
48864 rtx rperm[32], vperm;
/* Blends need two distinct operands; bail out for one-operand perms.  */
48866 if (d->one_operand_p)
/* ISA gate: AVX512F (64-byte, BW needed for sub-dword), AVX2 (32-byte),
   AVX (V4DF/V8SF), or SSE4.1 (16-byte).  */
48868 if (TARGET_AVX512F && GET_MODE_SIZE (vmode) == 64
48869 && (TARGET_AVX512BW
48870 || GET_MODE_UNIT_SIZE (vmode) >= 4))
48872 else if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
48874 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
48876 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
48881 /* This is a blend, not a permute. Elements must stay in their
48882 respective lanes. */
48883 for (i = 0; i < nelt; ++i)
48885 unsigned e = d->perm[i];
48886 if (!(e == i || e == i + nelt))
48893 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
48894 decision should be extracted elsewhere, so that we only try that
48895 sequence once all budget==3 options have been tried. */
48896 target = d->target;
/* Element-granular blends: bit i of MASK selects op1 for element i.  */
48915 for (i = 0; i < nelt; ++i)
48916 mask |= (d->perm[i] >= nelt) << i;
/* V2DImode handled as a V8HImode pblendw: 4 mask bits per qword.  */
48920 for (i = 0; i < 2; ++i)
48921 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
/* V4SImode likewise: 2 mask bits per dword.  */
48926 for (i = 0; i < 4; ++i)
48927 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
48932 /* See if bytes move in pairs so we can use pblendw with
48933 an immediate argument, rather than pblendvb with a vector
48935 for (i = 0; i < 16; i += 2)
48936 if (d->perm[i] + 1 != d->perm[i + 1])
48939 for (i = 0; i < nelt; ++i)
48940 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx)
48943 vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
48944 vperm = force_reg (vmode, vperm);
48946 if (GET_MODE_SIZE (vmode) == 16)
48947 emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
48949 emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
48950 if (target != d->target)
48951 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
48955 for (i = 0; i < 8; ++i)
48956 mask |= (d->perm[i * 2] >= 16) << i;
/* do_subreg: redo the blend in the narrower VMODE via lowpart subregs.  */
48961 target = gen_reg_rtx (vmode);
48962 op0 = gen_lowpart (vmode, op0);
48963 op1 = gen_lowpart (vmode, op1);
48967 /* See if bytes move in pairs. If not, vpblendvb must be used. */
48968 for (i = 0; i < 32; i += 2)
48969 if (d->perm[i] + 1 != d->perm[i + 1])
48971 /* See if bytes move in quadruplets. If yes, vpblendd
48972 with immediate can be used. */
48973 for (i = 0; i < 32; i += 4)
48974 if (d->perm[i] + 2 != d->perm[i + 2])
48978 /* See if bytes move the same in both lanes. If yes,
48979 vpblendw with immediate can be used. */
48980 for (i = 0; i < 16; i += 2)
48981 if (d->perm[i] + 16 != d->perm[i + 16])
48984 /* Use vpblendw. */
48985 for (i = 0; i < 16; ++i)
48986 mask |= (d->perm[i * 2] >= 32) << i;
48991 /* Use vpblendd. */
48992 for (i = 0; i < 8; ++i)
48993 mask |= (d->perm[i * 4] >= 32) << i;
48998 /* See if words move in pairs. If yes, vpblendd can be used. */
48999 for (i = 0; i < 16; i += 2)
49000 if (d->perm[i] + 1 != d->perm[i + 1])
49004 /* See if words move the same in both lanes. If not,
49005 vpblendvb must be used. */
49006 for (i = 0; i < 8; i++)
49007 if (d->perm[i] + 8 != d->perm[i + 8])
49009 /* Use vpblendvb. */
49010 for (i = 0; i < 32; ++i)
49011 rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);
49015 target = gen_reg_rtx (vmode);
49016 op0 = gen_lowpart (vmode, op0);
49017 op1 = gen_lowpart (vmode, op1);
49018 goto finish_pblendvb;
49021 /* Use vpblendw. */
49022 for (i = 0; i < 16; ++i)
49023 mask |= (d->perm[i] >= 16) << i;
49027 /* Use vpblendd. */
49028 for (i = 0; i < 8; ++i)
49029 mask |= (d->perm[i * 2] >= 16) << i;
49034 /* Use vpblendd. */
49035 for (i = 0; i < 4; ++i)
49036 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
49041 gcc_unreachable ();
/* For AVX512 modes the blend mask lives in a mask register (mmode
   selected by the dropped switch above); otherwise it is an immediate.  */
49064 if (mmode != VOIDmode)
49065 maskop = force_reg (mmode, gen_int_mode (mask, mmode));
49067 maskop = GEN_INT (mask);
49069 /* This matches five different patterns with the different modes. */
49070 x = gen_rtx_VEC_MERGE (vmode, op1, op0, maskop);
49071 x = gen_rtx_SET (target, x);
49073 if (target != d->target)
49074 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
49079 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
49080 in terms of the variable form of vpermilps.
49082 Note that we will have already failed the immediate input vpermilps,
49083 which requires that the high and low part shuffle be identical; the
49084 variable form doesn't require that. */
49087 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
49089 rtx rperm[8], vperm;
49092 if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p)
49095 /* We can only permute within the 128-bit lane. */
49096 for (i = 0; i < 8; ++i)
49098 unsigned e = d->perm[i];
49099 if (i < 4 ? e >= 4 : e < 4)
49106 for (i = 0; i < 8; ++i)
49108 unsigned e = d->perm[i];
49110 /* Within each 128-bit lane, the elements of op0 are numbered
49111 from 0 and the elements of op1 are numbered from 4. */
49117 rperm[i] = GEN_INT (e);
49120 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
49121 vperm = force_reg (V8SImode, vperm);
49122 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
49127 /* Return true if permutation D can be performed as VMODE permutation
49131 valid_perm_using_mode_p (machine_mode vmode, struct expand_vec_perm_d *d)
49133 unsigned int i, j, chunk;
49135 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
49136 || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
49137 || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
49140 if (GET_MODE_NUNITS (vmode) >= d->nelt)
49143 chunk = d->nelt / GET_MODE_NUNITS (vmode);
49144 for (i = 0; i < d->nelt; i += chunk)
49145 if (d->perm[i] & (chunk - 1))
49148 for (j = 1; j < chunk; ++j)
49149 if (d->perm[i] + j != d->perm[i + j])
/* NOTE(review): this span was damaged in extraction -- every line carries a
   stray original-line-number prefix, and many lines were dropped: the
   "static bool" line, braces, "return false;"/"return true;" statements,
   early TARGET_AVX2/vperm2i128 guard lines (gap 49164->49168/49170->49173),
   the vmode/nelt assignments after the vpermd/vpermps checks, the
   testing_p early-outs, and parts of the two-operand (XOP vpperm) tail.
   Restore against upstream GCC i386.c before compiling.  */
49155 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
49156 in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */
49159 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
49161 unsigned i, nelt, eltsz, mask;
49162 unsigned char perm[64];
49163 machine_mode vmode = V16QImode;
49164 rtx rperm[64], vperm, target, op0, op1;
/* Two-operand permutations need XOP vpperm (16-byte) or vperm2i128.  */
49168 if (!d->one_operand_p)
49170 if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
49173 && valid_perm_using_mode_p (V2TImode, d))
49178 /* Use vperm2i128 insn. The pattern uses
49179 V4DImode instead of V2TImode. */
49180 target = d->target;
49181 if (d->vmode != V4DImode)
49182 target = gen_reg_rtx (V4DImode);
49183 op0 = gen_lowpart (V4DImode, d->op0);
49184 op1 = gen_lowpart (V4DImode, d->op1);
/* Immediate selects which 128-bit half of each operand goes where.  */
49186 = GEN_INT ((d->perm[0] / (nelt / 2))
49187 | ((d->perm[nelt / 2] / (nelt / 2)) * 16));
49188 emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
49189 if (target != d->target)
49190 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
49198 if (GET_MODE_SIZE (d->vmode) == 16)
49203 else if (GET_MODE_SIZE (d->vmode) == 32)
49208 /* V4DImode should be already handled through
49209 expand_vselect by vpermq instruction. */
49210 gcc_assert (d->vmode != V4DImode)
49213 if (d->vmode == V8SImode
49214 || d->vmode == V16HImode
49215 || d->vmode == V32QImode)
49217 /* First see if vpermq can be used for
49218 V8SImode/V16HImode/V32QImode. */
49219 if (valid_perm_using_mode_p (V4DImode, d))
49221 for (i = 0; i < 4; i++)
49222 perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
49225 target = gen_reg_rtx (V4DImode);
49226 if (expand_vselect (target, gen_lowpart (V4DImode, d->op0),
49229 emit_move_insn (d->target,
49230 gen_lowpart (d->vmode, target));
49236 /* Next see if vpermd can be used. */
49237 if (valid_perm_using_mode_p (V8SImode, d))
49240 /* Or if vpermps can be used. */
49241 else if (d->vmode == V8SFmode)
49244 if (vmode == V32QImode)
49246 /* vpshufb only works intra lanes, it is not
49247 possible to shuffle bytes in between the lanes. */
49248 for (i = 0; i < nelt; ++i)
49249 if ((d->perm[i] ^ i) & (nelt / 2))
49253 else if (GET_MODE_SIZE (d->vmode) == 64)
49255 if (!TARGET_AVX512BW)
49258 /* If vpermq didn't work, vpshufb won't work either. */
49259 if (d->vmode == V8DFmode || d->vmode == V8DImode)
49263 if (d->vmode == V16SImode
49264 || d->vmode == V32HImode
49265 || d->vmode == V64QImode)
49267 /* First see if vpermq can be used for
49268 V16SImode/V32HImode/V64QImode. */
49269 if (valid_perm_using_mode_p (V8DImode, d))
49271 for (i = 0; i < 8; i++)
49272 perm[i] = (d->perm[i * nelt / 8] * 8 / nelt) & 7;
49275 target = gen_reg_rtx (V8DImode);
49276 if (expand_vselect (target, gen_lowpart (V8DImode, d->op0),
49279 emit_move_insn (d->target,
49280 gen_lowpart (d->vmode, target));
49286 /* Next see if vpermd can be used. */
49287 if (valid_perm_using_mode_p (V16SImode, d))
49290 /* Or if vpermps can be used. */
49291 else if (d->vmode == V16SFmode)
49293 if (vmode == V64QImode)
49295 /* vpshufb only works intra lanes, it is not
49296 possible to shuffle bytes in between the lanes. */
49297 for (i = 0; i < nelt; ++i)
49298 if ((d->perm[i] ^ i) & (nelt / 4))
/* vpermd/vpermps paths: build the dword-index selector directly.  */
49309 if (vmode == V8SImode)
49310 for (i = 0; i < 8; ++i)
49311 rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
49312 else if (vmode == V16SImode)
49313 for (i = 0; i < 16; ++i)
49314 rperm[i] = GEN_INT ((d->perm[i * nelt / 16] * 16 / nelt) & 15);
/* Byte-shuffle paths: expand each element index into per-byte indices.  */
49317 eltsz = GET_MODE_UNIT_SIZE (d->vmode);
49318 if (!d->one_operand_p)
49319 mask = 2 * nelt - 1;
49320 else if (vmode == V16QImode)
49322 else if (vmode == V64QImode)
49323 mask = nelt / 4 - 1;
49325 mask = nelt / 2 - 1;
49327 for (i = 0; i < nelt; ++i)
49329 unsigned j, e = d->perm[i] & mask;
49330 for (j = 0; j < eltsz; ++j)
49331 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
49335 vperm = gen_rtx_CONST_VECTOR (vmode,
49336 gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
49337 vperm = force_reg (vmode, vperm);
49339 target = d->target;
49340 if (d->vmode != vmode)
49341 target = gen_reg_rtx (vmode);
49342 op0 = gen_lowpart (vmode, d->op0);
49343 if (d->one_operand_p)
49345 if (vmode == V16QImode)
49346 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
49347 else if (vmode == V32QImode)
49348 emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
49349 else if (vmode == V64QImode)
49350 emit_insn (gen_avx512bw_pshufbv64qi3 (target, op0, vperm));
49351 else if (vmode == V8SFmode)
49352 emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
49353 else if (vmode == V8SImode)
49354 emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
49355 else if (vmode == V16SFmode)
49356 emit_insn (gen_avx512f_permvarv16sf (target, op0, vperm));
49357 else if (vmode == V16SImode)
49358 emit_insn (gen_avx512f_permvarv16si (target, op0, vperm));
49360 gcc_unreachable ();
/* Two-operand case: XOP vpperm selects bytes from both sources.  */
49364 op1 = gen_lowpart (vmode, d->op1);
49365 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
49367 if (target != d->target)
49368 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
49373 /* For V*[QHS]Imode permutations, check if the same permutation
49374 can't be performed in a 2x, 4x or 8x wider inner mode. */
/* On success fills in *ND with an equivalent permutation on a vector whose
   elements are twice as wide (recursing until DImode elements are reached)
   and returns nonzero; returns false when D's mode cannot be widened or the
   element pairs do not collapse.  NOTE(review): listing is elided here, the
   exact return statements fall in the gaps.  */
49377 canonicalize_vector_int_perm (const struct expand_vec_perm_d *d,
49378 struct expand_vec_perm_d *nd)
49381 enum machine_mode mode = VOIDmode;
/* Map each integer vector mode to the mode with double-width elements;
   anything else (including DImode-element vectors) cannot be widened.  */
49385 case V16QImode: mode = V8HImode; break;
49386 case V32QImode: mode = V16HImode; break;
49387 case V64QImode: mode = V32HImode; break;
49388 case V8HImode: mode = V4SImode; break;
49389 case V16HImode: mode = V8SImode; break;
49390 case V32HImode: mode = V16SImode; break;
49391 case V4SImode: mode = V2DImode; break;
49392 case V8SImode: mode = V4DImode; break;
49393 case V16SImode: mode = V8DImode; break;
49394 default: return false;
/* Each adjacent pair of indices must be { even, even + 1 } so the pair
   selects one whole double-width element.  */
49396 for (i = 0; i < d->nelt; i += 2)
49397 if ((d->perm[i] & 1) || d->perm[i + 1] != d->perm[i] + 1)
49400 nd->nelt = d->nelt / 2;
49401 for (i = 0; i < nd->nelt; i++)
49402 nd->perm[i] = d->perm[2 * i] / 2;
/* Keep canonicalizing into still-wider elements until DImode.  */
49403 if (GET_MODE_INNER (mode) != DImode)
49404 canonicalize_vector_int_perm (nd, nd);
49407 nd->one_operand_p = d->one_operand_p;
49408 nd->testing_p = d->testing_p;
49409 if (d->op0 == d->op1)
49410 nd->op0 = nd->op1 = gen_lowpart (nd->vmode, d->op0);
49413 nd->op0 = gen_lowpart (nd->vmode, d->op0);
49414 nd->op1 = gen_lowpart (nd->vmode, d->op1);
/* When only testing, avoid creating a real pseudo: use a raw virtual
   register as a placeholder target.  */
49417 nd->target = gen_raw_REG (nd->vmode, LAST_VIRTUAL_REGISTER + 1);
49419 nd->target = gen_reg_rtx (nd->vmode);
49424 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
49425 in a single instruction. */
/* Tries progressively: identity move, vpbroadcast, plain VEC_SELECT,
   SEL+CONCAT interleave/shufps forms, then the variable-mask expanders
   (blend, vpermil, pshufb, palignr, vpermi2), and finally the same
   permutation re-expressed on a wider-element integer mode.  */
49428 expand_vec_perm_1 (struct expand_vec_perm_d *d)
49430 unsigned i, nelt = d->nelt;
49431 struct expand_vec_perm_d nd;
49433 /* Check plain VEC_SELECT first, because AVX has instructions that could
49434 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
49435 input where SEL+CONCAT may not. */
49436 if (d->one_operand_p)
49438 int mask = nelt - 1;
49439 bool identity_perm = true;
49440 bool broadcast_perm = true;
/* Classify the (masked) permutation: identity, or broadcast of one
   element.  The elided lines clear broadcast_perm for non-zero indices.  */
49442 for (i = 0; i < nelt; i++)
49444 nd.perm[i] = d->perm[i] & mask;
49445 if (nd.perm[i] != i)
49446 identity_perm = false;
49448 broadcast_perm = false;
/* Identity permutation degenerates to a plain move.  */
49454 emit_move_insn (d->target, d->op0);
49457 else if (broadcast_perm && TARGET_AVX2)
49459 /* Use vpbroadcast{b,w,d}. */
49460 rtx (*gen) (rtx, rtx) = NULL;
/* Pick the broadcast expander per mode; AVX512 variants are preferred
   when available.  (The switch/case framing lines are elided.)  */
49464 if (TARGET_AVX512BW)
49465 gen = gen_avx512bw_vec_dupv64qi_1;
49468 gen = gen_avx2_pbroadcastv32qi_1;
49471 if (TARGET_AVX512BW)
49472 gen = gen_avx512bw_vec_dupv32hi_1;
49475 gen = gen_avx2_pbroadcastv16hi_1;
49478 if (TARGET_AVX512F)
49479 gen = gen_avx512f_vec_dupv16si_1;
49482 gen = gen_avx2_pbroadcastv8si_1;
49485 gen = gen_avx2_pbroadcastv16qi;
49488 gen = gen_avx2_pbroadcastv8hi;
49491 if (TARGET_AVX512F)
49492 gen = gen_avx512f_vec_dupv16sf_1;
49495 gen = gen_avx2_vec_dupv8sf_1;
49498 if (TARGET_AVX512F)
49499 gen = gen_avx512f_vec_dupv8df_1;
49502 if (TARGET_AVX512F)
49503 gen = gen_avx512f_vec_dupv8di_1;
49505 /* For other modes prefer other shuffles this function creates. */
49511 emit_insn (gen (d->target, d->op0));
49516 if (expand_vselect (d->target, d->op0, nd.perm, nelt, d->testing_p))
49519 /* There are plenty of patterns in sse.md that are written for
49520 SEL+CONCAT and are not replicated for a single op. Perhaps
49521 that should be changed, to avoid the nastiness here. */
49523 /* Recognize interleave style patterns, which means incrementing
49524 every other permutation operand. */
49525 for (i = 0; i < nelt; i += 2)
49527 nd.perm[i] = d->perm[i] & mask;
49528 nd.perm[i + 1] = (d->perm[i + 1] & mask) + nelt;
49530 if (expand_vselect_vconcat (d->target, d->op0, d->op0, nd.perm, nelt,
49534 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
49537 for (i = 0; i < nelt; i += 4)
49539 nd.perm[i + 0] = d->perm[i + 0] & mask;
49540 nd.perm[i + 1] = d->perm[i + 1] & mask;
49541 nd.perm[i + 2] = (d->perm[i + 2] & mask) + nelt;
49542 nd.perm[i + 3] = (d->perm[i + 3] & mask) + nelt;
49545 if (expand_vselect_vconcat (d->target, d->op0, d->op0, nd.perm, nelt,
49551 /* Finally, try the fully general two operand permute. */
49552 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
49556 /* Recognize interleave style patterns with reversed operands. */
49557 if (!d->one_operand_p)
49559 for (i = 0; i < nelt; ++i)
49561 unsigned e = d->perm[i];
49569 if (expand_vselect_vconcat (d->target, d->op1, d->op0, nd.perm, nelt,
49574 /* Try the SSE4.1 blend variable merge instructions. */
49575 if (expand_vec_perm_blend (d))
49578 /* Try one of the AVX vpermil variable permutations. */
49579 if (expand_vec_perm_vpermil (d))
49582 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
49583 vpshufb, vpermd, vpermps or vpermq variable permutation. */
49584 if (expand_vec_perm_pshufb (d))
49587 /* Try the AVX2 vpalignr instruction. */
49588 if (expand_vec_perm_palignr (d, true))
49591 /* Try the AVX512F vpermi2 instructions. */
49592 if (ix86_expand_vec_perm_vpermi2 (NULL_RTX, NULL_RTX, NULL_RTX, NULL_RTX, d))
49595 /* See if we can get the same permutation in different vector integer
49597 if (canonicalize_vector_int_perm (d, &nd) && expand_vec_perm_1 (&nd))
/* The widened expansion produced nd.target; copy it back in D's mode.  */
49600 emit_move_insn (d->target, gen_lowpart (d->vmode, nd.target));
49606 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
49607 in terms of a pair of pshuflw + pshufhw instructions. */
/* Only applies to one-operand V8HImode permutations where the low four
   result elements come from the low quadword and the high four from the
   high quadword, since each insn only shuffles within its own 64 bits.  */
49610 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
49612 unsigned char perm2[MAX_VECT_LEN];
49616 if (d->vmode != V8HImode || !d->one_operand_p)
49619 /* The two permutations only operate in 64-bit lanes. */
49620 for (i = 0; i < 4; ++i)
49621 if (d->perm[i] >= 4)
49623 for (i = 4; i < 8; ++i)
49624 if (d->perm[i] < 4)
49630 /* Emit the pshuflw. */
/* perm2 = desired low half + identity high half (elided loop fills 4..7).  */
49631 memcpy (perm2, d->perm, 4);
49632 for (i = 4; i < 8; ++i)
49634 ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p);
49637 /* Emit the pshufhw. */
/* Second pass: identity low half + desired high half, applied to the
   intermediate result already in d->target.  */
49638 memcpy (perm2 + 4, d->perm + 4, 4);
49639 for (i = 0; i < 4; ++i)
49641 ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p);
49647 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
49648 the permutation using the SSSE3 palignr instruction. This succeeds
49649 when all of the elements in PERM fit within one vector and we merely
49650 need to shift them down so that a single vector permutation has a
49651 chance to succeed. If SINGLE_INSN_ONLY_P, succeed if only
49652 the vpalignr instruction itself can perform the requested permutation. */
49655 expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool single_insn_only_p)
49657 unsigned i, nelt = d->nelt;
49658 unsigned min, max, minswap, maxswap;
49659 bool in_order, ok, swap = false;
49661 struct expand_vec_perm_d dcopy;
49663 /* Even with AVX, palignr only operates on 128-bit vectors,
49664 in AVX2 palignr operates on both 128-bit lanes. */
49665 if ((!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
49666 && (!TARGET_AVX2 || GET_MODE_SIZE (d->vmode) != 32))
/* Track the index range used as-is (min/max) and with the two operands
   swapped (minswap/maxswap); either view may fit in one vector.  */
49671 minswap = 2 * nelt;
49673 for (i = 0; i < nelt; ++i)
49675 unsigned e = d->perm[i];
49676 unsigned eswap = d->perm[i] ^ nelt;
49677 if (GET_MODE_SIZE (d->vmode) == 32)
/* For 32-byte vectors fold the per-lane index so the range test works
   per 128-bit lane.  */
49679 e = (e & ((nelt / 2) - 1)) | ((e & nelt) >> 1);
49680 eswap = e ^ (nelt / 2);
49686 if (eswap < minswap)
49688 if (eswap > maxswap)
/* If neither the direct nor the swapped range fits in one vector (or
   one lane for 32-byte modes), palignr cannot help.  */
49692 || max - min >= (GET_MODE_SIZE (d->vmode) == 32 ? nelt / 2 : nelt))
49694 if (d->one_operand_p
49696 || maxswap - minswap >= (GET_MODE_SIZE (d->vmode) == 32
49697 ? nelt / 2 : nelt))
49704 /* Given that we have SSSE3, we know we'll be able to implement the
49705 single operand permutation after the palignr with pshufb for
49706 128-bit vectors. If SINGLE_INSN_ONLY_P, in_order has to be computed
49708 if (d->testing_p && GET_MODE_SIZE (d->vmode) == 16 && !single_insn_only_p)
/* Work on a copy, swapping operands if the swapped range was chosen.  */
49714 dcopy.op0 = d->op1;
49715 dcopy.op1 = d->op0;
49716 for (i = 0; i < nelt; ++i)
49717 dcopy.perm[i] ^= nelt;
/* Rebase each index by MIN; in_order (set in elided lines) records
   whether the shift alone yields the identity permutation.  */
49721 for (i = 0; i < nelt; ++i)
49723 unsigned e = dcopy.perm[i];
49724 if (GET_MODE_SIZE (d->vmode) == 32
49726 && (e & (nelt / 2 - 1)) < min)
49727 e = e - min - (nelt / 2);
49734 dcopy.one_operand_p = true;
49736 if (single_insn_only_p && !in_order)
49739 /* For AVX2, test whether we can permute the result in one instruction. */
49744 dcopy.op1 = dcopy.op0;
49745 return expand_vec_perm_1 (&dcopy);
/* Emit the actual byte-alignment shift of the concatenated operands.  */
49748 shift = GEN_INT (min * GET_MODE_UNIT_BITSIZE (d->vmode));
49749 if (GET_MODE_SIZE (d->vmode) == 16)
49751 target = gen_reg_rtx (TImode);
49752 emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, dcopy.op1),
49753 gen_lowpart (TImode, dcopy.op0), shift));
49757 target = gen_reg_rtx (V2TImode);
49758 emit_insn (gen_avx2_palignrv2ti (target,
49759 gen_lowpart (V2TImode, dcopy.op1),
49760 gen_lowpart (V2TImode, dcopy.op0),
49764 dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target);
49766 /* Test for the degenerate case where the alignment by itself
49767 produces the desired permutation. */
49770 emit_move_insn (d->target, dcopy.op0);
/* Otherwise finish with a single-operand permutation of the shifted
   vector; for 16-byte modes this is guaranteed to succeed (pshufb).  */
49774 ok = expand_vec_perm_1 (&dcopy);
49775 gcc_assert (ok || GET_MODE_SIZE (d->vmode) == 32);
49780 /* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
49781 the permutation using the SSE4_1 pblendv instruction. Potentially
49782 reduces permutation from 2 pshufb and or to 1 pshufb and pblendv. */
49785 expand_vec_perm_pblendv (struct expand_vec_perm_d *d)
49787 unsigned i, which, nelt = d->nelt;
49788 struct expand_vec_perm_d dcopy, dcopy1;
49789 machine_mode vmode = d->vmode;
49792 /* Use the same checks as in expand_vec_perm_blend. */
49793 if (d->one_operand_p)
49795 if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
49797 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
49799 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
49804 /* Figure out where permutation elements stay not in their
49805 respective lanes. */
/* WHICH accumulates bit 1 for out-of-place elements from op0 and bit 2
   for those from op1 (the in-place test itself is in elided lines).  */
49806 for (i = 0, which = 0; i < nelt; ++i)
49808 unsigned e = d->perm[i];
49810 which |= (e < nelt ? 1 : 2);
49812 /* We can pblend the part where elements stay not in their
49813 respective lanes only when these elements are all in one
49814 half of a permutation.
49815 {0 1 8 3 4 5 9 7} is ok as 8, 9 are at not at their respective
49816 lanes, but both 8 and 9 >= 8
49817 {0 1 8 3 4 5 2 7} is not ok as 2 and 8 are not at their
49818 respective lanes and 8 >= 8, but 2 not. */
49819 if (which != 1 && which != 2)
49821 if (d->testing_p && GET_MODE_SIZE (vmode) == 16)
49824 /* First we apply one operand permutation to the part where
49825 elements stay not in their respective lanes. */
/* Select the operand that supplies the out-of-place elements.  */
49828 dcopy.op0 = dcopy.op1 = d->op1;
49830 dcopy.op0 = dcopy.op1 = d->op0;
49832 dcopy.target = gen_reg_rtx (vmode);
49833 dcopy.one_operand_p = true;
49835 for (i = 0; i < nelt; ++i)
49836 dcopy.perm[i] = d->perm[i] & (nelt - 1);
49838 ok = expand_vec_perm_1 (&dcopy);
/* For 16-byte modes the single-op permute always succeeds (pshufb);
   wider modes may fail, in which case bail out (elided).  */
49839 if (GET_MODE_SIZE (vmode) != 16 && !ok)
49846 /* Next we put permuted elements into their positions. */
49849 dcopy1.op1 = dcopy.target;
49851 dcopy1.op0 = dcopy.target;
/* Blend mask: take element I from the permuted vector where the
   original index pointed at op1, otherwise keep it in place.  */
49853 for (i = 0; i < nelt; ++i)
49854 dcopy1.perm[i] = ((d->perm[i] >= nelt) ? (nelt + i) : i);
49856 ok = expand_vec_perm_blend (&dcopy1);
49862 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
49864 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
49865 a two vector permutation into a single vector permutation by using
49866 an interleave operation to merge the vectors. */
/* Strategy: compute which halves (16-byte) or quarters (32-byte) of the
   concatenated operands the permutation actually reads; if they fit an
   interleave/shuffle pattern, emit that merge (dremap) and then a single
   one-operand permutation (dfinal) on its result.  */
49869 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
49871 struct expand_vec_perm_d dremap, dfinal;
49872 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
49873 unsigned HOST_WIDE_INT contents;
49874 unsigned char remap[2 * MAX_VECT_LEN];
49876 bool ok, same_halves = false;
49878 if (GET_MODE_SIZE (d->vmode) == 16)
49880 if (d->one_operand_p)
49883 else if (GET_MODE_SIZE (d->vmode) == 32)
49887 /* For 32-byte modes allow even d->one_operand_p.
49888 The lack of cross-lane shuffling in some instructions
49889 might prevent a single insn shuffle. */
49891 dfinal.testing_p = true;
49892 /* If expand_vec_perm_interleave3 can expand this into
49893 a 3 insn sequence, give up and let it be expanded as
49894 3 insn sequence. While that is one insn longer,
49895 it doesn't need a memory operand and in the common
49896 case that both interleave low and high permutations
49897 with the same operands are adjacent needs 4 insns
49898 for both after CSE. */
49899 if (expand_vec_perm_interleave3 (&dfinal))
49905 /* Examine from whence the elements come. */
/* CONTENTS is a bitmask over the 2*nelt possible source indices.  */
49907 for (i = 0; i < nelt; ++i)
49908 contents |= HOST_WIDE_INT_1U << d->perm[i];
49910 memset (remap, 0xff, sizeof (remap));
49913 if (GET_MODE_SIZE (d->vmode) == 16)
49915 unsigned HOST_WIDE_INT h1, h2, h3, h4;
49917 /* Split the two input vectors into 4 halves. */
49918 h1 = (HOST_WIDE_INT_1U << nelt2) - 1;
49923 /* If the elements from the low halves use interleave low, and similarly
49924 for interleave high. If the elements are from mis-matched halves, we
49925 can use shufps for V4SF/V4SI or do a DImode shuffle. */
49926 if ((contents & (h1 | h3)) == contents)
/* Low halves of both operands: interleave-low remap.  */
49929 for (i = 0; i < nelt2; ++i)
49932 remap[i + nelt] = i * 2 + 1;
49933 dremap.perm[i * 2] = i;
49934 dremap.perm[i * 2 + 1] = i + nelt;
49936 if (!TARGET_SSE2 && d->vmode == V4SImode)
49937 dremap.vmode = V4SFmode;
49939 else if ((contents & (h2 | h4)) == contents)
/* High halves of both operands: interleave-high remap.  */
49942 for (i = 0; i < nelt2; ++i)
49944 remap[i + nelt2] = i * 2;
49945 remap[i + nelt + nelt2] = i * 2 + 1;
49946 dremap.perm[i * 2] = i + nelt2;
49947 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
49949 if (!TARGET_SSE2 && d->vmode == V4SImode)
49950 dremap.vmode = V4SFmode;
49952 else if ((contents & (h1 | h4)) == contents)
/* Low half of op0 + high half of op1: shufps or a V2DI shuffle.  */
49955 for (i = 0; i < nelt2; ++i)
49958 remap[i + nelt + nelt2] = i + nelt2;
49959 dremap.perm[i] = i;
49960 dremap.perm[i + nelt2] = i + nelt + nelt2;
49965 dremap.vmode = V2DImode;
49967 dremap.perm[0] = 0;
49968 dremap.perm[1] = 3;
49971 else if ((contents & (h2 | h3)) == contents)
/* High half of op0 + low half of op1 (mirror of the previous case).  */
49974 for (i = 0; i < nelt2; ++i)
49976 remap[i + nelt2] = i;
49977 remap[i + nelt] = i + nelt2;
49978 dremap.perm[i] = i + nelt2;
49979 dremap.perm[i + nelt2] = i + nelt;
49984 dremap.vmode = V2DImode;
49986 dremap.perm[0] = 1;
49987 dremap.perm[1] = 2;
/* 32-byte modes: work per quarter instead of per half.  */
49995 unsigned int nelt4 = nelt / 4, nzcnt = 0;
49996 unsigned HOST_WIDE_INT q[8];
49997 unsigned int nonzero_halves[4];
49999 /* Split the two input vectors into 8 quarters. */
50000 q[0] = (HOST_WIDE_INT_1U << nelt4) - 1;
50001 for (i = 1; i < 8; ++i)
50002 q[i] = q[0] << (nelt4 * i);
50003 for (i = 0; i < 4; ++i)
50004 if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
50006 nonzero_halves[nzcnt] = i;
/* At most two 128-bit halves may be referenced for vperm2[fi]128.  */
50012 gcc_assert (d->one_operand_p);
50013 nonzero_halves[1] = nonzero_halves[0];
50014 same_halves = true;
50016 else if (d->one_operand_p)
50018 gcc_assert (nonzero_halves[0] == 0);
50019 gcc_assert (nonzero_halves[1] == 1);
50024 if (d->perm[0] / nelt2 == nonzero_halves[1])
50026 /* Attempt to increase the likelihood that dfinal
50027 shuffle will be intra-lane. */
50028 std::swap (nonzero_halves[0], nonzero_halves[1]);
50031 /* vperm2f128 or vperm2i128. */
50032 for (i = 0; i < nelt2; ++i)
50034 remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
50035 remap[i + nonzero_halves[0] * nelt2] = i;
50036 dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
50037 dremap.perm[i] = i + nonzero_halves[0] * nelt2;
/* vperm2[fi]128 patterns only exist for these modes; re-express the
   remap in V8SImode otherwise.  */
50040 if (d->vmode != V8SFmode
50041 && d->vmode != V4DFmode
50042 && d->vmode != V8SImode)
50044 dremap.vmode = V8SImode;
50046 for (i = 0; i < 4; ++i)
50048 dremap.perm[i] = i + nonzero_halves[0] * 4;
50049 dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
50053 else if (d->one_operand_p)
50055 else if (TARGET_AVX2
50056 && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
/* Only even quarters referenced: vpunpckl-style interleave.  */
50059 for (i = 0; i < nelt4; ++i)
50062 remap[i + nelt] = i * 2 + 1;
50063 remap[i + nelt2] = i * 2 + nelt2;
50064 remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
50065 dremap.perm[i * 2] = i;
50066 dremap.perm[i * 2 + 1] = i + nelt;
50067 dremap.perm[i * 2 + nelt2] = i + nelt2;
50068 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
50071 else if (TARGET_AVX2
50072 && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
/* Only odd quarters referenced: vpunpckh-style interleave.  */
50075 for (i = 0; i < nelt4; ++i)
50077 remap[i + nelt4] = i * 2;
50078 remap[i + nelt + nelt4] = i * 2 + 1;
50079 remap[i + nelt2 + nelt4] = i * 2 + nelt2;
50080 remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
50081 dremap.perm[i * 2] = i + nelt4;
50082 dremap.perm[i * 2 + 1] = i + nelt + nelt4;
50083 dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
50084 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
50091 /* Use the remapping array set up above to move the elements from their
50092 swizzled locations into their final destinations. */
50094 for (i = 0; i < nelt; ++i)
50096 unsigned e = remap[d->perm[i]];
50097 gcc_assert (e < nelt);
50098 /* If same_halves is true, both halves of the remapped vector are the
50099 same. Avoid cross-lane accesses if possible. */
50100 if (same_halves && i >= nelt2)
50102 gcc_assert (e < nelt2);
50103 dfinal.perm[i] = e + nelt2;
50106 dfinal.perm[i] = e;
/* The merge result feeds dfinal as its single operand.  */
50110 dremap.target = gen_reg_rtx (dremap.vmode);
50111 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
50113 dfinal.op1 = dfinal.op0;
50114 dfinal.one_operand_p = true;
50116 /* Test if the final remap can be done with a single insn. For V4SFmode or
50117 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
50119 ok = expand_vec_perm_1 (&dfinal);
50120 seq = get_insns ();
/* Adjust operand modes if the merge uses a different vector mode.  */
50129 if (dremap.vmode != dfinal.vmode)
50131 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
50132 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
50135 ok = expand_vec_perm_1 (&dremap);
50142 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
50143 a single vector cross-lane permutation into vpermq followed
50144 by any of the single insn permutations. */
/* Applies only to one-operand V32QImode/V16HImode AVX2 permutations whose
   two result halves each draw from at most two of the four 64-bit
   quarters; vpermq gathers those quarters, then an in-lane shuffle
   finishes the job.  */
50147 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
50149 struct expand_vec_perm_d dremap, dfinal;
50150 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
50151 unsigned contents[2];
50155 && (d->vmode == V32QImode || d->vmode == V16HImode)
50156 && d->one_operand_p))
/* contents[k] has a bit per 64-bit quarter referenced by result half K.  */
50161 for (i = 0; i < nelt2; ++i)
50163 contents[0] |= 1u << (d->perm[i] / nelt4);
50164 contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
/* Each half may pull from at most 2 quarters, else vpermq can't help.  */
50167 for (i = 0; i < 2; ++i)
50169 unsigned int cnt = 0;
50170 for (j = 0; j < 4; ++j)
50171 if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
/* Build the vpermq (V4DImode) gather of the needed quarters.  */
50179 dremap.vmode = V4DImode;
50181 dremap.target = gen_reg_rtx (V4DImode);
50182 dremap.op0 = gen_lowpart (V4DImode, d->op0);
50183 dremap.op1 = dremap.op0;
50184 dremap.one_operand_p = true;
50185 for (i = 0; i < 2; ++i)
50187 unsigned int cnt = 0;
50188 for (j = 0; j < 4; ++j)
50189 if ((contents[i] & (1u << j)) != 0)
50190 dremap.perm[2 * i + cnt++] = j;
50191 for (; cnt < 2; ++cnt)
50192 dremap.perm[2 * i + cnt] = 0;
/* The final shuffle reads from the gathered vector, in-lane only.  */
50196 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
50197 dfinal.op1 = dfinal.op0;
50198 dfinal.one_operand_p = true;
50199 for (i = 0, j = 0; i < nelt; ++i)
50203 dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
50204 if ((d->perm[i] / nelt4) == dremap.perm[j])
50206 else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
50207 dfinal.perm[i] |= nelt4;
50209 gcc_unreachable ();
50212 ok = expand_vec_perm_1 (&dremap);
50215 ok = expand_vec_perm_1 (&dfinal);
50221 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to expand
50222 a vector permutation using two instructions, vperm2f128 resp.
50223 vperm2i128 followed by any single in-lane permutation. */
/* Searches all 32 candidate lane-selection immediates; for each, checks
   whether the residual permutation (dsecond) is a single insn.  */
50226 expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
50228 struct expand_vec_perm_d dfirst, dsecond;
50229 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm;
50233 || GET_MODE_SIZE (d->vmode) != 32
50234 || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2))
50238 dsecond.one_operand_p = false;
50239 dsecond.testing_p = true;
50241 /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
50242 immediate. For perm < 16 the second permutation uses
50243 d->op0 as first operand, for perm >= 16 it uses d->op1
50244 as first operand. The second operand is the result of
50246 for (perm = 0; perm < 32; perm++)
50248 /* Ignore permutations which do not move anything cross-lane. */
50251 /* The second shuffle for e.g. V4DFmode has
50252 0123 and ABCD operands.
50253 Ignore AB23, as 23 is already in the second lane
50254 of the first operand. */
50255 if ((perm & 0xc) == (1 << 2)) continue;
50256 /* And 01CD, as 01 is in the first lane of the first
50258 if ((perm & 3) == 0) continue;
50259 /* And 4567, as then the vperm2[fi]128 doesn't change
50260 anything on the original 4567 second operand. */
50261 if ((perm & 0xf) == ((3 << 2) | 2)) continue;
50265 /* The second shuffle for e.g. V4DFmode has
50266 4567 and ABCD operands.
50267 Ignore AB67, as 67 is already in the second lane
50268 of the first operand. */
50269 if ((perm & 0xc) == (3 << 2)) continue;
50270 /* And 45CD, as 45 is in the first lane of the first
50272 if ((perm & 3) == 2) continue;
50273 /* And 0123, as then the vperm2[fi]128 doesn't change
50274 anything on the original 0123 first operand. */
50275 if ((perm & 0xf) == (1 << 2)) continue;
/* Build the residual permutation for this candidate immediate; any
   element this scheme cannot supply aborts the candidate (elided).  */
50278 for (i = 0; i < nelt; i++)
50280 j = d->perm[i] / nelt2;
50281 if (j == ((perm >> (2 * (i >= nelt2))) & 3))
50282 dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1));
50283 else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16))
50284 dsecond.perm[i] = d->perm[i] & (nelt - 1);
50292 ok = expand_vec_perm_1 (&dsecond);
50303 /* Found a usable second shuffle. dfirst will be
50304 vperm2f128 on d->op0 and d->op1. */
50305 dsecond.testing_p = false;
50307 dfirst.target = gen_reg_rtx (d->vmode);
50308 for (i = 0; i < nelt; i++)
50309 dfirst.perm[i] = (i & (nelt2 - 1))
50310 + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;
50312 canonicalize_perm (&dfirst);
50313 ok = expand_vec_perm_1 (&dfirst);
50316 /* And dsecond is some single insn shuffle, taking
50317 d->op0 and result of vperm2f128 (if perm < 16) or
50318 d->op1 and result of vperm2f128 (otherwise). */
50320 dsecond.op0 = dsecond.op1;
50321 dsecond.op1 = dfirst.target;
50323 ok = expand_vec_perm_1 (&dsecond);
50329 /* For one operand, the only useful vperm2f128 permutation is 0x01
50331 if (d->one_operand_p)
50338 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
50339 a two vector permutation using 2 intra-lane interleave insns
50340 and cross-lane shuffle for 32-byte vectors. */
/* Matches permutations of the shape { k, k+nelt, k+1, k+1+nelt, ... } with
   k == 0 (interleave low) or k == nelt/2 (interleave high) and emits the
   corresponding vec_interleave pattern for the mode.  */
50343 expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
50346 rtx (*gen) (rtx, rtx, rtx);
50348 if (d->one_operand_p)
50350 if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
50352 else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
/* Verify the interleave shape: even slots step through one operand,
   odd slots through the other, starting at perm[0].  */
50358 if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
50360 for (i = 0; i < nelt; i += 2)
50361 if (d->perm[i] != d->perm[0] + i / 2
50362 || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
/* Select the high/low interleave generator per mode (switch framing
   elided).  */
50372 gen = gen_vec_interleave_highv32qi;
50374 gen = gen_vec_interleave_lowv32qi;
50378 gen = gen_vec_interleave_highv16hi;
50380 gen = gen_vec_interleave_lowv16hi;
50384 gen = gen_vec_interleave_highv8si;
50386 gen = gen_vec_interleave_lowv8si;
50390 gen = gen_vec_interleave_highv4di;
50392 gen = gen_vec_interleave_lowv4di;
50396 gen = gen_vec_interleave_highv8sf;
50398 gen = gen_vec_interleave_lowv8sf;
50402 gen = gen_vec_interleave_highv4df;
50404 gen = gen_vec_interleave_lowv4df;
50407 gcc_unreachable ();
50410 emit_insn (gen (d->target, d->op0, d->op1));
50414 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement
50415 a single vector permutation using a single intra-lane vector
50416 permutation, vperm2f128 swapping the lanes and vblend* insn blending
50417 the non-swapped and swapped vectors together. */
/* Only for one-operand V8SF/V4DF: dfirst is an in-lane shuffle placing
   every element into the lane it must leave from, dsecond swaps the two
   128-bit lanes, and MSK selects per element which copy the blend keeps.  */
50420 expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
50422 struct expand_vec_perm_d dfirst, dsecond;
50423 unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2;
50426 rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;
50430 || (d->vmode != V8SFmode && d->vmode != V4DFmode)
50431 || !d->one_operand_p)
/* 0xff marks dfirst slots not yet assigned.  */
50435 for (i = 0; i < nelt; i++)
50436 dfirst.perm[i] = 0xff;
50437 for (i = 0, msk = 0; i < nelt; i++)
/* Place d->perm[i] into position I of the lane it originates in; a
   conflicting prior assignment means this scheme fails.  */
50439 j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2;
50440 if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i])
50442 dfirst.perm[j] = d->perm[i];
/* Unused slots become identity.  */
50446 for (i = 0; i < nelt; i++)
50447 if (dfirst.perm[i] == 0xff)
50448 dfirst.perm[i] = i;
50451 dfirst.target = gen_reg_rtx (dfirst.vmode);
50454 ok = expand_vec_perm_1 (&dfirst);
50455 seq = get_insns ();
/* dsecond: swap the two 128-bit lanes of dfirst's result.  */
50467 dsecond.op0 = dfirst.target;
50468 dsecond.op1 = dfirst.target;
50469 dsecond.one_operand_p = true;
50470 dsecond.target = gen_reg_rtx (dsecond.vmode);
50471 for (i = 0; i < nelt; i++)
50472 dsecond.perm[i] = i ^ nelt2;
50474 ok = expand_vec_perm_1 (&dsecond);
50477 blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256;
50478 emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk)));
50482 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement a V4DF
50483 permutation using two vperm2f128, followed by a vshufpd insn blending
50484 the two vectors together. */
/* dfirst gathers the even-pair sources for result slots 0/2, dsecond the
   sources for slots 1/3; dthird's vshufpd then picks the low or high
   element of each 128-bit pair according to perm parity.  */
50487 expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d)
50489 struct expand_vec_perm_d dfirst, dsecond, dthird;
50492 if (!TARGET_AVX || (d->vmode != V4DFmode))
/* "& ~1" rounds each index down to the start of its 128-bit pair.  */
50502 dfirst.perm[0] = (d->perm[0] & ~1);
50503 dfirst.perm[1] = (d->perm[0] & ~1) + 1;
50504 dfirst.perm[2] = (d->perm[2] & ~1);
50505 dfirst.perm[3] = (d->perm[2] & ~1) + 1;
50506 dsecond.perm[0] = (d->perm[1] & ~1);
50507 dsecond.perm[1] = (d->perm[1] & ~1) + 1;
50508 dsecond.perm[2] = (d->perm[3] & ~1);
50509 dsecond.perm[3] = (d->perm[3] & ~1) + 1;
/* vshufpd immediate: the parity of each original index selects low or
   high element within the prepared pairs.  */
50510 dthird.perm[0] = (d->perm[0] % 2);
50511 dthird.perm[1] = (d->perm[1] % 2) + 4;
50512 dthird.perm[2] = (d->perm[2] % 2) + 2;
50513 dthird.perm[3] = (d->perm[3] % 2) + 6;
50515 dfirst.target = gen_reg_rtx (dfirst.vmode);
50516 dsecond.target = gen_reg_rtx (dsecond.vmode);
50517 dthird.op0 = dfirst.target;
50518 dthird.op1 = dsecond.target;
50519 dthird.one_operand_p = false;
50521 canonicalize_perm (&dfirst);
50522 canonicalize_perm (&dsecond);
/* All three sub-permutations must each be a single insn.  */
50524 ok = expand_vec_perm_1 (&dfirst)
50525 && expand_vec_perm_1 (&dsecond)
50526 && expand_vec_perm_1 (&dthird);
50533 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
50534 permutation with two pshufb insns and an ior. We should have already
50535 failed all two instruction sequences. */
50538 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
50540 rtx rperm[2][16], vperm, l, h, op, m128;
50541 unsigned int i, nelt, eltsz;
50543 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
50545 gcc_assert (!d->one_operand_p);
50551 eltsz = GET_MODE_UNIT_SIZE (d->vmode);
50553 /* Generate two permutation masks. If the required element is within
50554 the given vector it is shuffled into the proper lane. If the required
50555 element is in the other vector, force a zero into the lane by setting
50556 bit 7 in the permutation mask. */
50557 m128 = GEN_INT (-128);
50558 for (i = 0; i < nelt; ++i)
50560 unsigned j, e = d->perm[i];
50561 unsigned which = (e >= nelt);
/* Expand the element index into per-byte shuffle-control bytes; the
   other mask gets -128 (pshufb zeroing) in the same bytes.  */
50565 for (j = 0; j < eltsz; ++j)
50567 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
50568 rperm[1-which][i*eltsz + j] = m128;
/* pshufb op0 with mask 0 -> L (op1's lanes zeroed).  */
50572 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
50573 vperm = force_reg (V16QImode, vperm);
50575 l = gen_reg_rtx (V16QImode);
50576 op = gen_lowpart (V16QImode, d->op0);
50577 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
/* pshufb op1 with mask 1 -> H (op0's lanes zeroed).  */
50579 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
50580 vperm = force_reg (V16QImode, vperm);
50582 h = gen_reg_rtx (V16QImode);
50583 op = gen_lowpart (V16QImode, d->op1);
50584 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
/* OR the two halves together into the target (mode-punned if needed).  */
50587 if (d->vmode != V16QImode)
50588 op = gen_reg_rtx (V16QImode);
50589 emit_insn (gen_iorv16qi3 (op, l, h));
50590 if (op != d->target)
50591 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
50596 /* Implement arbitrary permutation of one V32QImode and V16QImode operand
50597 with two vpshufb insns, vpermq and vpor. We should have already failed
50598 all two or three instruction sequences. */
50601 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
50603 rtx rperm[2][32], vperm, l, h, hp, op, m128;
50604 unsigned int i, nelt, eltsz;
50607 || !d->one_operand_p
50608 || (d->vmode != V32QImode && d->vmode != V16HImode))
50615 eltsz = GET_MODE_UNIT_SIZE (d->vmode);
50617 /* Generate two permutation masks. If the required element is within
50618 the same lane, it is shuffled in. If the required element from the
50619 other lane, force a zero by setting bit 7 in the permutation mask.
50620 In the other mask the mask has non-negative elements if element
50621 is requested from the other lane, but also moved to the other lane,
50622 so that the result of vpshufb can have the two V2TImode halves
50624 m128 = GEN_INT (-128);
50625 for (i = 0; i < nelt; ++i)
/* E is the in-lane index; WHICH is nonzero (a byte offset of one lane)
   when the element must cross lanes.  */
50627 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
50628 unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
50630 for (j = 0; j < eltsz; ++j)
50632 rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
50633 rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
/* H: cross-lane elements, shuffled but still in the wrong lane.  */
50637 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
50638 vperm = force_reg (V32QImode, vperm);
50640 h = gen_reg_rtx (V32QImode);
50641 op = gen_lowpart (V32QImode, d->op0);
50642 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
50644 /* Swap the 128-byte lanes of h into hp. */
50645 hp = gen_reg_rtx (V4DImode);
50646 op = gen_lowpart (V4DImode, h);
50647 emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
/* L: same-lane elements in their final positions.  */
50650 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
50651 vperm = force_reg (V32QImode, vperm);
50653 l = gen_reg_rtx (V32QImode);
50654 op = gen_lowpart (V32QImode, d->op0);
50655 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
/* Combine the in-lane and lane-swapped parts with a vpor.  */
50658 if (d->vmode != V32QImode)
50659 op = gen_reg_rtx (V32QImode);
50660 emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));
50661 if (op != d->target)
50662 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
50667 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
50668 and extract-odd permutations of two V32QImode and V16QImode operand
50669 with two vpshufb insns, vpor and vpermq. We should have already
50670 failed all two or three instruction sequences. */
50673 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
50675 rtx rperm[2][32], vperm, l, h, ior, op, m128;
50676 unsigned int i, nelt, eltsz;
50679 || d->one_operand_p
50680 || (d->vmode != V32QImode && d->vmode != V16HImode))
/* Accept only exact extract-even / extract-odd permutations.  */
50683 for (i = 0; i < d->nelt; ++i)
50684 if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
50691 eltsz = GET_MODE_UNIT_SIZE (d->vmode);
50693 /* Generate two permutation masks. In the first permutation mask
50694 the first quarter will contain indexes for the first half
50695 of the op0, the second quarter will contain bit 7 set, third quarter
50696 will contain indexes for the second half of the op0 and the
50697 last quarter bit 7 set. In the second permutation mask
50698 the first quarter will contain bit 7 set, the second quarter
50699 indexes for the first half of the op1, the third quarter bit 7 set
50700 and last quarter indexes for the second half of the op1.
50701 I.e. the first mask e.g. for V32QImode extract even will be:
50702 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
50703 (all values masked with 0xf except for -128) and second mask
50704 for extract even will be
50705 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
50706 m128 = GEN_INT (-128);
50707 for (i = 0; i < nelt; ++i)
50709 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
50710 unsigned which = d->perm[i] >= nelt;
/* XORV flips the middle quarters so each operand's contribution lands
   in the quarter layout described in the comment above.  */
50711 unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;
50713 for (j = 0; j < eltsz; ++j)
50715 rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
50716 rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
/* vpshufb op0 with mask 0 -> L.  */
50720 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
50721 vperm = force_reg (V32QImode, vperm);
50723 l = gen_reg_rtx (V32QImode);
50724 op = gen_lowpart (V32QImode, d->op0);
50725 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
/* vpshufb op1 with mask 1 -> H.  */
50727 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
50728 vperm = force_reg (V32QImode, vperm);
50730 h = gen_reg_rtx (V32QImode);
50731 op = gen_lowpart (V32QImode, d->op1);
50732 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
/* Merge the two shuffles, then fix the quarter order with vpermq.  */
50734 ior = gen_reg_rtx (V32QImode);
50735 emit_insn (gen_iorv32qi3 (ior, l, h));
50737 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */
50738 op = gen_reg_rtx (V4DImode);
50739 ior = gen_lowpart (V4DImode, ior);
50740 emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
50741 const1_rtx, GEN_INT (3)));
50742 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
50747 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
50748 and extract-odd permutations of two V16QI, V8HI, V16HI or V32QI operands
50749 with two "and" and "pack" or two "shift" and "pack" insns. We should
50750 have already failed all two instruction sequences. */
50753 expand_vec_perm_even_odd_pack (struct expand_vec_perm_d *d)
50755 rtx op, dop0, dop1, t, rperm[16];
50756 unsigned i, odd, c, s, nelt = d->nelt;
50757 bool end_perm = false;
50758 machine_mode half_mode;
/* Per-mode insn generators: AND to mask the low half of each wide
   element, PACK to narrow two wide vectors into one, SHIFT to move
   the odd elements down into the even slots.  */
50759 rtx (*gen_and) (rtx, rtx, rtx);
50760 rtx (*gen_pack) (rtx, rtx, rtx);
50761 rtx (*gen_shift) (rtx, rtx, rtx);
/* This strategy consumes two distinct operands.  */
50763 if (d->one_operand_p)
50769 /* Required for "pack". */
50770 if (!TARGET_SSE4_1)
/* NOTE(review): the switch labels selecting on d->vmode are elided in
   this excerpt; each group below configures one element mode.  */
50774 half_mode = V4SImode;
50775 gen_and = gen_andv4si3;
50776 gen_pack = gen_sse4_1_packusdw;
50777 gen_shift = gen_lshrv4si3;
50780 /* No check as all instructions are SSE2. */
50783 half_mode = V8HImode;
50784 gen_and = gen_andv8hi3;
50785 gen_pack = gen_sse2_packuswb;
50786 gen_shift = gen_lshrv8hi3;
50793 half_mode = V8SImode;
50794 gen_and = gen_andv8si3;
50795 gen_pack = gen_avx2_packusdw;
50796 gen_shift = gen_lshrv8si3;
50804 half_mode = V16HImode;
50805 gen_and = gen_andv16hi3;
50806 gen_pack = gen_avx2_packuswb;
50807 gen_shift = gen_lshrv16hi3;
50811 /* Only V8HI, V16QI, V16HI and V32QI modes are more profitable than
50812 general shuffles. */
50816 /* Check that permutation is even or odd. */
50821 for (i = 1; i < nelt; ++i)
50822 if (d->perm[i] != 2 * i + odd)
/* Even: mask each wide element with C to keep the even narrow elements.
   Odd: shift right so odd narrow elements land in the even slots.
   Either way the results are then narrowed and merged by PACK.  */
50828 dop0 = gen_reg_rtx (half_mode);
50829 dop1 = gen_reg_rtx (half_mode);
50832 for (i = 0; i < nelt / 2; i++)
50833 rperm[i] = GEN_INT (c);
50834 t = gen_rtx_CONST_VECTOR (half_mode, gen_rtvec_v (nelt / 2, rperm));
50835 t = force_reg (half_mode, t);
50836 emit_insn (gen_and (dop0, t, gen_lowpart (half_mode, d->op0)));
50837 emit_insn (gen_and (dop1, t, gen_lowpart (half_mode, d->op1)));
50841 emit_insn (gen_shift (dop0,
50842 gen_lowpart (half_mode, d->op0),
50844 emit_insn (gen_shift (dop1,
50845 gen_lowpart (half_mode, d->op1),
50848 /* In AVX2 for 256 bit case we need to permute pack result. */
50849 if (TARGET_AVX2 && end_perm)
50851 op = gen_reg_rtx (d->vmode);
50852 t = gen_reg_rtx (V4DImode);
50853 emit_insn (gen_pack (op, dop0, dop1));
50854 emit_insn (gen_avx2_permv4di_1 (t,
50855 gen_lowpart (V4DImode, op),
50860 emit_move_insn (d->target, gen_lowpart (d->vmode, t));
50863 emit_insn (gen_pack (d->target, dop0, dop1));
50868 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
50869 and extract-odd permutations of two V64QI operands
50870 with two "shifts", two "truncs" and one "concat" insns for "odd"
50871 and two "truncs" and one concat insn for "even."
50872 Have already failed all two instruction sequences. */
50875 expand_vec_perm_even_odd_trunc (struct expand_vec_perm_d *d)
50877 rtx t1, t2, t3, t4;
50878 unsigned i, odd, nelt = d->nelt;
/* Requires AVX512BW vptestmb/truncate patterns and two distinct
   V64QImode operands.  */
50880 if (!TARGET_AVX512BW
50881 || d->one_operand_p
50882 || d->vmode != V64QImode)
50885 /* Check that permutation is even or odd. */
50890 for (i = 1; i < nelt; ++i)
50891 if (d->perm[i] != 2 * i + odd)
/* Odd extraction: shift each 16-bit word right so the odd bytes move
   into the low byte of each word before truncating.  */
50900 t1 = gen_reg_rtx (V32HImode);
50901 t2 = gen_reg_rtx (V32HImode);
50902 emit_insn (gen_lshrv32hi3 (t1,
50903 gen_lowpart (V32HImode, d->op0),
50905 emit_insn (gen_lshrv32hi3 (t2,
50906 gen_lowpart (V32HImode, d->op1),
/* Even extraction: the even bytes are already in the low byte of each
   word, so view the operands as V32HI directly.  */
50911 t1 = gen_lowpart (V32HImode, d->op0);
50912 t2 = gen_lowpart (V32HImode, d->op1);
/* Truncate each V32HI to V32QI (keeping the low byte of each word) and
   concatenate the two halves into the V64QI target.  */
50915 t3 = gen_reg_rtx (V32QImode);
50916 t4 = gen_reg_rtx (V32QImode);
50917 emit_insn (gen_avx512bw_truncatev32hiv32qi2 (t3, t1));
50918 emit_insn (gen_avx512bw_truncatev32hiv32qi2 (t4, t2));
50919 emit_insn (gen_avx_vec_concatv64qi (d->target, t3, t4));
50924 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
50925 and extract-odd permutations. */
/* ODD selects extract-odd (1) vs extract-even (0).  Dispatches on
   d->vmode; NOTE(review): the switch labels and braces are elided in
   this excerpt, each group below is one per-mode strategy.  */
50928 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
50930 rtx t1, t2, t3, t4, t5;
/* V4DF strategy: cross-lane vperm2f128 followed by unpck[lh]pd.  */
50937 t1 = gen_reg_rtx (V4DFmode);
50938 t2 = gen_reg_rtx (V4DFmode);
50940 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
50941 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
50942 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
50944 /* Now an unpck[lh]pd will produce the result required. */
50946 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
50948 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
/* shufps immediate: 0xdd picks odd elements, 0x88 picks even.  */
50954 int mask = odd ? 0xdd : 0x88;
/* V8SF strategy: three shufps plus two vperm2f128.  */
50958 t1 = gen_reg_rtx (V8SFmode);
50959 t2 = gen_reg_rtx (V8SFmode);
50960 t3 = gen_reg_rtx (V8SFmode);
50962 /* Shuffle within the 128-bit lanes to produce:
50963 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
50964 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
50967 /* Shuffle the lanes around to produce:
50968 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
50969 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
50972 /* Shuffle within the 128-bit lanes to produce:
50973 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
50974 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
50976 /* Shuffle within the 128-bit lanes to produce:
50977 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
50978 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
50980 /* Shuffle the lanes around to produce:
50981 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
50982 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
50991 /* These are always directly implementable by expand_vec_perm_1. */
50992 gcc_unreachable ();
50996 return expand_vec_perm_even_odd_pack (d);
50997 else if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB)
50998 return expand_vec_perm_pshufb2 (d);
51003 /* We need 2*log2(N)-1 operations to achieve odd/even
51004 with interleave. */
51005 t1 = gen_reg_rtx (V8HImode);
51006 t2 = gen_reg_rtx (V8HImode);
51007 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
51008 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
51009 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
51010 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
51012 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
51014 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
51020 return expand_vec_perm_even_odd_pack (d);
51024 return expand_vec_perm_even_odd_pack (d);
51027 return expand_vec_perm_even_odd_trunc (d);
/* V4DI strategy: recurse through the equivalent V4DF permutation so
   the float patterns above can be reused.  */
51032 struct expand_vec_perm_d d_copy = *d;
51033 d_copy.vmode = V4DFmode;
51035 d_copy.target = gen_raw_REG (V4DFmode, LAST_VIRTUAL_REGISTER + 1);
51037 d_copy.target = gen_reg_rtx (V4DFmode);
51038 d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
51039 d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
51040 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
51043 emit_move_insn (d->target,
51044 gen_lowpart (V4DImode, d_copy.target));
51053 t1 = gen_reg_rtx (V4DImode);
51054 t2 = gen_reg_rtx (V4DImode);
51056 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
51057 emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
51058 emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));
51060 /* Now an vpunpck[lh]qdq will produce the result required. */
51062 t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
51064 t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
/* V8SI strategy: first try the V8SF route, otherwise fall through to
   the integer vpermq/pshufd/punpck sequence below.  */
51071 struct expand_vec_perm_d d_copy = *d;
51072 d_copy.vmode = V8SFmode;
51074 d_copy.target = gen_raw_REG (V8SFmode, LAST_VIRTUAL_REGISTER + 1);
51076 d_copy.target = gen_reg_rtx (V8SFmode);
51077 d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
51078 d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
51079 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
51082 emit_move_insn (d->target,
51083 gen_lowpart (V8SImode, d_copy.target));
51092 t1 = gen_reg_rtx (V8SImode);
51093 t2 = gen_reg_rtx (V8SImode);
51094 t3 = gen_reg_rtx (V4DImode);
51095 t4 = gen_reg_rtx (V4DImode);
51096 t5 = gen_reg_rtx (V4DImode);
51098 /* Shuffle the lanes around into
51099 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
51100 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, d->op0),
51101 gen_lowpart (V4DImode, d->op1),
51103 emit_insn (gen_avx2_permv2ti (t4, gen_lowpart (V4DImode, d->op0),
51104 gen_lowpart (V4DImode, d->op1),
51107 /* Swap the 2nd and 3rd position in each lane into
51108 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
51109 emit_insn (gen_avx2_pshufdv3 (t1, gen_lowpart (V8SImode, t3),
51110 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
51111 emit_insn (gen_avx2_pshufdv3 (t2, gen_lowpart (V8SImode, t4),
51112 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
51114 /* Now an vpunpck[lh]qdq will produce
51115 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */
51117 t3 = gen_avx2_interleave_highv4di (t5, gen_lowpart (V4DImode, t1),
51118 gen_lowpart (V4DImode, t2));
51120 t3 = gen_avx2_interleave_lowv4di (t5, gen_lowpart (V4DImode, t1),
51121 gen_lowpart (V4DImode, t2));
51123 emit_move_insn (d->target, gen_lowpart (V8SImode, t5));
51127 gcc_unreachable ();
51133 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
51134 extract-even and extract-odd permutations. */
51137 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
51139 unsigned i, odd, nelt = d->nelt;
/* ODD must be 0 (extract even) or 1 (extract odd), and every element
   must then follow the 2*i + odd pattern.  */
51142 if (odd != 0 && odd != 1)
51145 for (i = 1; i < nelt; ++i)
51146 if (d->perm[i] != 2 * i + odd)
/* Matched; defer to the per-mode expander.  */
51149 return expand_vec_perm_even_odd_1 (d, odd);
51152 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
51153 permutations. We assume that expand_vec_perm_1 has already failed. */
51156 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
51158 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
51159 machine_mode vmode = d->vmode;
51160 unsigned char perm2[4];
51161 rtx op0 = d->op0, dest;
/* NOTE(review): the switch on vmode is elided in this excerpt; each
   group below handles one mode class.  */
51168 /* These are special-cased in sse.md so that we can optionally
51169 use the vbroadcast instruction. They expand to two insns
51170 if the input happens to be in a register. */
51171 gcc_unreachable ();
51177 /* These are always implementable using standard shuffle patterns. */
51178 gcc_unreachable ();
51182 /* These can be implemented via interleave. We save one insn by
51183 stopping once we have promoted to V4SImode and then use pshufd. */
/* Pick low vs high interleave depending on which half ELT lives in,
   then repeatedly self-interleave, widening the element each round
   until the broadcast element occupies a full SImode lane.  */
51189 rtx (*gen) (rtx, rtx, rtx)
51190 = vmode == V16QImode ? gen_vec_interleave_lowv16qi
51191 : gen_vec_interleave_lowv8hi;
51195 gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
51196 : gen_vec_interleave_highv8hi;
51201 dest = gen_reg_rtx (vmode);
51202 emit_insn (gen (dest, op0, op0));
51203 vmode = get_mode_wider_vector (vmode);
51204 op0 = gen_lowpart (vmode, dest);
51206 while (vmode != V4SImode);
/* Finish with a pshufd replicating the SImode lane holding ELT.  */
51208 memset (perm2, elt, 4);
51209 dest = gen_reg_rtx (V4SImode);
51210 ok = expand_vselect (dest, op0, perm2, 4, d->testing_p);
51213 emit_move_insn (d->target, gen_lowpart (d->vmode, dest));
51221 /* For AVX2 broadcasts of the first element vpbroadcast* or
51222 vpermq should be used by expand_vec_perm_1. */
51223 gcc_assert (!TARGET_AVX2 || d->perm[0]);
51227 gcc_unreachable ();
51231 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
51232 broadcast permutations. */
51235 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
51237 unsigned i, elt, nelt = d->nelt;
/* A broadcast only makes sense with a single operand.  */
51239 if (!d->one_operand_p)
/* All selector entries must name the same element ELT.  */
51243 for (i = 1; i < nelt; ++i)
51244 if (d->perm[i] != elt)
51247 return expand_vec_perm_broadcast_1 (d);
51250 /* Implement arbitrary permutations of two V64QImode operands
51251 with 2 vpermi2w, 2 vpshufb and one vpor instruction. */
51253 expand_vec_perm_vpermi2_vpshub2 (struct expand_vec_perm_d *d)
51255 if (!TARGET_AVX512BW || !(d->vmode == V64QImode))
51261 struct expand_vec_perm_d ds[2];
51262 rtx rperm[128], vperm, target0, target1;
51263 unsigned int i, nelt;
51264 machine_mode vmode;
/* Build two word-level (V32HI) sub-permutations; each will later be
   refined to byte granularity with a vpshufb.  */
51269 for (i = 0; i < 2; i++)
51272 ds[i].vmode = V32HImode;
51274 ds[i].target = gen_reg_rtx (V32HImode);
51275 ds[i].op0 = gen_lowpart (V32HImode, d->op0);
51276 ds[i].op1 = gen_lowpart (V32HImode, d->op1);
51279 /* Prepare permutations such that the first one takes care of
51280 putting the even bytes into the right positions or one higher
51281 positions (ds[0]) and the second one takes care of
51282 putting the odd bytes into the right positions or one below
/* rperm[0..63] is the byte mask for ds[0], rperm[64..127] for ds[1];
   -1 entries zero the unused byte positions in each vpshufb result.  */
51285 for (i = 0; i < nelt; i++)
51287 ds[i & 1].perm[i / 2] = d->perm[i] / 2;
51290 rperm[i] = constm1_rtx;
51291 rperm[i + 64] = GEN_INT ((i & 14) + (d->perm[i] & 1));
51295 rperm[i] = GEN_INT ((i & 14) + (d->perm[i] & 1));
51296 rperm[i + 64] = constm1_rtx;
/* Expand both word permutations (vpermi2w) as single insns.  */
51300 bool ok = expand_vec_perm_1 (&ds[0]);
51302 ds[0].target = gen_lowpart (V64QImode, ds[0].target);
51304 ok = expand_vec_perm_1 (&ds[1]);
51306 ds[1].target = gen_lowpart (V64QImode, ds[1].target);
/* Byte-refine each half with vpshufb, then merge with a vpor.  */
51308 vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm));
51309 vperm = force_reg (vmode, vperm);
51310 target0 = gen_reg_rtx (V64QImode);
51311 emit_insn (gen_avx512bw_pshufbv64qi3 (target0, ds[0].target, vperm));
51313 vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm + 64));
51314 vperm = force_reg (vmode, vperm);
51315 target1 = gen_reg_rtx (V64QImode);
51316 emit_insn (gen_avx512bw_pshufbv64qi3 (target1, ds[1].target, vperm));
51318 emit_insn (gen_iorv64qi3 (d->target, target0, target1));
51322 /* Implement arbitrary permutation of two V32QImode and V16QImode operands
51323 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
51324 all the shorter instruction sequences. */
51327 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
51329 rtx rperm[4][32], vperm, l[2], h[2], op, m128;
51330 unsigned int i, nelt, eltsz;
51334 || d->one_operand_p
51335 || (d->vmode != V32QImode && d->vmode != V16HImode))
51342 eltsz = GET_MODE_UNIT_SIZE (d->vmode);
51344 /* Generate 4 permutation masks. If the required element is within
51345 the same lane, it is shuffled in. If the required element from the
51346 other lane, force a zero by setting bit 7 in the permutation mask.
51347 In the other mask the mask has non-negative elements if element
51348 is requested from the other lane, but also moved to the other lane,
51349 so that the result of vpshufb can have the two V2TImode halves
/* Initialize all four masks to -128 (vpshufb "write zero").  */
51351 m128 = GEN_INT (-128);
51352 for (i = 0; i < 32; ++i)
51354 rperm[0][i] = m128;
51355 rperm[1][i] = m128;
51356 rperm[2][i] = m128;
51357 rperm[3][i] = m128;
/* which: bit 1 set = element comes from op1; bit 0 set = element must
   cross a 128-bit lane (xlane gives the byte offset of the swap).  */
51363 for (i = 0; i < nelt; ++i)
51365 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
51366 unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
51367 unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);
51369 for (j = 0; j < eltsz; ++j)
51370 rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
51371 used[which] = true;
/* Emit the cross-lane vpshufb for each operand that needs one.  */
51374 for (i = 0; i < 2; ++i)
51376 if (!used[2 * i + 1])
51381 vperm = gen_rtx_CONST_VECTOR (V32QImode,
51382 gen_rtvec_v (32, rperm[2 * i + 1]));
51383 vperm = force_reg (V32QImode, vperm);
51384 h[i] = gen_reg_rtx (V32QImode);
51385 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
51386 emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
51389 /* Swap the 128-byte lanes of h[X]. */
51390 for (i = 0; i < 2; ++i)
51392 if (h[i] == NULL_RTX)
51394 op = gen_reg_rtx (V4DImode);
51395 emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
51396 const2_rtx, GEN_INT (3), const0_rtx,
51398 h[i] = gen_lowpart (V32QImode, op);
/* Emit the same-lane vpshufb for each operand that needs one.  */
51401 for (i = 0; i < 2; ++i)
51408 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
51409 vperm = force_reg (V32QImode, vperm);
51410 l[i] = gen_reg_rtx (V32QImode);
51411 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
51412 emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
/* Combine the same-lane and crossed-lane halves per operand.  */
51415 for (i = 0; i < 2; ++i)
51419 op = gen_reg_rtx (V32QImode);
51420 emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
51427 gcc_assert (l[0] && l[1]);
/* Final vpor merges the two operands' contributions.  */
51429 if (d->vmode != V32QImode)
51430 op = gen_reg_rtx (V32QImode);
51431 emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
51432 if (op != d->target)
51433 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
51437 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
51438 With all of the interface bits taken care of, perform the expansion
51439 in D and return true on success. */
51442 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
51444 /* Try a single instruction expansion. */
51445 if (expand_vec_perm_1 (d))
51448 /* Try sequences of two instructions. */
51450 if (expand_vec_perm_pshuflw_pshufhw (d))
51453 if (expand_vec_perm_palignr (d, false))
51456 if (expand_vec_perm_interleave2 (d))
51459 if (expand_vec_perm_broadcast (d))
51462 if (expand_vec_perm_vpermq_perm_1 (d))
51465 if (expand_vec_perm_vperm2f128 (d))
51468 if (expand_vec_perm_pblendv (d))
51471 /* Try sequences of three instructions. */
51473 if (expand_vec_perm_even_odd_pack (d))
51476 if (expand_vec_perm_2vperm2f128_vshuf (d))
51479 if (expand_vec_perm_pshufb2 (d))
51482 if (expand_vec_perm_interleave3 (d))
51485 if (expand_vec_perm_vperm2f128_vblend (d))
51488 /* Try sequences of four instructions. */
51490 if (expand_vec_perm_even_odd_trunc (d))
51492 if (expand_vec_perm_vpshufb2_vpermq (d))
51495 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
51498 if (expand_vec_perm_vpermi2_vpshub2 (d))
51501 /* ??? Look for narrow permutations whose element orderings would
51502 allow the promotion to a wider mode. */
51504 /* ??? Look for sequences of interleave or a wider permute that place
51505 the data into the correct lanes for a half-vector shuffle like
51506 pshuf[lh]w or vpermilps. */
51508 /* ??? Look for sequences of interleave that produce the desired results.
51509 The combinatorics of punpck[lh] get pretty ugly... */
51511 if (expand_vec_perm_even_odd (d))
51514 /* Even longer sequences. */
51515 if (expand_vec_perm_vpshufb4_vpermq2 (d))
51518 /* See if we can get the same permutation in different vector integer
51520 struct expand_vec_perm_d nd;
51521 if (canonicalize_vector_int_perm (d, &nd) && expand_vec_perm_1 (&nd))
51524 emit_move_insn (d->target, gen_lowpart (d->vmode, nd.target));
51531 /* If a permutation only uses one operand, make it clear. Returns true
51532 if the permutation references both operands. */
51535 canonicalize_perm (struct expand_vec_perm_d *d)
51537 int i, which, nelt = d->nelt;
/* WHICH accumulates bit 1 if any index selects from op0 and bit 2 if
   any index selects from op1.  */
51539 for (i = which = 0; i < nelt; ++i)
51540 which |= (d->perm[i] < nelt ? 1 : 2);
51542 d->one_operand_p = true;
51549 if (!rtx_equal_p (d->op0, d->op1))
51551 d->one_operand_p = false;
51554 /* The elements of PERM do not suggest that only the first operand
51555 is used, but both operands are identical. Allow easier matching
51556 of the permutation by folding the permutation into the single
/* Fold op1-relative indexes back into the 0..nelt-1 range.  */
51561 for (i = 0; i < nelt; ++i)
51562 d->perm[i] &= nelt - 1;
/* True iff both operands were genuinely referenced.  */
51571 return (which == 3);
/* Expand a constant permutation: operands[0] = target, operands[1,2] =
   inputs, operands[3] = CONST_VECTOR selector.  Returns whether the
   expansion succeeded.  */
51575 ix86_expand_vec_perm_const (rtx operands[4])
51577 struct expand_vec_perm_d d;
51578 unsigned char perm[MAX_VECT_LEN];
51583 d.target = operands[0];
51584 d.op0 = operands[1];
51585 d.op1 = operands[2];
51588 d.vmode = GET_MODE (d.target);
51589 gcc_assert (VECTOR_MODE_P (d.vmode));
51590 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
51591 d.testing_p = false;
51593 gcc_assert (GET_CODE (sel) == CONST_VECTOR);
51594 gcc_assert (XVECLEN (sel, 0) == nelt);
51595 gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
/* Copy the selector, reducing each index modulo 2*nelt.  A backup is
   kept in PERM for the possible retry below.  */
51597 for (i = 0; i < nelt; ++i)
51599 rtx e = XVECEXP (sel, 0, i);
51600 int ei = INTVAL (e) & (2 * nelt - 1);
51605 two_args = canonicalize_perm (&d);
51607 if (ix86_expand_vec_perm_const_1 (&d))
51610 /* If the selector says both arguments are needed, but the operands are the
51611 same, the above tried to expand with one_operand_p and flattened selector.
51612 If that didn't work, retry without one_operand_p; we succeeded with that
51614 if (two_args && d.one_operand_p)
51616 d.one_operand_p = false;
51617 memcpy (d.perm, perm, sizeof (perm));
51618 return ix86_expand_vec_perm_const_1 (&d);
51624 /* Implement targetm.vectorize.vec_perm_const_ok. */
51627 ix86_vectorize_vec_perm_const_ok (machine_mode vmode,
51628 const unsigned char *sel)
51630 struct expand_vec_perm_d d;
51631 unsigned int i, nelt, which;
51635 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
/* testing_p: only query feasibility, do not emit insns.  */
51636 d.testing_p = true;
51638 /* Given sufficient ISA support we can just return true here
51639 for selected vector modes. */
51646 if (TARGET_AVX512F)
51647 /* All implementable with a single vpermi2 insn. */
51651 if (TARGET_AVX512BW)
51652 /* All implementable with a single vpermi2 insn. */
51656 if (TARGET_AVX512BW)
51657 /* Implementable with 2 vpermi2, 2 vpshufb and 1 or insn. */
51664 if (TARGET_AVX512VL)
51665 /* All implementable with a single vpermi2 insn. */
51670 /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
51675 /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
51682 /* All implementable with a single vpperm insn. */
51685 /* All implementable with 2 pshufb + 1 ior. */
51691 /* All implementable with shufpd or unpck[lh]pd. */
51697 /* Extract the values from the vector CST into the permutation
51699 memcpy (d.perm, sel, nelt);
51700 for (i = which = 0; i < nelt; ++i)
51702 unsigned char e = d.perm[i];
51703 gcc_assert (e < 2 * nelt);
51704 which |= (e < nelt ? 1 : 2);
51707 /* For all elements from second vector, fold the elements to first. */
51709 for (i = 0; i < nelt; ++i)
51712 /* Check whether the mask can be applied to the vector type. */
51713 d.one_operand_p = (which != 3);
51715 /* Implementable with shufps or pshufd. */
51716 if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
51719 /* Otherwise we have to go through the motions and see if we can
51720 figure out how to generate the requested permutation. */
/* Use raw virtual registers as placeholders; nothing is emitted
   because testing_p is set.  */
51721 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
51722 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
51723 if (!d.one_operand_p)
51724 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
51727 ret = ix86_expand_vec_perm_const_1 (&d);
/* Expand an extract-even (ODD == 0) or extract-odd (ODD == 1)
   permutation of OP0/OP1 into TARG.  */
51734 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
51736 struct expand_vec_perm_d d;
51742 d.vmode = GET_MODE (targ);
51743 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
51744 d.one_operand_p = false;
51745 d.testing_p = false;
/* Build the { odd, odd+2, odd+4, ... } selector.  */
51747 for (i = 0; i < nelt; ++i)
51748 d.perm[i] = i * 2 + odd;
51750 /* We'll either be able to implement the permutation directly... */
51751 if (expand_vec_perm_1 (&d))
51754 /* ... or we use the special-case patterns. */
51755 expand_vec_perm_even_odd_1 (&d, odd);
/* Expand an interleave of OP0 and OP1 into TARG, taking the high
   halves when HIGH_P, the low halves otherwise.  */
51759 ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p)
51761 struct expand_vec_perm_d d;
51762 unsigned i, nelt, base;
51768 d.vmode = GET_MODE (targ);
51769 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
51770 d.one_operand_p = false;
51771 d.testing_p = false;
/* Selector alternates op0 element i+base with op1 element i+base.  */
51773 base = high_p ? nelt / 2 : 0;
51774 for (i = 0; i < nelt / 2; ++i)
51776 d.perm[i * 2] = i + base;
51777 d.perm[i * 2 + 1] = i + base + nelt;
51780 /* Note that for AVX this isn't one instruction. */
51781 ok = ix86_expand_vec_perm_const_1 (&d);
51786 /* Expand a vector operation CODE for a V*QImode in terms of the
51787 same operation on V*HImode. */
51790 ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
51792 machine_mode qimode = GET_MODE (dest);
51793 machine_mode himode;
51794 rtx (*gen_il) (rtx, rtx, rtx);
51795 rtx (*gen_ih) (rtx, rtx, rtx);
51796 rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h;
51797 struct expand_vec_perm_d d;
51798 bool ok, full_interleave;
51799 bool uns_p = false;
/* NOTE(review): the switch on qimode selecting the interleave
   generators is elided in this excerpt.  */
51806 gen_il = gen_vec_interleave_lowv16qi;
51807 gen_ih = gen_vec_interleave_highv16qi;
51810 himode = V16HImode;
51811 gen_il = gen_avx2_interleave_lowv32qi;
51812 gen_ih = gen_avx2_interleave_highv32qi;
51815 himode = V32HImode;
51816 gen_il = gen_avx512bw_interleave_lowv64qi;
51817 gen_ih = gen_avx512bw_interleave_highv64qi;
51820 gcc_unreachable ();
51823 op2_l = op2_h = op2;
51827 /* Unpack data such that we've got a source byte in each low byte of
51828 each word. We don't care what goes into the high byte of each word.
51829 Rather than trying to get zero in there, most convenient is to let
51830 it be a copy of the low byte. */
51831 op2_l = gen_reg_rtx (qimode);
51832 op2_h = gen_reg_rtx (qimode);
51833 emit_insn (gen_il (op2_l, op2, op2));
51834 emit_insn (gen_ih (op2_h, op2, op2));
51837 op1_l = gen_reg_rtx (qimode);
51838 op1_h = gen_reg_rtx (qimode);
51839 emit_insn (gen_il (op1_l, op1, op1));
51840 emit_insn (gen_ih (op1_h, op1, op1));
51841 full_interleave = qimode == V16QImode;
/* Alternative path: widen the operands with sse_unpack (sign or zero
   extension per uns_p) instead of self-interleaving.  */
51849 op1_l = gen_reg_rtx (himode);
51850 op1_h = gen_reg_rtx (himode);
51851 ix86_expand_sse_unpack (op1_l, op1, uns_p, false);
51852 ix86_expand_sse_unpack (op1_h, op1, uns_p, true);
51853 full_interleave = true;
51856 gcc_unreachable ();
51859 /* Perform the operation. */
51860 res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX,
51862 res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX,
51864 gcc_assert (res_l && res_h);
51866 /* Merge the data back into the right place. */
51868 d.op0 = gen_lowpart (qimode, res_l);
51869 d.op1 = gen_lowpart (qimode, res_h);
51871 d.nelt = GET_MODE_NUNITS (qimode);
51872 d.one_operand_p = false;
51873 d.testing_p = false;
51875 if (full_interleave)
51877 /* For SSE2, we used an full interleave, so the desired
51878 results are in the even elements. */
51879 for (i = 0; i < 64; ++i)
51884 /* For AVX, the interleave used above was not cross-lane. So the
51885 extraction is evens but with the second and third quarter swapped.
51886 Happily, that is even one insn shorter than even extraction. */
51887 for (i = 0; i < 64; ++i)
51888 d.perm[i] = i * 2 + ((i & 24) == 8 ? 16 : (i & 24) == 16 ? -16 : 0);
51891 ok = ix86_expand_vec_perm_const_1 (&d);
/* Record the overall operation so later passes can simplify.  */
51894 set_unique_reg_note (get_last_insn (), REG_EQUAL,
51895 gen_rtx_fmt_ee (code, qimode, op1, op2));
51898 /* Helper function of ix86_expand_mul_widen_evenodd. Return true
51899 if op is CONST_VECTOR with all odd elements equal to their
51900 preceding element. */
51903 const_vector_equal_evenodd_p (rtx op)
51905 machine_mode mode = GET_MODE (op);
51906 int i, nunits = GET_MODE_NUNITS (mode);
51907 if (GET_CODE (op) != CONST_VECTOR
51908 || nunits != CONST_VECTOR_NUNITS (op))
/* NOTE(review): != compares element rtx pointers; presumably relies
   on shared constant rtxes for equal values.  */
51910 for (i = 0; i < nunits; i += 2)
51911 if (CONST_VECTOR_ELT (op, i) != CONST_VECTOR_ELT (op, i + 1))
/* Expand a widening multiply of the even (ODD_P false) or odd (ODD_P
   true) SImode elements of OP1 and OP2 into the wider-mode DEST.
   UNS_P selects unsigned vs signed multiplication.  */
51917 ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
51918 bool uns_p, bool odd_p)
51920 machine_mode mode = GET_MODE (op1);
51921 machine_mode wmode = GET_MODE (dest);
51923 rtx orig_op1 = op1, orig_op2 = op2;
51925 if (!nonimmediate_operand (op1, mode))
51926 op1 = force_reg (mode, op1);
51927 if (!nonimmediate_operand (op2, mode))
51928 op2 = force_reg (mode, op2);
51930 /* We only play even/odd games with vectors of SImode. */
51931 gcc_assert (mode == V4SImode || mode == V8SImode || mode == V16SImode);
51933 /* If we're looking for the odd results, shift those members down to
51934 the even slots. For some cpus this is faster than a PSHUFD. */
51937 /* For XOP use vpmacsdqh, but only for smult, as it is only
51939 if (TARGET_XOP && mode == V4SImode && !uns_p)
51941 x = force_reg (wmode, CONST0_RTX (wmode));
51942 emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x));
/* The shift is skipped for constant vectors whose odd elements equal
   the preceding even element - the even multiply already sees the
   right values.  */
51946 x = GEN_INT (GET_MODE_UNIT_BITSIZE (mode));
51947 if (!const_vector_equal_evenodd_p (orig_op1))
51948 op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1),
51949 x, NULL, 1, OPTAB_DIRECT);
51950 if (!const_vector_equal_evenodd_p (orig_op2))
51951 op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2),
51952 x, NULL, 1, OPTAB_DIRECT);
51953 op1 = gen_lowpart (mode, op1);
51954 op2 = gen_lowpart (mode, op2);
/* Pick the widest available widen-mult-even pattern for the mode.  */
51957 if (mode == V16SImode)
51960 x = gen_vec_widen_umult_even_v16si (dest, op1, op2);
51962 x = gen_vec_widen_smult_even_v16si (dest, op1, op2);
51964 else if (mode == V8SImode)
51967 x = gen_vec_widen_umult_even_v8si (dest, op1, op2);
51969 x = gen_vec_widen_smult_even_v8si (dest, op1, op2);
51972 x = gen_vec_widen_umult_even_v4si (dest, op1, op2);
51973 else if (TARGET_SSE4_1)
51974 x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2);
51977 rtx s1, s2, t0, t1, t2;
51979 /* The easiest way to implement this without PMULDQ is to go through
51980 the motions as if we are performing a full 64-bit multiply. With
51981 the exception that we need to do less shuffling of the elements. */
51983 /* Compute the sign-extension, aka highparts, of the two operands. */
51984 s1 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
51985 op1, pc_rtx, pc_rtx);
51986 s2 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
51987 op2, pc_rtx, pc_rtx);
51989 /* Multiply LO(A) * HI(B), and vice-versa. */
51990 t1 = gen_reg_rtx (wmode);
51991 t2 = gen_reg_rtx (wmode);
51992 emit_insn (gen_vec_widen_umult_even_v4si (t1, s1, op2));
51993 emit_insn (gen_vec_widen_umult_even_v4si (t2, s2, op1));
51995 /* Multiply LO(A) * LO(B). */
51996 t0 = gen_reg_rtx (wmode);
51997 emit_insn (gen_vec_widen_umult_even_v4si (t0, op1, op2));
51999 /* Combine and shift the highparts into place. */
52000 t1 = expand_binop (wmode, add_optab, t1, t2, t1, 1, OPTAB_DIRECT);
52001 t1 = expand_binop (wmode, ashl_optab, t1, GEN_INT (32), t1,
52004 /* Combine high and low parts. */
52005 force_expand_binop (wmode, add_optab, t0, t1, dest, 1, OPTAB_DIRECT);
/* Expand a widening multiply of the low (HIGH_P false) or high
   (HIGH_P true) half of OP1 and OP2 into DEST.  UNS_P selects
   unsigned vs signed multiplication.  */
52012 ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2,
52013 bool uns_p, bool high_p)
52015 machine_mode wmode = GET_MODE (dest);
52016 machine_mode mode = GET_MODE (op1);
52017 rtx t1, t2, t3, t4, mask;
/* NOTE(review): the switch on mode is elided in this excerpt; each
   group below handles one input mode class.  */
52022 t1 = gen_reg_rtx (mode);
52023 t2 = gen_reg_rtx (mode);
52024 if (TARGET_XOP && !uns_p)
52026 /* With XOP, we have pmacsdqh, aka mul_widen_odd. In this case,
52027 shuffle the elements once so that all elements are in the right
52028 place for immediate use: { A C B D }. */
52029 emit_insn (gen_sse2_pshufd_1 (t1, op1, const0_rtx, const2_rtx,
52030 const1_rtx, GEN_INT (3)));
52031 emit_insn (gen_sse2_pshufd_1 (t2, op2, const0_rtx, const2_rtx,
52032 const1_rtx, GEN_INT (3)));
52036 /* Put the elements into place for the multiply. */
52037 ix86_expand_vec_interleave (t1, op1, op1, high_p);
52038 ix86_expand_vec_interleave (t2, op2, op2, high_p);
52041 ix86_expand_mul_widen_evenodd (dest, t1, t2, uns_p, high_p);
52045 /* Shuffle the elements between the lanes. After this we
52046 have { A B E F | C D G H } for each operand. */
52047 t1 = gen_reg_rtx (V4DImode);
52048 t2 = gen_reg_rtx (V4DImode);
52049 emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, op1),
52050 const0_rtx, const2_rtx,
52051 const1_rtx, GEN_INT (3)));
52052 emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, op2),
52053 const0_rtx, const2_rtx,
52054 const1_rtx, GEN_INT (3)));
52056 /* Shuffle the elements within the lanes. After this we
52057 have { A A B B | C C D D } or { E E F F | G G H H }. */
52058 t3 = gen_reg_rtx (V8SImode);
52059 t4 = gen_reg_rtx (V8SImode);
52060 mask = GEN_INT (high_p
52061 ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
52062 : 0 + (0 << 2) + (1 << 4) + (1 << 6));
52063 emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), mask));
52064 emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), mask));
52066 ix86_expand_mul_widen_evenodd (dest, t3, t4, uns_p, false);
/* HImode path: compute low and high products separately, then
   interleave them into the widened result.  */
52071 t1 = expand_binop (mode, smul_optab, op1, op2, NULL_RTX,
52072 uns_p, OPTAB_DIRECT);
52073 t2 = expand_binop (mode,
52074 uns_p ? umul_highpart_optab : smul_highpart_optab,
52075 op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT);
52076 gcc_assert (t1 && t2);
52078 t3 = gen_reg_rtx (mode);
52079 ix86_expand_vec_interleave (t3, t1, t2, high_p);
52080 emit_move_insn (dest, gen_lowpart (wmode, t3));
/* Generic path: widen both operands with sse_unpack, then emit a
   plain MULT in the wide mode.  */
52088 t1 = gen_reg_rtx (wmode);
52089 t2 = gen_reg_rtx (wmode);
52090 ix86_expand_sse_unpack (t1, op1, uns_p, high_p);
52091 ix86_expand_sse_unpack (t2, op2, uns_p, high_p);
52093 emit_insn (gen_rtx_SET (dest, gen_rtx_MULT (wmode, t1, t2)));
52097 gcc_unreachable ();
/* Expand OP0 = OP1 * OP2 for V4SImode on SSE2 using widening
   even/odd multiplies plus a recombining shuffle.  */
52102 ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)
52104 rtx res_1, res_2, res_3, res_4;
52106 res_1 = gen_reg_rtx (V4SImode);
52107 res_2 = gen_reg_rtx (V4SImode);
52108 res_3 = gen_reg_rtx (V2DImode);
52109 res_4 = gen_reg_rtx (V2DImode);
/* res_3 holds products of the even elements, res_4 of the odd.  */
52110 ix86_expand_mul_widen_evenodd (res_3, op1, op2, true, false);
52111 ix86_expand_mul_widen_evenodd (res_4, op1, op2, true, true);
52113 /* Move the results in element 2 down to element 1; we don't care
52114 what goes in elements 2 and 3. Then we can merge the parts
52115 back together with an interleave.
52117 Note that two other sequences were tried:
52118 (1) Use interleaves at the start instead of psrldq, which allows
52119 us to use a single shufps to merge things back at the end.
52120 (2) Use shufps here to combine the two vectors, then pshufd to
52121 put the elements in the correct order.
52122 In both cases the cost of the reformatting stall was too high
52123 and the overall sequence slower. */
52125 emit_insn (gen_sse2_pshufd_1 (res_1, gen_lowpart (V4SImode, res_3),
52126 const0_rtx, const2_rtx,
52127 const0_rtx, const0_rtx));
52128 emit_insn (gen_sse2_pshufd_1 (res_2, gen_lowpart (V4SImode, res_4),
52129 const0_rtx, const2_rtx,
52130 const0_rtx, const0_rtx));
52131 res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2));
/* Attach a REG_EQUAL note so the combined insn is simplifiable.  */
52133 set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2));
/* Expand OP0 = OP1 * OP2 for a DImode-element vector MODE (V2DI/V4DI/
   V8DI).  Uses a native vpmullq when AVX512DQ(+VL) allows, an XOP
   pmacsdd/phadddq sequence for V2DI, and otherwise the generic
   "three pmuludq plus shifts" decomposition.
   NOTE(review): several lines are elided in this copy (e.g. the
   initialization of t6, presumably GEN_INT (32) or a 32-bit shift-count
   vector) — verify against the full source.  */
52137 ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
52139 machine_mode mode = GET_MODE (op0);
52140 rtx t1, t2, t3, t4, t5, t6;
/* Prefer the single-instruction 64-bit multiply when available.  */
52142 if (TARGET_AVX512DQ && mode == V8DImode)
52143 emit_insn (gen_avx512dq_mulv8di3 (op0, op1, op2));
52144 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V4DImode)
52145 emit_insn (gen_avx512dq_mulv4di3 (op0, op1, op2));
52146 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V2DImode)
52147 emit_insn (gen_avx512dq_mulv2di3 (op0, op1, op2));
52148 else if (TARGET_XOP && mode == V2DImode)
52150 /* op1: A,B,C,D, op2: E,F,G,H */
52151 op1 = gen_lowpart (V4SImode, op1);
52152 op2 = gen_lowpart (V4SImode, op2);
52154 t1 = gen_reg_rtx (V4SImode);
52155 t2 = gen_reg_rtx (V4SImode);
52156 t3 = gen_reg_rtx (V2DImode);
52157 t4 = gen_reg_rtx (V2DImode);
/* Swap the 32-bit halves of op1 so the cross products line up.  */
52160 emit_insn (gen_sse2_pshufd_1 (t1, op1,
52166 /* t2: (B*E),(A*F),(D*G),(C*H) */
52167 emit_insn (gen_mulv4si3 (t2, t1, op2));
52169 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
52170 emit_insn (gen_xop_phadddq (t3, t2));
52172 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
52173 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
52175 /* Multiply lower parts and add all */
52176 t5 = gen_reg_rtx (V2DImode);
52177 emit_insn (gen_vec_widen_umult_even_v4si (t5,
52178 gen_lowpart (V4SImode, op1),
52179 gen_lowpart (V4SImode, op2)));
52180 op0 = expand_binop (mode, add_optab, t5, t4, op0, 1, OPTAB_DIRECT);
/* Generic SSE2/AVX2/AVX512F path: pick the even-widening unsigned
   multiply matching the element count of MODE.  */
52185 machine_mode nmode;
52186 rtx (*umul) (rtx, rtx, rtx);
52188 if (mode == V2DImode)
52190 umul = gen_vec_widen_umult_even_v4si;
52193 else if (mode == V4DImode)
52195 umul = gen_vec_widen_umult_even_v8si;
52198 else if (mode == V8DImode)
52200 umul = gen_vec_widen_umult_even_v16si;
52204 gcc_unreachable ();
52207 /* Multiply low parts. */
52208 t1 = gen_reg_rtx (mode);
52209 emit_insn (umul (t1, gen_lowpart (nmode, op1), gen_lowpart (nmode, op2)));
52211 /* Shift input vectors right 32 bits so we can multiply high parts. */
/* NOTE(review): t6 is used here but its assignment is not visible in
   this copy — it should hold the 32-bit shift count.  */
52213 t2 = expand_binop (mode, lshr_optab, op1, t6, NULL, 1, OPTAB_DIRECT);
52214 t3 = expand_binop (mode, lshr_optab, op2, t6, NULL, 1, OPTAB_DIRECT);
52216 /* Multiply high parts by low parts. */
52217 t4 = gen_reg_rtx (mode);
52218 t5 = gen_reg_rtx (mode);
52219 emit_insn (umul (t4, gen_lowpart (nmode, t2), gen_lowpart (nmode, op2)));
52220 emit_insn (umul (t5, gen_lowpart (nmode, t3), gen_lowpart (nmode, op1)));
52222 /* Combine and shift the highparts back. */
52223 t4 = expand_binop (mode, add_optab, t4, t5, t4, 1, OPTAB_DIRECT);
52224 t4 = expand_binop (mode, ashl_optab, t4, t6, t4, 1, OPTAB_DIRECT);
52226 /* Combine high and low parts. */
52227 force_expand_binop (mode, add_optab, t1, t4, op0, 1, OPTAB_DIRECT);
/* Record the whole sequence as a MULT for the RTL optimizers.  */
52230 set_unique_reg_note (get_last_insn (), REG_EQUAL,
52231 gen_rtx_MULT (mode, op1, op2));
52234 /* Return 1 if control transfer instruction INSN
52235 should be encoded with bnd prefix.
52236 If insn is NULL then return 1 when control
52237 transfer instructions should be prefixed with
52238 bnd by default for current function. */
/* See the comment above: decide whether INSN (or, with NULL, the
   current function's control transfers in general) needs an MPX bnd
   prefix.  */
52241 ix86_bnd_prefixed_insn_p (rtx insn)
52243 /* For call insns check special flag. */
52244 if (insn && CALL_P (insn))
52246 rtx call = get_call_rtx_from (insn);
/* The flag is set on the CALL rtx when the callee takes bounds.  */
52248 return CALL_EXPR_WITH_BOUNDS_P (call);
52251 /* All other insns are prefixed only if function is instrumented. */
52252 return chkp_function_instrumented_p (current_function_decl);
52255 /* Calculate integer abs() using only SSE2 instructions. */
/* Emit an SSE2-only integer abs of INPUT into TARGET, choosing the
   cheapest idiom per element width (arith shift/xor/sub for 32-bit,
   pmaxsw for 16-bit, pminub for 8-bit).  */
52258 ix86_expand_sse2_abs (rtx target, rtx input)
52260 machine_mode mode = GET_MODE (target);
52265 /* For 32-bit signed integer X, the best way to calculate the absolute
52266 value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)). */
52268 tmp0 = expand_simple_binop (mode, ASHIFTRT, input,
52269 GEN_INT (GET_MODE_UNIT_BITSIZE (mode) - 1),
52270 NULL, 0, OPTAB_DIRECT);
52271 tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
52272 NULL, 0, OPTAB_DIRECT);
52273 x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
52274 target, 0, OPTAB_DIRECT);
52277 /* For 16-bit signed integer X, the best way to calculate the absolute
52278 value of X is max (X, -X), as SSE2 provides the PMAXSW insn. */
52280 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
52282 x = expand_simple_binop (mode, SMAX, tmp0, input,
52283 target, 0, OPTAB_DIRECT);
52286 /* For 8-bit signed integer X, the best way to calculate the absolute
52287 value of X is min ((unsigned char) X, (unsigned char) (-X)),
52288 as SSE2 provides the PMINUB insn. */
52290 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
52292 x = expand_simple_binop (V16QImode, UMIN, tmp0, input,
52293 target, 0, OPTAB_DIRECT);
52297 gcc_unreachable ();
/* The expanders may have produced the value elsewhere; copy it in.  */
52301 emit_move_insn (target, x);
52304 /* Expand an extract from a vector register through pextr insn.
52305 Return true if successful. */
/* Extract SIZE bits at bit position POS (operands[2]/operands[3]) from
   vector SRC into DST via pextrb/w/d/q.  Returns true on success,
   false when the operands don't fit the pattern (the caller falls back
   to the generic path).  */
52308 ix86_expand_pextr (rtx *operands)
52310 rtx dst = operands[0];
52311 rtx src = operands[1];
52313 unsigned int size = INTVAL (operands[2]);
52314 unsigned int pos = INTVAL (operands[3]);
52316 if (SUBREG_P (dst))
52318 /* Reject non-lowpart subregs. */
52319 if (SUBREG_BYTE (dst) > 0)
52321 dst = SUBREG_REG (dst);
/* Fold a subreg of SRC into the bit position instead.  */
52324 if (SUBREG_P (src))
52326 pos += SUBREG_BYTE (src) * BITS_PER_UNIT;
52327 src = SUBREG_REG (src);
52330 switch (GET_MODE (src))
52339 machine_mode srcmode, dstmode;
52342 dstmode = mode_for_size (size, MODE_INT, 0);
/* pextrb/pextrd/pextrq need SSE4.1; pextrw (V8HI) is SSE2.  */
52347 if (!TARGET_SSE4_1)
52349 srcmode = V16QImode;
52355 srcmode = V8HImode;
52359 if (!TARGET_SSE4_1)
52361 srcmode = V4SImode;
52365 gcc_assert (TARGET_64BIT);
52366 if (!TARGET_SSE4_1)
52368 srcmode = V2DImode;
52375 /* Reject extractions from misaligned positions. */
52376 if (pos & (size-1))
52379 if (GET_MODE (dst) == dstmode)
52382 d = gen_reg_rtx (dstmode);
52384 /* Construct insn pattern. */
52385 pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (pos / size)));
52386 pat = gen_rtx_VEC_SELECT (dstmode, gen_lowpart (srcmode, src), pat);
52388 /* Let the rtl optimizers know about the zero extension performed. */
52389 if (dstmode == QImode || dstmode == HImode)
52391 pat = gen_rtx_ZERO_EXTEND (SImode, pat);
52392 d = gen_lowpart (SImode, d);
52395 emit_insn (gen_rtx_SET (d, pat));
/* If we extracted into a temporary, move it to the real DST.  */
52398 emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
52407 /* Expand an insert into a vector register through pinsr insn.
52408 Return true if successful. */
/* Insert SIZE bits of SRC (operands[3]) at bit position POS
   (operands[2]) into vector DST via pinsrb/w/d/q.  Returns true on
   success, false when the operands don't fit the pattern.  */
52411 ix86_expand_pinsr (rtx *operands)
52413 rtx dst = operands[0];
52414 rtx src = operands[3];
52416 unsigned int size = INTVAL (operands[1]);
52417 unsigned int pos = INTVAL (operands[2]);
/* Fold a subreg of DST into the bit position instead.  */
52419 if (SUBREG_P (dst))
52421 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
52422 dst = SUBREG_REG (dst);
52425 switch (GET_MODE (dst))
52434 machine_mode srcmode, dstmode;
52435 rtx (*pinsr)(rtx, rtx, rtx, rtx);
52438 srcmode = mode_for_size (size, MODE_INT, 0);
/* pinsrb/pinsrd/pinsrq need SSE4.1; pinsrw (V8HI) is SSE2.  */
52443 if (!TARGET_SSE4_1)
52445 dstmode = V16QImode;
52446 pinsr = gen_sse4_1_pinsrb;
52452 dstmode = V8HImode;
52453 pinsr = gen_sse2_pinsrw;
52457 if (!TARGET_SSE4_1)
52459 dstmode = V4SImode;
52460 pinsr = gen_sse4_1_pinsrd;
52464 gcc_assert (TARGET_64BIT);
52465 if (!TARGET_SSE4_1)
52467 dstmode = V2DImode;
52468 pinsr = gen_sse4_1_pinsrq;
52475 /* Reject insertions to misaligned positions. */
52476 if (pos & (size-1))
/* A non-lowpart subreg source must first be extracted via pextr.  */
52479 if (SUBREG_P (src))
52481 unsigned int srcpos = SUBREG_BYTE (src);
52487 extr_ops[0] = gen_reg_rtx (srcmode);
52488 extr_ops[1] = gen_lowpart (srcmode, SUBREG_REG (src));
52489 extr_ops[2] = GEN_INT (size);
52490 extr_ops[3] = GEN_INT (srcpos * BITS_PER_UNIT);
52492 if (!ix86_expand_pextr (extr_ops))
52498 src = gen_lowpart (srcmode, SUBREG_REG (src));
52501 if (GET_MODE (dst) == dstmode)
52504 d = gen_reg_rtx (dstmode);
/* pinsr takes a one-hot element-selection immediate.  */
52506 emit_insn (pinsr (d, gen_lowpart (dstmode, dst),
52507 gen_lowpart (srcmode, src),
52508 GEN_INT (1 << (pos / size))));
52510 emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
52519 /* This function returns the calling abi specific va_list type node.
52520 It returns the FNDECL specific va_list type. */
/* Return the va_list type node matching FNDECL's calling ABI
   (ms_va_list for MS_ABI, sysv_va_list otherwise).  */
52523 ix86_fn_abi_va_list (tree fndecl)
52526 return va_list_type_node;
52527 gcc_assert (fndecl != NULL_TREE);
52529 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
52530 return ms_va_list_type_node;
52532 return sysv_va_list_type_node;
52535 /* Returns the canonical va_list type specified by TYPE. If there
52536 is no valid TYPE provided, it return NULL_TREE. */
/* Map TYPE to the canonical va_list node it denotes, trying the
   generic, SysV, and MS va_list types in turn; falls back to the
   target-independent lookup.
   NOTE(review): the initialization of HTYPE is not visible in this
   copy (presumably htype = type) — verify against the full source.  */
52539 ix86_canonical_va_list_type (tree type)
52543 /* Resolve references and pointers to va_list type. */
52544 if (TREE_CODE (type) == MEM_REF)
52545 type = TREE_TYPE (type);
52546 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
52547 type = TREE_TYPE (type);
52548 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
52549 type = TREE_TYPE (type);
52551 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
52553 wtype = va_list_type_node;
52554 gcc_assert (wtype != NULL_TREE);
52556 if (TREE_CODE (wtype) == ARRAY_TYPE)
52558 /* If va_list is an array type, the argument may have decayed
52559 to a pointer type, e.g. by being passed to another function.
52560 In that case, unwrap both types so that we can compare the
52561 underlying records. */
52562 if (TREE_CODE (htype) == ARRAY_TYPE
52563 || POINTER_TYPE_P (htype))
52565 wtype = TREE_TYPE (wtype);
52566 htype = TREE_TYPE (htype);
52569 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
52570 return va_list_type_node;
/* Same comparison, against the SysV va_list type.  */
52571 wtype = sysv_va_list_type_node;
52572 gcc_assert (wtype != NULL_TREE);
52574 if (TREE_CODE (wtype) == ARRAY_TYPE)
52576 /* If va_list is an array type, the argument may have decayed
52577 to a pointer type, e.g. by being passed to another function.
52578 In that case, unwrap both types so that we can compare the
52579 underlying records. */
52580 if (TREE_CODE (htype) == ARRAY_TYPE
52581 || POINTER_TYPE_P (htype))
52583 wtype = TREE_TYPE (wtype);
52584 htype = TREE_TYPE (htype);
52587 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
52588 return sysv_va_list_type_node;
/* Same comparison, against the MS va_list type.  */
52589 wtype = ms_va_list_type_node;
52590 gcc_assert (wtype != NULL_TREE);
52592 if (TREE_CODE (wtype) == ARRAY_TYPE)
52594 /* If va_list is an array type, the argument may have decayed
52595 to a pointer type, e.g. by being passed to another function.
52596 In that case, unwrap both types so that we can compare the
52597 underlying records. */
52598 if (TREE_CODE (htype) == ARRAY_TYPE
52599 || POINTER_TYPE_P (htype))
52601 wtype = TREE_TYPE (wtype);
52602 htype = TREE_TYPE (htype);
52605 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
52606 return ms_va_list_type_node;
52609 return std_canonical_va_list_type (type);
52612 /* Iterate through the target-specific builtin types for va_list.
52613 IDX denotes the iterator, *PTREE is set to the result type of
52614 the va_list builtin, and *PNAME to its internal type.
52615 Returns zero if there is no element for this index, otherwise
52616 IDX should be increased upon the next call.
52617 Note, do not iterate a base builtin's name like __builtin_va_list.
52618 Used from c_common_nodes_and_builtins. */
/* See the iterator contract in the comment above: IDX selects which
   target va_list builtin to report via *PNAME/*PTREE.  */
52621 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
52631 *ptree = ms_va_list_type_node;
52632 *pname = "__builtin_ms_va_list";
52636 *ptree = sysv_va_list_type_node;
52637 *pname = "__builtin_sysv_va_list";
52645 #undef TARGET_SCHED_DISPATCH
52646 #define TARGET_SCHED_DISPATCH has_dispatch
52647 #undef TARGET_SCHED_DISPATCH_DO
52648 #define TARGET_SCHED_DISPATCH_DO do_dispatch
52649 #undef TARGET_SCHED_REASSOCIATION_WIDTH
52650 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
52651 #undef TARGET_SCHED_REORDER
52652 #define TARGET_SCHED_REORDER ix86_sched_reorder
52653 #undef TARGET_SCHED_ADJUST_PRIORITY
52654 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
52655 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
52656 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
52657 ix86_dependencies_evaluation_hook
52659 /* The size of the dispatch window is the total number of bytes of
52660 object code allowed in a window. */
52661 #define DISPATCH_WINDOW_SIZE 16
52663 /* Number of dispatch windows considered for scheduling. */
52664 #define MAX_DISPATCH_WINDOWS 3
52666 /* Maximum number of instructions in a window. */
52669 /* Maximum number of immediate operands in a window. */
52672 /* Maximum number of immediate bits allowed in a window. */
52673 #define MAX_IMM_SIZE 128
52675 /* Maximum number of 32 bit immediates allowed in a window. */
52676 #define MAX_IMM_32 4
52678 /* Maximum number of 64 bit immediates allowed in a window. */
52679 #define MAX_IMM_64 2
52681 /* Maximum total of loads or prefetches allowed in a window. */
52684 /* Maximum total of stores allowed in a window. */
52685 #define MAX_STORE 1
52691 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
/* NOTE(review): the enumerator list is elided in this copy; the tables
   below index it and must stay in enum order.  */
52692 enum dispatch_group {
52707 /* Number of allowable groups in a dispatch window. It is an array
52708 indexed by dispatch_group enum. 100 is used as a big number,
52709 because the number of these kind of operations does not have any
52710 effect in dispatch window, but we need them for other reasons in
52712 static unsigned int num_allowable_groups[disp_last] = {
52713 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
/* Printable names for each dispatch_group, used by the debug dumps.  */
52716 char group_name[disp_last + 1][16] = {
52717 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
52718 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
52719 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
52722 /* Instruction path. */
52725 path_single, /* Single micro op. */
52726 path_double, /* Double micro op. */
52727 path_multi, /* Instructions with more than 2 micro op.. */
52731 /* sched_insn_info defines a window to the instructions scheduled in
52732 the basic block. It contains a pointer to the insn_info table and
52733 the instruction scheduled.
52735 Windows are allocated for each basic block and are linked
52737 typedef struct sched_insn_info_s {
52739 enum dispatch_group group;
52740 enum insn_path path;
52745 /* Linked list of dispatch windows. This is a two way list of
52746 dispatch windows of a basic block. It contains information about
52747 the number of uops in the window and the total number of
52748 instructions and of bytes in the object code for this dispatch
52750 typedef struct dispatch_windows_s {
52751 int num_insn; /* Number of insn in the window. */
52752 int num_uops; /* Number of uops in the window. */
52753 int window_size; /* Number of bytes in the window. */
52754 int window_num; /* Window number between 0 or 1. */
52755 int num_imm; /* Number of immediates in an insn. */
52756 int num_imm_32; /* Number of 32 bit immediates in an insn. */
52757 int num_imm_64; /* Number of 64 bit immediates in an insn. */
52758 int imm_size; /* Total immediates in the window. */
52759 int num_loads; /* Total memory loads in the window. */
52760 int num_stores; /* Total memory stores in the window. */
52761 int violation; /* Violation exists in window. */
52762 sched_insn_info *window; /* Pointer to the window. */
52763 struct dispatch_windows_s *next;
52764 struct dispatch_windows_s *prev;
52765 } dispatch_windows;
52767 /* Immediate values used in an insn. */
52768 typedef struct imm_info_s
/* The two windows currently being filled (window 0 and window 1).  */
52775 static dispatch_windows *dispatch_window_list;
52776 static dispatch_windows *dispatch_window_list1;
52778 /* Get dispatch group of insn. */
52780 static enum dispatch_group
52781 get_mem_group (rtx_insn *insn)
52783 enum attr_memory memory;
/* Unrecognized insns carry no attributes; treat as no group.  */
52785 if (INSN_CODE (insn) < 0)
52786 return disp_no_group;
52787 memory = get_attr_memory (insn);
52788 if (memory == MEMORY_STORE)
52791 if (memory == MEMORY_LOAD)
52794 if (memory == MEMORY_BOTH)
52795 return disp_load_store;
52797 return disp_no_group;
52800 /* Return true if insn is a compare instruction. */
52803 is_cmp (rtx_insn *insn)
52805 enum attr_type type;
52807 type = get_attr_type (insn);
/* Either the insn's type attribute says compare, or its pattern is a
   bare COMPARE rtx.  */
52808 return (type == TYPE_TEST
52809 || type == TYPE_ICMP
52810 || type == TYPE_FCMP
52811 || GET_CODE (PATTERN (insn)) == COMPARE);
52814 /* Return true if a dispatch violation encountered. */
52817 dispatch_violation (void)
/* The violation flag lives on the most recent (last) window.  */
52819 if (dispatch_window_list->next)
52820 return dispatch_window_list->next->violation;
52821 return dispatch_window_list->violation;
52824 /* Return true if insn is a branch instruction. */
52827 is_branch (rtx_insn *insn)
/* Calls count as branches for dispatch purposes.  */
52829 return (CALL_P (insn) || JUMP_P (insn));
52832 /* Return true if insn is a prefetch instruction. */
52835 is_prefetch (rtx_insn *insn)
/* A non-jump insn whose whole pattern is a PREFETCH rtx.  */
52837 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
52840 /* This function initializes a dispatch window and the list container holding a
52841 pointer to the window. */
/* Reset window WINDOW_NUM (0 or 1) to an empty state; the window
   storage itself must already be allocated.  */
52844 init_window (int window_num)
52847 dispatch_windows *new_list;
52849 if (window_num == 0)
52850 new_list = dispatch_window_list;
52852 new_list = dispatch_window_list1;
52854 new_list->num_insn = 0;
52855 new_list->num_uops = 0;
52856 new_list->window_size = 0;
52857 new_list->next = NULL;
52858 new_list->prev = NULL;
52859 new_list->window_num = window_num;
52860 new_list->num_imm = 0;
52861 new_list->num_imm_32 = 0;
52862 new_list->num_imm_64 = 0;
52863 new_list->imm_size = 0;
52864 new_list->num_loads = 0;
52865 new_list->num_stores = 0;
52866 new_list->violation = false;
/* Clear every per-insn slot as well.  */
52868 for (i = 0; i < MAX_INSN; i++)
52870 new_list->window[i].insn = NULL;
52871 new_list->window[i].group = disp_no_group;
52872 new_list->window[i].path = no_path;
52873 new_list->window[i].byte_len = 0;
52874 new_list->window[i].imm_bytes = 0;
52879 /* This function allocates and initializes a dispatch window and the
52880 list container holding a pointer to the window. */
52882 static dispatch_windows *
52883 allocate_window (void)
52885 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
/* One extra slot so the table can be scanned with a NULL sentinel.  */
52886 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
52891 /* This routine initializes the dispatch scheduling information. It
52892 initiates building dispatch scheduler tables and constructs the
52893 first dispatch window. */
52896 init_dispatch_sched (void)
52898 /* Allocate a dispatch list and a window. */
/* Both windows are allocated up front and reused via init_window.  */
52899 dispatch_window_list = allocate_window ();
52900 dispatch_window_list1 = allocate_window ();
52905 /* This function returns true if a branch is detected. End of a basic block
52906 does not have to be a branch, but here we assume only branches end a
52910 is_end_basic_block (enum dispatch_group group)
/* Only branches are treated as block terminators here.  */
52912 return group == disp_branch;
52915 /* This function is called when the end of a window processing is reached. */
52918 process_end_window (void)
52920 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
/* When a second window exists, the pair must fit in 48 bytes.  */
52921 if (dispatch_window_list->next)
52923 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
52924 gcc_assert (dispatch_window_list->window_size
52925 + dispatch_window_list1->window_size <= 48);
52931 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
52932 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
52933 for 48 bytes of instructions. Note that these windows are not dispatch
52934 windows that their sizes are DISPATCH_WINDOW_SIZE. */
52936 static dispatch_windows *
52937 allocate_next_window (int window_num)
52939 if (window_num == 0)
52941 if (dispatch_window_list->next)
52944 return dispatch_window_list;
/* Link window 1 after window 0 and hand it back.  */
52947 dispatch_window_list->next = dispatch_window_list1;
52948 dispatch_window_list1->prev = dispatch_window_list;
52950 return dispatch_window_list1;
52953 /* Compute number of immediate operands of an instruction. */
/* Walk every sub-rtx of IN_RTX and tally immediate operands into
   IMM_VALUES (total / 32-bit / 64-bit counts).  */
52956 find_constant (rtx in_rtx, imm_info *imm_values)
52958 if (INSN_P (in_rtx))
52959 in_rtx = PATTERN (in_rtx);
52960 subrtx_iterator::array_type array;
52961 FOR_EACH_SUBRTX (iter, array, in_rtx, ALL)
52962 if (const_rtx x = *iter)
52963 switch (GET_CODE (x))
/* CONST_INT: classify as 32- or 64-bit by x86_64 immediate range.  */
52968 (imm_values->imm)++;
52969 if (x86_64_immediate_operand (CONST_CAST_RTX (x), SImode))
52970 (imm_values->imm32)++;
52972 (imm_values->imm64)++;
52976 case CONST_WIDE_INT:
52977 (imm_values->imm)++;
52978 (imm_values->imm64)++;
/* Normal label references count as 32-bit immediates.  */
52982 if (LABEL_KIND (x) == LABEL_NORMAL)
52984 (imm_values->imm)++;
52985 (imm_values->imm32)++;
52994 /* Return total size of immediate operands of an instruction along with number
52995 of corresponding immediate-operands. It initializes its parameters to zero
52996 before calling FIND_CONSTANT.
52997 INSN is the input instruction. IMM is the total of immediates.
52998 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
53002 get_num_immediates (rtx_insn *insn, int *imm, int *imm32, int *imm64)
53004 imm_info imm_values = {0, 0, 0};
53006 find_constant (insn, &imm_values);
53007 *imm = imm_values.imm;
53008 *imm32 = imm_values.imm32;
53009 *imm64 = imm_values.imm64;
/* Total immediate payload in bytes: 4 per 32-bit, 8 per 64-bit.  */
53010 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
53013 /* This function indicates if an operand of an instruction is an
53017 has_immediate (rtx_insn *insn)
53019 int num_imm_operand;
53020 int num_imm32_operand;
53021 int num_imm64_operand;
/* Nonzero immediate byte size means the insn carries an immediate.  */
53024 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
53025 &num_imm64_operand);
53029 /* Return single or double path for instructions. */
53031 static enum insn_path
53032 get_insn_path (rtx_insn *insn)
/* Derive the uop path from the amdfam10_decode attribute.  */
53034 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
53036 if ((int)path == 0)
53037 return path_single;
53039 if ((int)path == 1)
53040 return path_double;
53045 /* Return insn dispatch group. */
53047 static enum dispatch_group
53048 get_insn_group (rtx_insn *insn)
/* Memory classification first; then branch, immediate, prefetch.  */
53050 enum dispatch_group group = get_mem_group (insn);
53054 if (is_branch (insn))
53055 return disp_branch;
53060 if (has_immediate (insn))
53063 if (is_prefetch (insn))
53064 return disp_prefetch;
53066 return disp_no_group;
53069 /* Count number of GROUP restricted instructions in a dispatch
53070 window WINDOW_LIST. */
/* Return how many group-restriction rules INSN would violate if added
   to WINDOW_LIST (0 means it fits under the per-group budgets).  */
53073 count_num_restricted (rtx_insn *insn, dispatch_windows *window_list)
53075 enum dispatch_group group = get_insn_group (insn);
53077 int num_imm_operand;
53078 int num_imm32_operand;
53079 int num_imm64_operand;
53081 if (group == disp_no_group)
/* Immediate-carrying insns are checked against the per-window
   immediate count and byte budgets (MAX_IMM*, MAX_IMM_SIZE).  */
53084 if (group == disp_imm)
53086 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
53087 &num_imm64_operand);
53088 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
53089 || num_imm_operand + window_list->num_imm > MAX_IMM
53090 || (num_imm32_operand > 0
53091 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
53092 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
53093 || (num_imm64_operand > 0
53094 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
53095 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
53096 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
53097 && num_imm64_operand > 0
53098 && ((window_list->num_imm_64 > 0
53099 && window_list->num_insn >= 2)
53100 || window_list->num_insn >= 3)))
/* Memory insns are checked against the load/store budgets.  */
53106 if ((group == disp_load_store
53107 && (window_list->num_loads >= MAX_LOAD
53108 || window_list->num_stores >= MAX_STORE))
53109 || ((group == disp_load
53110 || group == disp_prefetch)
53111 && window_list->num_loads >= MAX_LOAD)
53112 || (group == disp_store
53113 && window_list->num_stores >= MAX_STORE))
53119 /* This function returns true if insn satisfies dispatch rules on the
53120 last window scheduled. */
53123 fits_dispatch_window (rtx_insn *insn)
53125 dispatch_windows *window_list = dispatch_window_list;
53126 dispatch_windows *window_list_next = dispatch_window_list->next;
53127 unsigned int num_restrict;
53128 enum dispatch_group group = get_insn_group (insn);
53129 enum insn_path path = get_insn_path (insn);
53132 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
53133 instructions should be given the lowest priority in the
53134 scheduling process in Haifa scheduler to make sure they will be
53135 scheduled in the same dispatch window as the reference to them. */
53136 if (group == disp_jcc || group == disp_cmp)
53139 /* Check nonrestricted. */
53140 if (group == disp_no_group || group == disp_branch)
53143 /* Get last dispatch window. */
53144 if (window_list_next)
53145 window_list = window_list_next;
/* Window 1's byte budget is shared with window 0 (48 bytes total).  */
53147 if (window_list->window_num == 1)
53149 sum = window_list->prev->window_size + window_list->window_size;
53152 || (min_insn_size (insn) + sum) >= 48)
53153 /* Window 1 is full. Go for next window. */
53157 num_restrict = count_num_restricted (insn, window_list);
53159 if (num_restrict > num_allowable_groups[group])
53162 /* See if it fits in the first window. */
53163 if (window_list->window_num == 0)
53165 /* The first window should have only single and double path
53167 if (path == path_double
53168 && (window_list->num_uops + 2) > MAX_INSN)
53170 else if (path != path_single)
53176 /* Add an instruction INSN with NUM_UOPS micro-operations to the
53177 dispatch window WINDOW_LIST. */
53180 add_insn_window (rtx_insn *insn, dispatch_windows *window_list, int num_uops)
53182 int byte_len = min_insn_size (insn);
53183 int num_insn = window_list->num_insn;
53185 sched_insn_info *window = window_list->window;
53186 enum dispatch_group group = get_insn_group (insn);
53187 enum insn_path path = get_insn_path (insn);
53188 int num_imm_operand;
53189 int num_imm32_operand;
53190 int num_imm64_operand;
/* Record a violation if the insn doesn't satisfy the dispatch rules;
   disp_cmp is exempt (deliberately scheduled late).  */
53192 if (!window_list->violation && group != disp_cmp
53193 && !fits_dispatch_window (insn))
53194 window_list->violation = true;
53196 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
53197 &num_imm64_operand);
53199 /* Initialize window with new instruction. */
53200 window[num_insn].insn = insn;
53201 window[num_insn].byte_len = byte_len;
53202 window[num_insn].group = group;
53203 window[num_insn].path = path;
53204 window[num_insn].imm_bytes = imm_size;
/* Update the window-wide running totals.  */
53206 window_list->window_size += byte_len;
53207 window_list->num_insn = num_insn + 1;
53208 window_list->num_uops = window_list->num_uops + num_uops;
53209 window_list->imm_size += imm_size;
53210 window_list->num_imm += num_imm_operand;
53211 window_list->num_imm_32 += num_imm32_operand;
53212 window_list->num_imm_64 += num_imm64_operand;
53214 if (group == disp_store)
53215 window_list->num_stores += 1;
53216 else if (group == disp_load
53217 || group == disp_prefetch)
53218 window_list->num_loads += 1;
/* load+store insns count against both budgets.  */
53219 else if (group == disp_load_store)
53221 window_list->num_stores += 1;
53222 window_list->num_loads += 1;
53226 /* Adds a scheduled instruction, INSN, to the current dispatch window.
53227 If the total bytes of instructions or the number of instructions in
53228 the window exceed allowable, it allocates a new window. */
53231 add_to_dispatch_window (rtx_insn *insn)
53234 dispatch_windows *window_list;
53235 dispatch_windows *next_list;
53236 dispatch_windows *window0_list;
53237 enum insn_path path;
53238 enum dispatch_group insn_group;
/* Unrecognized insns are ignored.  */
53246 if (INSN_CODE (insn) < 0)
53249 byte_len = min_insn_size (insn);
53250 window_list = dispatch_window_list;
53251 next_list = window_list->next;
53252 path = get_insn_path (insn);
53253 insn_group = get_insn_group (insn);
53255 /* Get the last dispatch window. */
53257 window_list = dispatch_window_list->next;
/* path encodes the uop count for single/double; multi falls through
   to the enum's integer value.  */
53259 if (path == path_single)
53261 else if (path == path_double)
53264 insn_num_uops = (int) path;
53266 /* If current window is full, get a new window.
53267 Window number zero is full, if MAX_INSN uops are scheduled in it.
53268 Window number one is full, if window zero's bytes plus window
53269 one's bytes is 32, or if the bytes of the new instruction added
53270 to the total makes it greater than 48, or it has already MAX_INSN
53271 instructions in it. */
53272 num_insn = window_list->num_insn;
53273 num_uops = window_list->num_uops;
53274 window_num = window_list->window_num;
53275 insn_fits = fits_dispatch_window (insn);
53277 if (num_insn >= MAX_INSN
53278 || num_uops + insn_num_uops > MAX_INSN
/* Flip to the other window (0 <-> 1).  */
53281 window_num = ~window_num & 1;
53282 window_list = allocate_next_window (window_num);
53285 if (window_num == 0)
53287 add_insn_window (insn, window_list, insn_num_uops);
53288 if (window_list->num_insn >= MAX_INSN
53289 && insn_group == disp_branch)
53291 process_end_window ();
53295 else if (window_num == 1)
53297 window0_list = window_list->prev;
53298 sum = window0_list->window_size + window_list->window_size;
53300 || (byte_len + sum) >= 48)
53302 process_end_window ();
53303 window_list = dispatch_window_list;
53306 add_insn_window (insn, window_list, insn_num_uops);
53309 gcc_unreachable ();
53311 if (is_end_basic_block (insn_group))
53313 /* End of basic block is reached do end-basic-block process. */
53314 process_end_window ();
53319 /* Print the dispatch window, WINDOW_NUM, to FILE. */
53321 DEBUG_FUNCTION static void
53322 debug_dispatch_window_file (FILE *file, int window_num)
53324 dispatch_windows *list;
53327 if (window_num == 0)
53328 list = dispatch_window_list;
53330 list = dispatch_window_list1;
/* Dump the window-wide totals, then each occupied slot.  */
53332 fprintf (file, "Window #%d:\n", list->window_num);
53333 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
53334 list->num_insn, list->num_uops, list->window_size);
53335 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
53336 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
53338 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
53340 fprintf (file, " insn info:\n");
53342 for (i = 0; i < MAX_INSN; i++)
53344 if (!list->window[i].insn)
53346 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
53347 i, group_name[list->window[i].group],
53348 i, (void *)list->window[i].insn,
53349 i, list->window[i].path,
53350 i, list->window[i].byte_len,
53351 i, list->window[i].imm_bytes);
53355 /* Print to stdout a dispatch window. */
53357 DEBUG_FUNCTION void
53358 debug_dispatch_window (int window_num)
/* Convenience wrapper: dump window WINDOW_NUM to stdout.  */
53360 debug_dispatch_window_file (stdout, window_num);
53363 /* Print INSN dispatch information to FILE. */
53365 DEBUG_FUNCTION static void
53366 debug_insn_dispatch_info_file (FILE *file, rtx_insn *insn)
53369 enum insn_path path;
53370 enum dispatch_group group;
53372 int num_imm_operand;
53373 int num_imm32_operand;
53374 int num_imm64_operand;
/* Nothing to print for unrecognized insns.  */
53376 if (INSN_CODE (insn) < 0)
53379 byte_len = min_insn_size (insn);
53380 path = get_insn_path (insn);
53381 group = get_insn_group (insn);
53382 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
53383 &num_imm64_operand);
53385 fprintf (file, " insn info:\n");
53386 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
53387 group_name[group], path, byte_len);
53388 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
53389 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
53392 /* Print to STDERR the status of the ready list with respect to
53393 dispatch windows. */
53395 DEBUG_FUNCTION void
53396 debug_ready_dispatch (void)
53399 int no_ready = number_in_ready ();
53401 fprintf (stdout, "Number of ready: %d\n", no_ready);
/* Dump dispatch info for each insn on the scheduler's ready list.  */
53403 for (i = 0; i < no_ready; i++)
53404 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
53407 /* This routine is the driver of the dispatch scheduler.  */
53410 do_dispatch (rtx_insn *insn, int mode)
/* MODE selects the action: (re)initialize scheduler state, or append
   INSN to the current dispatch window.  */
53412 if (mode == DISPATCH_INIT)
53413 init_dispatch_sched ();
53414 else if (mode == ADD_TO_DISPATCH_WINDOW)
53415 add_to_dispatch_window (insn);
53418 /* Return TRUE if Dispatch Scheduling is supported.  */
53421 has_dispatch (rtx_insn *insn, int action)
/* Dispatch scheduling is modeled only for the AMD bdver1-4 and znver1
   tunings, and only when flag_dispatch_scheduler is set; ACTION selects
   which query to answer.  */
53423 if ((TARGET_BDVER1 || TARGET_BDVER2 || TARGET_BDVER3
53424 || TARGET_BDVER4 || TARGET_ZNVER1) && flag_dispatch_scheduler)
53430 case IS_DISPATCH_ON:
53435 return is_cmp (insn);
53437 case DISPATCH_VIOLATION:
53438 return dispatch_violation ();
53440 case FITS_DISPATCH_WINDOW:
53441 return fits_dispatch_window (insn);
53447 /* Implementation of reassociation_width target hook used by
53448 reassoc phase to identify parallelism level in reassociated
53449 tree.  Statements tree_code is passed in OPC.  Arguments type
53452 Currently parallel reassociation is enabled for Atom
53453 processors only and we set reassociation width to be 2
53454 because Atom may issue up to 2 instructions per cycle.
53456 Return value should be fixed if parallel reassociation is
53457 enabled for other processors.  */
53460 ix86_reassociation_width (unsigned int, machine_mode mode)
/* Vector modes take the vector-parallel-execution tuning path.  */
53463 if (VECTOR_MODE_P (mode))
53465 if (TARGET_VECTOR_PARALLEL_EXECUTION)
/* Scalar integer vs. float reassociation are gated by separate
   tuning flags.  */
53472 if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
/* 64-bit Haswell gets a wider FP reassociation width (4) than
   other tunings (2).  */
53474 else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
53475 return ((TARGET_64BIT && ix86_tune == PROCESSOR_HASWELL)? 4 : 2);
53480 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
53481 place emms and femms instructions.  */
53483 static machine_mode
53484 ix86_preferred_simd_mode (machine_mode mode)
/* Pick the widest vector mode the enabled ISA supports for the given
   element mode; -mprefer-avx128 caps AVX targets at 128 bits.
   NOTE(review): the switch labels selecting per-element-mode cases are
   elided in this view.  */
53492 return TARGET_AVX512BW ? V64QImode :
53493 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
53495 return TARGET_AVX512BW ? V32HImode :
53496 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
53498 return TARGET_AVX512F ? V16SImode :
53499 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
53501 return TARGET_AVX512F ? V8DImode :
53502 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;
53505 if (TARGET_AVX512F)
53507 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
/* Double vectors additionally require the vectorize-double tuning and
   at least SSE2.  */
53513 if (!TARGET_VECTORIZE_DOUBLE)
53515 else if (TARGET_AVX512F)
53517 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
53519 else if (TARGET_SSE2)
53528 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
53529 vectors.  If AVX512F is enabled then try vectorizing with 512bit,
53530 256bit and 128bit vectors.  */
53532 static unsigned int
53533 ix86_autovectorize_vector_sizes (void)
/* Return a bitmask of candidate vector sizes in bytes; 0 means only
   the default size is tried.  */
53535 return TARGET_AVX512F ? 64 | 32 | 16 :
53536 (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
53539 /* Implemenation of targetm.vectorize.get_mask_mode.  */
53541 static machine_mode
53542 ix86_get_mask_mode (unsigned nunits, unsigned vector_size)
53544 unsigned elem_size = vector_size / nunits;
53546 /* Scalar mask case.  */
53547 if ((TARGET_AVX512F && vector_size == 64)
53548 || (TARGET_AVX512VL && (vector_size == 32 || vector_size == 16)))
/* AVX512BW extends scalar (k-register) masks to 8/16-bit elements;
   without it only 32/64-bit elements use them.  */
53550 if (elem_size == 4 || elem_size == 8 || TARGET_AVX512BW)
53551 return smallest_mode_for_size (nunits, MODE_INT);
/* Otherwise masks are vectors of integers with the same element size
   and count as the data vector.  */
53554 machine_mode elem_mode
53555 = smallest_mode_for_size (elem_size * BITS_PER_UNIT, MODE_INT);
53557 gcc_assert (elem_size * nunits == vector_size);
53559 return mode_for_vector (elem_mode, nunits);
53564 /* Return class of registers which could be used for pseudo of MODE
53565 and of class RCLASS for spilling instead of memory.  Return NO_REGS
53566 if it is not possible or non-profitable.  */
53568 ix86_spill_class (reg_class_t rclass, machine_mode mode)
/* Allow spilling 32-bit (and, on 64-bit targets, 64-bit) integer
   pseudos into SSE registers when the tuning enables it and MMX is
   off.  */
53570 if (TARGET_SSE && TARGET_GENERAL_REGS_SSE_SPILL && ! TARGET_MMX
53571 && (mode == SImode || (TARGET_64BIT && mode == DImode))
53572 && rclass != NO_REGS && INTEGER_CLASS_P (rclass))
53573 return ALL_SSE_REGS;
53577 /* Implement targetm.vectorize.init_cost.  */
53580 ix86_init_cost (struct loop *)
/* Cost data is three accumulators: prologue, body and epilogue,
   all starting at zero.  */
53582 unsigned *cost = XNEWVEC (unsigned, 3);
53583 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
53587 /* Implement targetm.vectorize.add_stmt_cost.  */
53590 ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
53591 struct _stmt_vec_info *stmt_info, int misalign,
53592 enum vect_cost_model_location where)
53594 unsigned *cost = (unsigned *) data;
53595 unsigned retval = 0;
53597 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
53598 int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
53600 /* Statements in an inner loop relative to the loop being
53601 vectorized are weighted more heavily.  The value here is
53602 arbitrary and could potentially be improved with analysis.  */
53603 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
53604 count *= 50;  /* FIXME.  */
53606 retval = (unsigned) (count * stmt_cost);
53608 /* We need to multiply all vector stmt cost by 1.7 (estimated cost)
53609 for Silvermont as it has out of order integer pipeline and can execute
53610 2 scalar instruction per tick, but has in order SIMD pipeline.  */
53611 if (TARGET_SILVERMONT || TARGET_INTEL)
53612 if (stmt_info && stmt_info->stmt)
53614 tree lhs_op = gimple_get_lhs (stmt_info->stmt);
/* The 1.7x penalty applies only to integer-typed statements.  */
53615 if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
53616 retval = (retval * 17) / 10;
/* Accumulate into the prologue/body/epilogue bucket chosen by WHERE.  */
53619 cost[where] += retval;
53624 /* Implement targetm.vectorize.finish_cost.  */
53627 ix86_finish_cost (void *data, unsigned *prologue_cost,
53628 unsigned *body_cost, unsigned *epilogue_cost)
/* Copy the three accumulated cost buckets back to the caller.  */
53630 unsigned *cost = (unsigned *) data;
53631 *prologue_cost = cost[vect_prologue];
53632 *body_cost = cost[vect_body];
53633 *epilogue_cost = cost[vect_epilogue];
53636 /* Implement targetm.vectorize.destroy_cost_data.  */
53639 ix86_destroy_cost_data (void *data)
53644 /* Validate target specific memory model bits in VAL.  */
53646 static unsigned HOST_WIDE_INT
53647 ix86_memmodel_check (unsigned HOST_WIDE_INT val)
53649 enum memmodel model = memmodel_from_int (val);
/* Reject unknown target bits, and reject HLE_ACQUIRE combined with
   HLE_RELEASE; fall back to SEQ_CST in that case.  */
53652 if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
53654 || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
53656 warning (OPT_Winvalid_memory_model,
53657 "Unknown architecture specific memory model");
53658 return MEMMODEL_SEQ_CST;
/* HLE_ACQUIRE requires at least an acquire model, HLE_RELEASE at least
   a release model; acq_rel/seq_cst satisfy both.  */
53660 strong = (is_mm_acq_rel (model) || is_mm_seq_cst (model));
53661 if (val & IX86_HLE_ACQUIRE && !(is_mm_acquire (model) || strong))
53663 warning (OPT_Winvalid_memory_model,
53664 "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
53665 return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
53667 if (val & IX86_HLE_RELEASE && !(is_mm_release (model) || strong))
53669 warning (OPT_Winvalid_memory_model,
53670 "HLE_RELEASE not used with RELEASE or stronger memory model");
53671 return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
53676 /* Set CLONEI->vecsize_mangle, CLONEI->vecsize_int,
53677 CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
53678 CLONEI->simdlen.  Return 0 if SIMD clones shouldn't be emitted,
53679 or number of vecsize_mangle variants that should be emitted.  */
53682 ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
53683 struct cgraph_simd_clone *clonei,
53684 tree base_type, int num)
/* simdlen, when given explicitly, must be a power of two in [2, 16].  */
53688 if (clonei->simdlen
53689 && (clonei->simdlen < 2
53690 || clonei->simdlen > 16
53691 || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
53693 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
53694 "unsupported simdlen %d", clonei->simdlen);
/* Validate the return type's machine mode (unless the function returns
   void).  NOTE(review): the accepted mode case labels are elided here.  */
53698 tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
53699 if (TREE_CODE (ret_type) != VOID_TYPE)
53700 switch (TYPE_MODE (ret_type))
53712 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
53713 "unsupported return type %qT for simd\n", ret_type);
/* Likewise validate every argument's machine mode.  */
53720 for (t = DECL_ARGUMENTS (node->decl), i = 0; t; t = DECL_CHAIN (t), i++)
53721 /* FIXME: Shouldn't we allow such arguments if they are uniform?  */
53722 switch (TYPE_MODE (TREE_TYPE (t)))
53734 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
53735 "unsupported argument type %qT for simd\n", TREE_TYPE (t));
/* Choose the ISA mangling letter: Cilk elemental defaults to 'b';
   non-exported functions pick the single best ISA available;
   otherwise emit all "bcd" variants indexed by NUM.  */
53739 if (clonei->cilk_elemental)
53741 /* Parse here processor clause.  If not present, default to 'b'.  */
53742 clonei->vecsize_mangle = 'b';
53744 else if (!TREE_PUBLIC (node->decl))
53746 /* If the function isn't exported, we can pick up just one ISA
53749 clonei->vecsize_mangle = 'd';
53750 else if (TARGET_AVX)
53751 clonei->vecsize_mangle = 'c';
53753 clonei->vecsize_mangle = 'b';
53758 clonei->vecsize_mangle = "bcd"[num];
/* Translate the mangling letter into int/float vector widths.  */
53761 switch (clonei->vecsize_mangle)
53764 clonei->vecsize_int = 128;
53765 clonei->vecsize_float = 128;
53768 clonei->vecsize_int = 128;
53769 clonei->vecsize_float = 256;
53772 clonei->vecsize_int = 256;
53773 clonei->vecsize_float = 256;
/* Derive simdlen from the vector width and BASE_TYPE when it was not
   given explicitly, capping at 16.  */
53776 if (clonei->simdlen == 0)
53778 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
53779 clonei->simdlen = clonei->vecsize_int;
53781 clonei->simdlen = clonei->vecsize_float;
53782 clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
53783 if (clonei->simdlen > 16)
53784 clonei->simdlen = 16;
53789 /* Add target attribute to SIMD clone NODE if needed.  */
53792 ix86_simd_clone_adjust (struct cgraph_node *node)
53794 const char *str = NULL;
53795 gcc_assert (node->decl == cfun->decl);
/* Map the clone's vecsize_mangle letter to a target attribute string.
   NOTE(review): the case labels assigning STR are elided in this view.  */
53796 switch (node->simdclone->vecsize_mangle)
53811 gcc_unreachable ();
/* Apply the attribute and re-select the target options for the clone.  */
53816 tree args = build_tree_list (NULL_TREE, build_string (strlen (str), str));
53817 bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0);
53820 ix86_reset_previous_fndecl ();
53821 ix86_set_current_function (node->decl);
53824 /* If SIMD clone NODE can't be used in a vectorized loop
53825 in current function, return -1, otherwise return a badness of using it
53826 (0 if it is most desirable from vecsize_mangle point of view, 1
53827 slightly less desirable, etc.).  */
53830 ix86_simd_clone_usable (struct cgraph_node *node)
/* Rank the clone's ISA variant against the current target's ISA;
   NOTE(review): the case labels and several returns are elided here.  */
53832 switch (node->simdclone->vecsize_mangle)
53839 return TARGET_AVX2 ? 2 : 1;
53843 return TARGET_AVX2 ? 1 : 0;
53850 gcc_unreachable ();
53854 /* This function adjusts the unroll factor based on
53855 the hardware capabilities.  For ex, bdver3 has
53856 a loop buffer which makes unrolling of smaller
53857 loops less important.  This function decides the
53858 unroll factor using number of memory references
53859 (value 32 is used) as a heuristic.  */
53862 ix86_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
53867 unsigned mem_count = 0;
/* Only tunings with TARGET_ADJUST_UNROLL get this heuristic.  */
53869 if (!TARGET_ADJUST_UNROLL)
53872 /* Count the number of memory references within the loop body.
53873 This value determines the unrolling factor for bdver3 and bdver4
53875 subrtx_iterator::array_type array;
53876 bbs = get_loop_body (loop);
53877 for (i = 0; i < loop->num_nodes; i++)
53878 FOR_BB_INSNS (bbs[i], insn)
53879 if (NONDEBUG_INSN_P (insn))
53880 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
53881 if (const_rtx x = *iter)
53884 machine_mode mode = GET_MODE (x);
53885 unsigned int n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
/* Unroll factor is 32 / mem_count when there are at most 32 memory
   references; NOTE(review): the fallback return is elided here.  */
53893 if (mem_count && mem_count <=32)
53894 return 32/mem_count;
53900 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P.  */
53903 ix86_float_exceptions_rounding_supported_p (void)
53905 /* For x87 floating point with standard excess precision handling,
53906 there is no adddf3 pattern (since x87 floating point only has
53907 XFmode operations) so the default hook implementation gets this
53909 return TARGET_80387 || TARGET_SSE_MATH;
53912 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV.  */
53915 ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
/* Nothing to do when neither the x87 FPU nor SSE math is available.  */
53917 if (!TARGET_80387 && !TARGET_SSE_MATH)
/* Accumulator for raised exception flags, merged from x87 and SSE.  */
53919 tree exceptions_var = create_tmp_var_raw (integer_type_node);
/* --- x87 part: save the 7-word FP environment (fnstenv), clear
   exceptions (fnclex), later read the status word (fnstsw) and
   restore the environment (fldenv).  --- */
53922 tree fenv_index_type = build_index_type (size_int (6));
53923 tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
53924 tree fenv_var = create_tmp_var_raw (fenv_type);
53925 TREE_ADDRESSABLE (fenv_var) = 1;
53926 tree fenv_ptr = build_pointer_type (fenv_type);
53927 tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
53928 fenv_addr = fold_convert (ptr_type_node, fenv_addr);
53929 tree fnstenv = ix86_builtins[IX86_BUILTIN_FNSTENV];
53930 tree fldenv = ix86_builtins[IX86_BUILTIN_FLDENV];
53931 tree fnstsw = ix86_builtins[IX86_BUILTIN_FNSTSW];
53932 tree fnclex = ix86_builtins[IX86_BUILTIN_FNCLEX];
53933 tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
53934 tree hold_fnclex = build_call_expr (fnclex, 0);
53935 fenv_var = build4 (TARGET_EXPR, fenv_type, fenv_var, hold_fnstenv,
53936 NULL_TREE, NULL_TREE);
53937 *hold = build2 (COMPOUND_EXPR, void_type_node, fenv_var,
53939 *clear = build_call_expr (fnclex, 0);
/* UPDATE: read the x87 status word into EXCEPTIONS_VAR, then reload
   the saved environment.  */
53940 tree sw_var = create_tmp_var_raw (short_unsigned_type_node);
53941 tree fnstsw_call = build_call_expr (fnstsw, 0);
53942 tree sw_mod = build2 (MODIFY_EXPR, short_unsigned_type_node,
53943 sw_var, fnstsw_call);
53944 tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
53945 tree update_mod = build2 (MODIFY_EXPR, integer_type_node,
53946 exceptions_var, exceptions_x87);
53947 *update = build2 (COMPOUND_EXPR, integer_type_node,
53948 sw_mod, update_mod);
53949 tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
53950 *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
/* --- SSE part: save MXCSR, mask all exceptions (set 0x1f80) and clear
   the sticky flags (mask 0xffffffc0), later merge raised flags and
   restore the original MXCSR.  --- */
53952 if (TARGET_SSE_MATH)
53954 tree mxcsr_orig_var = create_tmp_var_raw (unsigned_type_node);
53955 tree mxcsr_mod_var = create_tmp_var_raw (unsigned_type_node);
53956 tree stmxcsr = ix86_builtins[IX86_BUILTIN_STMXCSR];
53957 tree ldmxcsr = ix86_builtins[IX86_BUILTIN_LDMXCSR];
53958 tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
53959 tree hold_assign_orig = build2 (MODIFY_EXPR, unsigned_type_node,
53960 mxcsr_orig_var, stmxcsr_hold_call);
53961 tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
53963 build_int_cst (unsigned_type_node, 0x1f80));
53964 hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
53965 build_int_cst (unsigned_type_node, 0xffffffc0));
53966 tree hold_assign_mod = build2 (MODIFY_EXPR, unsigned_type_node,
53967 mxcsr_mod_var, hold_mod_val);
53968 tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
53969 tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
53970 hold_assign_orig, hold_assign_mod);
53971 hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
53972 ldmxcsr_hold_call);
53974 *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
53977 tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
/* Chain the SSE clear after the x87 clear when both are present.  */
53979 *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
53980 ldmxcsr_clear_call);
53982 *clear = ldmxcsr_clear_call;
53983 tree stxmcsr_update_call = build_call_expr (stmxcsr, 0);
53984 tree exceptions_sse = fold_convert (integer_type_node,
53985 stxmcsr_update_call);
/* Merge SSE exception flags with the x87 ones (or just store them when
   there is no x87 part).  */
53988 tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
53989 exceptions_var, exceptions_sse);
53990 tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
53991 exceptions_var, exceptions_mod);
53992 *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
53993 exceptions_assign);
53996 *update = build2 (MODIFY_EXPR, integer_type_node,
53997 exceptions_var, exceptions_sse);
53998 tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
53999 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
54000 ldmxcsr_update_call);
/* Finally re-raise the collected exceptions atomically.  */
54002 tree atomic_feraiseexcept
54003 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
54004 tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
54005 1, exceptions_var);
54006 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
54007 atomic_feraiseexcept_call);
54010 /* Return mode to be used for bounds or VOIDmode
54011 if bounds are not supported.  */
54013 static enum machine_mode
54014 ix86_mpx_bound_mode ()
54016 /* Do not support pointer checker if MPX
/* Warn when -fcheck-pointer-bounds is requested without MPX support.  */
54020 if (flag_check_pointer_bounds)
54021 warning (0, "Pointer Checker requires MPX support on this target."
54022 " Use -mmpx options to enable MPX.");
54029 /* Return constant used to statically initialize constant bounds.
54031 This function is used to create special bound values.  For now
54032 only INIT bounds and NONE bounds are expected.  More special
54033 values may be added later.  */
54036 ix86_make_bounds_constant (HOST_WIDE_INT lb, HOST_WIDE_INT ub)
/* Encode the bound pair as a complex constant: LB of 0 maps to an
   all-zero low part, UB of -1 to an all-zero high part.  */
54038 tree low = lb ? build_minus_one_cst (pointer_sized_int_node)
54039 : build_zero_cst (pointer_sized_int_node);
54040 tree high = ub ? build_zero_cst (pointer_sized_int_node)
54041 : build_minus_one_cst (pointer_sized_int_node);
54043 /* This function is supposed to be used to create INIT and
54044 NONE bounds only.  */
54045 gcc_assert ((lb == 0 && ub == -1)
54046 || (lb == -1 && ub == 0));
54048 return build_complex (NULL, low, high);
54051 /* Generate a list of statements STMTS to initialize pointer bounds
54052 variable VAR with bounds LB and UB.  Return the number of generated
54056 ix86_initialize_bounds (tree var, tree lb, tree ub, tree *stmts)
54058 tree bnd_ptr = build_pointer_type (pointer_sized_int_node);
54059 tree lhs, modify, var_p;
/* The upper bound is stored in one's-complement form.  */
54061 ub = build1 (BIT_NOT_EXPR, pointer_sized_int_node, ub);
54062 var_p = fold_convert (bnd_ptr, build_fold_addr_expr (var));
/* Store LB into the first pointer-sized slot of VAR...  */
54064 lhs = build1 (INDIRECT_REF, pointer_sized_int_node, var_p);
54065 modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, lb);
54066 append_to_statement_list (modify, stmts);
/* ...and the (complemented) UB into the second slot.  */
54068 lhs = build1 (INDIRECT_REF, pointer_sized_int_node,
54069 build2 (POINTER_PLUS_EXPR, bnd_ptr, var_p,
54070 TYPE_SIZE_UNIT (pointer_sized_int_node)));
54071 modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, ub);
54072 append_to_statement_list (modify, stmts);
54077 #if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES
54078 /* For i386, common symbol is local only for non-PIE binaries.  For
54079 x86-64, common symbol is local only for non-PIE binaries or linker
54080 supports copy reloc in PIE binaries.  */
54083 ix86_binds_local_p (const_tree exp)
/* Delegate to the generic helper; the elided argument encodes the
   PIE/copy-reloc condition described above.  */
54085 return default_binds_local_p_3 (exp, flag_shlib != 0, true, true,
54088 && HAVE_LD_PIE_COPYRELOC != 0)));
54092 /* If MEM is in the form of [base+offset], extract the two parts
54093 of address and set to BASE and OFFSET, otherwise return false.  */
54096 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
54100 gcc_assert (MEM_P (mem));
54102 addr = XEXP (mem, 0);
/* Strip a wrapping CONST.  */
54104 if (GET_CODE (addr) == CONST)
54105 addr = XEXP (addr, 0);
/* A bare register or symbol is a base with zero offset.  */
54107 if (REG_P (addr) || GET_CODE (addr) == SYMBOL_REF)
54110 *offset = const0_rtx;
/* Otherwise accept (plus base const_int).  */
54114 if (GET_CODE (addr) == PLUS
54115 && (REG_P (XEXP (addr, 0))
54116 || GET_CODE (XEXP (addr, 0)) == SYMBOL_REF)
54117 && CONST_INT_P (XEXP (addr, 1)))
54119 *base = XEXP (addr, 0);
54120 *offset = XEXP (addr, 1);
54127 /* Given OPERANDS of consecutive load/store, check if we can merge
54128 them into move multiple.  LOAD is true if they are load instructions.
54129 MODE is the mode of memory operands.  */
54132 ix86_operands_ok_for_move_multiple (rtx *operands, bool load,
54133 enum machine_mode mode)
54135 HOST_WIDE_INT offval_1, offval_2, msize;
54136 rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, offset_1, offset_2;
/* For loads the memory operands are at odd indices, for stores at
   even indices.  */
54140 mem_1 = operands[1];
54141 mem_2 = operands[3];
54142 reg_1 = operands[0];
54143 reg_2 = operands[2];
54147 mem_1 = operands[0];
54148 mem_2 = operands[2];
54149 reg_1 = operands[1];
54150 reg_2 = operands[3];
54153 gcc_assert (REG_P (reg_1) && REG_P (reg_2));
/* Both moves must target the same register.  */
54155 if (REGNO (reg_1) != REGNO (reg_2))
54158 /* Check if the addresses are in the form of [base+offset].  */
54159 if (!extract_base_offset_in_addr (mem_1, &base_1, &offset_1))
54161 if (!extract_base_offset_in_addr (mem_2, &base_2, &offset_2))
54164 /* Check if the bases are the same.  */
54165 if (!rtx_equal_p (base_1, base_2))
54168 offval_1 = INTVAL (offset_1);
54169 offval_2 = INTVAL (offset_2);
54170 msize = GET_MODE_SIZE (mode);
54171 /* Check if mem_1 is adjacent to mem_2 and mem_1 has lower address.  */
54172 if (offval_1 + msize != offval_2)
54178 /* Implement the TARGET_OPTAB_SUPPORTED_P hook.  */
54181 ix86_optab_supported_p (int op, machine_mode mode1, machine_mode,
54182 optimization_type opt_type)
/* NOTE(review): the switch on OP and several case labels are elided;
   the visible cases gate speed-only expansions on SSE float modes and
   -fno-trapping-math.  */
54196 return opt_type == OPTIMIZE_FOR_SPEED;
54199 if (SSE_FLOAT_MODE_P (mode1)
54201 && !flag_trapping_math
54203 return opt_type == OPTIMIZE_FOR_SPEED;
54209 if (SSE_FLOAT_MODE_P (mode1)
54211 && !flag_trapping_math
54214 return opt_type == OPTIMIZE_FOR_SPEED;
/* rsqrt is only worthwhile when optimizing for speed and the rsqrt
   expansion is enabled.  */
54217 return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p ();
54224 /* Address space support.
54226 This is not "far pointers" in the 16-bit sense, but an easy way
54227 to use %fs and %gs segment prefixes.  Therefore:
54229 (a) All address spaces have the same modes,
54230 (b) All address spaces have the same addresss forms,
54231 (c) While %fs and %gs are technically subsets of the generic
54232 address space, they are probably not subsets of each other.
54233 (d) Since we have no access to the segment base register values
54234 without resorting to a system call, we cannot convert a
54235 non-default address space to a default address space.
54236 Therefore we do not claim %fs or %gs are subsets of generic.
54237 (e) However, __seg_tls uses UNSPEC_TP as the base (which itself is
54238 stored at __seg_tls:0) so we can map between tls and generic.  */
54241 ix86_addr_space_subset_p (addr_space_t subset, addr_space_t superset)
/* Only the trivial subset relation plus SEG_TLS < GENERIC hold, per
   point (e) above.  */
54243 return (subset == superset
54244 || (superset == ADDR_SPACE_GENERIC
54245 && subset == ADDR_SPACE_SEG_TLS));
54247 #undef TARGET_ADDR_SPACE_SUBSET_P
54248 #define TARGET_ADDR_SPACE_SUBSET_P ix86_addr_space_subset_p
/* Convert pointer OP between the address spaces of FROM_TYPE and
   TO_TYPE; only GENERIC <-> SEG_TLS conversion does real work.  */
54251 ix86_addr_space_convert (rtx op, tree from_type, tree to_type)
54253 addr_space_t from_as = TYPE_ADDR_SPACE (TREE_TYPE (from_type));
54254 addr_space_t to_as = TYPE_ADDR_SPACE (TREE_TYPE (to_type));
54256 /* Conversion between SEG_TLS and GENERIC is handled by adding or
54257 subtracting the thread pointer.  */
54258 if ((from_as == ADDR_SPACE_GENERIC && to_as == ADDR_SPACE_SEG_TLS)
54259 || (from_as == ADDR_SPACE_SEG_TLS && to_as == ADDR_SPACE_GENERIC)
54261 machine_mode mode = GET_MODE (op);
54262 if (mode == VOIDmode)
54264 rtx tp = get_thread_pointer (mode, optimize || mode != ptr_mode);
/* Converting to GENERIC adds the thread pointer; to SEG_TLS
   subtracts it.  */
54265 return expand_binop (mode, (to_as == ADDR_SPACE_GENERIC
54266 ? add_optab : sub_optab),
54267 op, tp, NULL, 1, OPTAB_WIDEN);
54272 #undef TARGET_ADDR_SPACE_CONVERT
54273 #define TARGET_ADDR_SPACE_CONVERT ix86_addr_space_convert
/* Map address space AS to the one presented in debug info.  */
54276 ix86_addr_space_debug (addr_space_t as)
54278 /* Fold __seg_tls to __seg_fs or __seg_gs for debugging.  */
54279 if (as == ADDR_SPACE_SEG_TLS)
54280 as = DEFAULT_TLS_SEG_REG;
54283 #undef TARGET_ADDR_SPACE_DEBUG
54284 #define TARGET_ADDR_SPACE_DEBUG ix86_addr_space_debug
54286 /* All use of segmentation is assumed to make address 0 valid.  */
54289 ix86_addr_space_zero_address_valid (addr_space_t as)
/* Only the generic space treats address 0 as invalid (null).  */
54291 return as != ADDR_SPACE_GENERIC;
54293 #undef TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID
54294 #define TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID ix86_addr_space_zero_address_valid
54296 /* Initialize the GCC target structure. */
54297 #undef TARGET_RETURN_IN_MEMORY
54298 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
54300 #undef TARGET_LEGITIMIZE_ADDRESS
54301 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
54303 #undef TARGET_ATTRIBUTE_TABLE
54304 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
54305 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
54306 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
54307 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
54308 # undef TARGET_MERGE_DECL_ATTRIBUTES
54309 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
54312 #undef TARGET_COMP_TYPE_ATTRIBUTES
54313 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
54315 #undef TARGET_INIT_BUILTINS
54316 #define TARGET_INIT_BUILTINS ix86_init_builtins
54317 #undef TARGET_BUILTIN_DECL
54318 #define TARGET_BUILTIN_DECL ix86_builtin_decl
54319 #undef TARGET_EXPAND_BUILTIN
54320 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
54322 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
54323 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
54324 ix86_builtin_vectorized_function
54326 #undef TARGET_VECTORIZE_BUILTIN_GATHER
54327 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
54329 #undef TARGET_VECTORIZE_BUILTIN_SCATTER
54330 #define TARGET_VECTORIZE_BUILTIN_SCATTER ix86_vectorize_builtin_scatter
54332 #undef TARGET_BUILTIN_RECIPROCAL
54333 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
54335 #undef TARGET_ASM_FUNCTION_EPILOGUE
54336 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
54338 #undef TARGET_ENCODE_SECTION_INFO
54339 #ifndef SUBTARGET_ENCODE_SECTION_INFO
54340 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
54342 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
54345 #undef TARGET_ASM_OPEN_PAREN
54346 #define TARGET_ASM_OPEN_PAREN ""
54347 #undef TARGET_ASM_CLOSE_PAREN
54348 #define TARGET_ASM_CLOSE_PAREN ""
54350 #undef TARGET_ASM_BYTE_OP
54351 #define TARGET_ASM_BYTE_OP ASM_BYTE
54353 #undef TARGET_ASM_ALIGNED_HI_OP
54354 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
54355 #undef TARGET_ASM_ALIGNED_SI_OP
54356 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
54358 #undef TARGET_ASM_ALIGNED_DI_OP
54359 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
54362 #undef TARGET_PROFILE_BEFORE_PROLOGUE
54363 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
54365 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
54366 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
54368 #undef TARGET_ASM_UNALIGNED_HI_OP
54369 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
54370 #undef TARGET_ASM_UNALIGNED_SI_OP
54371 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
54372 #undef TARGET_ASM_UNALIGNED_DI_OP
54373 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
54375 #undef TARGET_PRINT_OPERAND
54376 #define TARGET_PRINT_OPERAND ix86_print_operand
54377 #undef TARGET_PRINT_OPERAND_ADDRESS
54378 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
54379 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
54380 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
54381 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
54382 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
54384 #undef TARGET_SCHED_INIT_GLOBAL
54385 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
54386 #undef TARGET_SCHED_ADJUST_COST
54387 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
54388 #undef TARGET_SCHED_ISSUE_RATE
54389 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
54390 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
54391 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
54392 ia32_multipass_dfa_lookahead
54393 #undef TARGET_SCHED_MACRO_FUSION_P
54394 #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
54395 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
54396 #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
54398 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
54399 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
54401 #undef TARGET_MEMMODEL_CHECK
54402 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
54404 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
54405 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
54408 #undef TARGET_HAVE_TLS
54409 #define TARGET_HAVE_TLS true
54411 #undef TARGET_CANNOT_FORCE_CONST_MEM
54412 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
54413 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
54414 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
54416 #undef TARGET_DELEGITIMIZE_ADDRESS
54417 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
54419 #undef TARGET_MS_BITFIELD_LAYOUT_P
54420 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
54423 #undef TARGET_BINDS_LOCAL_P
54424 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
54426 #undef TARGET_BINDS_LOCAL_P
54427 #define TARGET_BINDS_LOCAL_P ix86_binds_local_p
54429 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
54430 #undef TARGET_BINDS_LOCAL_P
54431 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
54434 #undef TARGET_ASM_OUTPUT_MI_THUNK
54435 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
54436 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
54437 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
54439 #undef TARGET_ASM_FILE_START
54440 #define TARGET_ASM_FILE_START x86_file_start
54442 #undef TARGET_OPTION_OVERRIDE
54443 #define TARGET_OPTION_OVERRIDE ix86_option_override
54445 #undef TARGET_REGISTER_MOVE_COST
54446 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
54447 #undef TARGET_MEMORY_MOVE_COST
54448 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
54449 #undef TARGET_RTX_COSTS
54450 #define TARGET_RTX_COSTS ix86_rtx_costs
54451 #undef TARGET_ADDRESS_COST
54452 #define TARGET_ADDRESS_COST ix86_address_cost
54454 #undef TARGET_FIXED_CONDITION_CODE_REGS
54455 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
54456 #undef TARGET_CC_MODES_COMPATIBLE
54457 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
54459 #undef TARGET_MACHINE_DEPENDENT_REORG
54460 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
54462 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
54463 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
54465 #undef TARGET_BUILD_BUILTIN_VA_LIST
54466 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
54468 #undef TARGET_FOLD_BUILTIN
54469 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
54471 #undef TARGET_COMPARE_VERSION_PRIORITY
54472 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
54474 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
54475 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
54476 ix86_generate_version_dispatcher_body
54478 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
54479 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
54480 ix86_get_function_versions_dispatcher
54482 #undef TARGET_ENUM_VA_LIST_P
54483 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
54485 #undef TARGET_FN_ABI_VA_LIST
54486 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
54488 #undef TARGET_CANONICAL_VA_LIST_TYPE
54489 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
54491 #undef TARGET_EXPAND_BUILTIN_VA_START
54492 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
54494 #undef TARGET_MD_ASM_ADJUST
54495 #define TARGET_MD_ASM_ADJUST ix86_md_asm_adjust
54497 #undef TARGET_PROMOTE_PROTOTYPES
54498 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
54499 #undef TARGET_SETUP_INCOMING_VARARGS
54500 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
54501 #undef TARGET_MUST_PASS_IN_STACK
54502 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
54503 #undef TARGET_FUNCTION_ARG_ADVANCE
54504 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
54505 #undef TARGET_FUNCTION_ARG
54506 #define TARGET_FUNCTION_ARG ix86_function_arg
54507 #undef TARGET_INIT_PIC_REG
54508 #define TARGET_INIT_PIC_REG ix86_init_pic_reg
54509 #undef TARGET_USE_PSEUDO_PIC_REG
54510 #define TARGET_USE_PSEUDO_PIC_REG ix86_use_pseudo_pic_reg
54511 #undef TARGET_FUNCTION_ARG_BOUNDARY
54512 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
54513 #undef TARGET_PASS_BY_REFERENCE
54514 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
54515 #undef TARGET_INTERNAL_ARG_POINTER
54516 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
54517 #undef TARGET_UPDATE_STACK_BOUNDARY
54518 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
54519 #undef TARGET_GET_DRAP_RTX
54520 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
54521 #undef TARGET_STRICT_ARGUMENT_NAMING
54522 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
54523 #undef TARGET_STATIC_CHAIN
54524 #define TARGET_STATIC_CHAIN ix86_static_chain
54525 #undef TARGET_TRAMPOLINE_INIT
54526 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
54527 #undef TARGET_RETURN_POPS_ARGS
54528 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
54530 #undef TARGET_LEGITIMATE_COMBINED_INSN
54531 #define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn
54533 #undef TARGET_ASAN_SHADOW_OFFSET
54534 #define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
54536 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
54537 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
54539 #undef TARGET_SCALAR_MODE_SUPPORTED_P
54540 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
54542 #undef TARGET_VECTOR_MODE_SUPPORTED_P
54543 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
54545 #undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
54546 #define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
54547 ix86_libgcc_floating_mode_supported_p
54549 #undef TARGET_C_MODE_FOR_SUFFIX
54550 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
54553 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
54554 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
54557 #ifdef SUBTARGET_INSERT_ATTRIBUTES
54558 #undef TARGET_INSERT_ATTRIBUTES
54559 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
54562 #undef TARGET_MANGLE_TYPE
54563 #define TARGET_MANGLE_TYPE ix86_mangle_type
54566 #undef TARGET_STACK_PROTECT_FAIL
54567 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
54570 #undef TARGET_FUNCTION_VALUE
54571 #define TARGET_FUNCTION_VALUE ix86_function_value
54573 #undef TARGET_FUNCTION_VALUE_REGNO_P
54574 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
54576 #undef TARGET_PROMOTE_FUNCTION_MODE
54577 #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
54579 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
54580 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ix86_override_options_after_change
54582 #undef TARGET_MEMBER_TYPE_FORCES_BLK
54583 #define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk
54585 #undef TARGET_INSTANTIATE_DECLS
54586 #define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
54588 #undef TARGET_SECONDARY_RELOAD
54589 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
54591 #undef TARGET_CLASS_MAX_NREGS
54592 #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
54594 #undef TARGET_PREFERRED_RELOAD_CLASS
54595 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
54596 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
54597 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
54598 #undef TARGET_CLASS_LIKELY_SPILLED_P
54599 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
54601 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
54602 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
54603 ix86_builtin_vectorization_cost
54604 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
54605 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
54606 ix86_vectorize_vec_perm_const_ok
54607 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
54608 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
54609 ix86_preferred_simd_mode
54610 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
54611 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
54612 ix86_autovectorize_vector_sizes
54613 #undef TARGET_VECTORIZE_GET_MASK_MODE
54614 #define TARGET_VECTORIZE_GET_MASK_MODE ix86_get_mask_mode
54615 #undef TARGET_VECTORIZE_INIT_COST
54616 #define TARGET_VECTORIZE_INIT_COST ix86_init_cost
54617 #undef TARGET_VECTORIZE_ADD_STMT_COST
54618 #define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
54619 #undef TARGET_VECTORIZE_FINISH_COST
54620 #define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
54621 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
54622 #define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data
54624 #undef TARGET_SET_CURRENT_FUNCTION
54625 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
54627 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
54628 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
54630 #undef TARGET_OPTION_SAVE
54631 #define TARGET_OPTION_SAVE ix86_function_specific_save
54633 #undef TARGET_OPTION_RESTORE
54634 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
54636 #undef TARGET_OPTION_POST_STREAM_IN
54637 #define TARGET_OPTION_POST_STREAM_IN ix86_function_specific_post_stream_in
54639 #undef TARGET_OPTION_PRINT
54640 #define TARGET_OPTION_PRINT ix86_function_specific_print
54642 #undef TARGET_OPTION_FUNCTION_VERSIONS
54643 #define TARGET_OPTION_FUNCTION_VERSIONS ix86_function_versions
54645 #undef TARGET_CAN_INLINE_P
54646 #define TARGET_CAN_INLINE_P ix86_can_inline_p
54648 #undef TARGET_LEGITIMATE_ADDRESS_P
54649 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
54651 #undef TARGET_LRA_P
54652 #define TARGET_LRA_P hook_bool_void_true
54654 #undef TARGET_REGISTER_PRIORITY
54655 #define TARGET_REGISTER_PRIORITY ix86_register_priority
54657 #undef TARGET_REGISTER_USAGE_LEVELING_P
54658 #define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true
54660 #undef TARGET_LEGITIMATE_CONSTANT_P
54661 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
54663 #undef TARGET_FRAME_POINTER_REQUIRED
54664 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
54666 #undef TARGET_CAN_ELIMINATE
54667 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
54669 #undef TARGET_EXTRA_LIVE_ON_ENTRY
54670 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
54672 #undef TARGET_ASM_CODE_END
54673 #define TARGET_ASM_CODE_END ix86_code_end
54675 #undef TARGET_CONDITIONAL_REGISTER_USAGE
54676 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
54679 #undef TARGET_INIT_LIBFUNCS
54680 #define TARGET_INIT_LIBFUNCS darwin_rename_builtins
54683 #undef TARGET_LOOP_UNROLL_ADJUST
54684 #define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust
54686 #undef TARGET_SPILL_CLASS
54687 #define TARGET_SPILL_CLASS ix86_spill_class
54689 #undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
54690 #define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
54691 ix86_simd_clone_compute_vecsize_and_simdlen
54693 #undef TARGET_SIMD_CLONE_ADJUST
54694 #define TARGET_SIMD_CLONE_ADJUST \
54695 ix86_simd_clone_adjust
54697 #undef TARGET_SIMD_CLONE_USABLE
54698 #define TARGET_SIMD_CLONE_USABLE \
54699 ix86_simd_clone_usable
54701 #undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
54702 #define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
54703 ix86_float_exceptions_rounding_supported_p
54705 #undef TARGET_MODE_EMIT
54706 #define TARGET_MODE_EMIT ix86_emit_mode_set
54708 #undef TARGET_MODE_NEEDED
54709 #define TARGET_MODE_NEEDED ix86_mode_needed
54711 #undef TARGET_MODE_AFTER
54712 #define TARGET_MODE_AFTER ix86_mode_after
54714 #undef TARGET_MODE_ENTRY
54715 #define TARGET_MODE_ENTRY ix86_mode_entry
54717 #undef TARGET_MODE_EXIT
54718 #define TARGET_MODE_EXIT ix86_mode_exit
54720 #undef TARGET_MODE_PRIORITY
54721 #define TARGET_MODE_PRIORITY ix86_mode_priority
54723 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
54724 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
54726 #undef TARGET_LOAD_BOUNDS_FOR_ARG
54727 #define TARGET_LOAD_BOUNDS_FOR_ARG ix86_load_bounds
54729 #undef TARGET_STORE_BOUNDS_FOR_ARG
54730 #define TARGET_STORE_BOUNDS_FOR_ARG ix86_store_bounds
54732 #undef TARGET_LOAD_RETURNED_BOUNDS
54733 #define TARGET_LOAD_RETURNED_BOUNDS ix86_load_returned_bounds
54735 #undef TARGET_STORE_RETURNED_BOUNDS
54736 #define TARGET_STORE_RETURNED_BOUNDS ix86_store_returned_bounds
54738 #undef TARGET_CHKP_BOUND_MODE
54739 #define TARGET_CHKP_BOUND_MODE ix86_mpx_bound_mode
54741 #undef TARGET_BUILTIN_CHKP_FUNCTION
54742 #define TARGET_BUILTIN_CHKP_FUNCTION ix86_builtin_mpx_function
54744 #undef TARGET_CHKP_FUNCTION_VALUE_BOUNDS
54745 #define TARGET_CHKP_FUNCTION_VALUE_BOUNDS ix86_function_value_bounds
54747 #undef TARGET_CHKP_MAKE_BOUNDS_CONSTANT
54748 #define TARGET_CHKP_MAKE_BOUNDS_CONSTANT ix86_make_bounds_constant
54750 #undef TARGET_CHKP_INITIALIZE_BOUNDS
54751 #define TARGET_CHKP_INITIALIZE_BOUNDS ix86_initialize_bounds
54753 #undef TARGET_SETUP_INCOMING_VARARG_BOUNDS
54754 #define TARGET_SETUP_INCOMING_VARARG_BOUNDS ix86_setup_incoming_vararg_bounds
54756 #undef TARGET_OFFLOAD_OPTIONS
54757 #define TARGET_OFFLOAD_OPTIONS \
54758 ix86_offload_options
54760 #undef TARGET_ABSOLUTE_BIGGEST_ALIGNMENT
54761 #define TARGET_ABSOLUTE_BIGGEST_ALIGNMENT 512
54763 #undef TARGET_OPTAB_SUPPORTED_P
54764 #define TARGET_OPTAB_SUPPORTED_P ix86_optab_supported_p
54766 struct gcc_target targetm = TARGET_INITIALIZER;
54768 #include "gt-i386.h"