1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988-2016 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
22 #include "coretypes.h"
31 #include "stringpool.h"
38 #include "diagnostic.h"
41 #include "fold-const.h"
44 #include "stor-layout.h"
47 #include "insn-attr.h"
53 #include "common/common-target.h"
54 #include "langhooks.h"
58 #include "tm-constrs.h"
61 #include "sched-int.h"
63 #include "tree-pass.h"
65 #include "pass_manager.h"
66 #include "target-globals.h"
67 #include "gimple-iterator.h"
68 #include "tree-vectorizer.h"
69 #include "shrink-wrap.h"
72 #include "tree-iterator.h"
73 #include "tree-chkp.h"
76 #include "case-cfn-macros.h"
77 #include "regrename.h"
79 /* This file should be included last. */
80 #include "target-def.h"
/* Forward declarations for file-local helpers referenced before their
   definitions (the definitions are not in this excerpt).  */
82 static rtx legitimize_dllimport_symbol (rtx, bool);
83 static rtx legitimize_pe_coff_extern_decl (rtx, bool);
84 static rtx legitimize_pe_coff_symbol (rtx, bool);
85 static void ix86_print_operand_address_as (FILE *, rtx, addr_space_t, bool);
87 #ifndef CHECK_STACK_LIMIT
88 #define CHECK_STACK_LIMIT (-1)
91 /* Return index of given mode in mult and division cost tables. */
92 #define MODE_INDEX(mode) \
93 ((mode) == QImode ? 0 \
94 : (mode) == HImode ? 1 \
95 : (mode) == SImode ? 2 \
96 : (mode) == DImode ? 3 \
99 /* Processor costs (relative to an add) */
100 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
101 #define COSTS_N_BYTES(N) ((N) * 2)
/* Placeholder stringop strategy: a single entry that unconditionally
   falls back to the library routine.  Used to fill stringop table
   slots that have no dedicated tuning.  */
103 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}}
/* memcpy/memset expansion strategies when optimizing for size:
   one-byte "rep" string ops for every block size (the -1 entry is the
   unbounded catch-all), which gives the smallest code.
   NOTE(review): the two array entries presumably select 32- vs 64-bit
   targets -- confirm against the stringop_algs definition/users.  */
105 static stringop_algs ix86_size_memcpy[2] = {
106 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
107 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
108 static stringop_algs ix86_size_memset[2] = {
109 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
110 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
113 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
114 COSTS_N_BYTES (2), /* cost of an add instruction */
115 COSTS_N_BYTES (3), /* cost of a lea instruction */
116 COSTS_N_BYTES (2), /* variable shift costs */
117 COSTS_N_BYTES (3), /* constant shift costs */
118 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
119 COSTS_N_BYTES (3), /* HI */
120 COSTS_N_BYTES (3), /* SI */
121 COSTS_N_BYTES (3), /* DI */
122 COSTS_N_BYTES (5)}, /* other */
123 0, /* cost of multiply per each bit set */
124 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
125 COSTS_N_BYTES (3), /* HI */
126 COSTS_N_BYTES (3), /* SI */
127 COSTS_N_BYTES (3), /* DI */
128 COSTS_N_BYTES (5)}, /* other */
129 COSTS_N_BYTES (3), /* cost of movsx */
130 COSTS_N_BYTES (3), /* cost of movzx */
131 0, /* "large" insn */
133 2, /* cost for loading QImode using movzbl */
134 {2, 2, 2}, /* cost of loading integer registers
135 in QImode, HImode and SImode.
136 Relative to reg-reg move (2). */
137 {2, 2, 2}, /* cost of storing integer registers */
138 2, /* cost of reg,reg fld/fst */
139 {2, 2, 2}, /* cost of loading fp registers
140 in SFmode, DFmode and XFmode */
141 {2, 2, 2}, /* cost of storing fp registers
142 in SFmode, DFmode and XFmode */
143 3, /* cost of moving MMX register */
144 {3, 3}, /* cost of loading MMX registers
145 in SImode and DImode */
146 {3, 3}, /* cost of storing MMX registers
147 in SImode and DImode */
148 3, /* cost of moving SSE register */
149 {3, 3, 3}, /* cost of loading SSE registers
150 in SImode, DImode and TImode */
151 {3, 3, 3}, /* cost of storing SSE registers
152 in SImode, DImode and TImode */
153 3, /* MMX or SSE register to integer */
154 0, /* size of l1 cache */
155 0, /* size of l2 cache */
156 0, /* size of prefetch block */
157 0, /* number of parallel prefetches */
159 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
160 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
161 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
162 COSTS_N_BYTES (2), /* cost of FABS instruction. */
163 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
164 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
167 1, /* scalar_stmt_cost. */
168 1, /* scalar load_cost. */
169 1, /* scalar_store_cost. */
170 1, /* vec_stmt_cost. */
171 1, /* vec_to_scalar_cost. */
172 1, /* scalar_to_vec_cost. */
173 1, /* vec_align_load_cost. */
174 1, /* vec_unalign_load_cost. */
175 1, /* vec_store_cost. */
176 1, /* cond_taken_branch_cost. */
177 1, /* cond_not_taken_branch_cost. */
180 /* Processor costs (relative to an add) */
/* 386 stringop strategies: one-byte "rep" string ops for all sizes;
   the second slot is the DUMMY library-call placeholder.  */
181 static stringop_algs i386_memcpy[2] = {
182 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
183 DUMMY_STRINGOP_ALGS};
184 static stringop_algs i386_memset[2] = {
185 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
186 DUMMY_STRINGOP_ALGS};
189 struct processor_costs i386_cost = { /* 386 specific costs */
190 COSTS_N_INSNS (1), /* cost of an add instruction */
191 COSTS_N_INSNS (1), /* cost of a lea instruction */
192 COSTS_N_INSNS (3), /* variable shift costs */
193 COSTS_N_INSNS (2), /* constant shift costs */
194 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
195 COSTS_N_INSNS (6), /* HI */
196 COSTS_N_INSNS (6), /* SI */
197 COSTS_N_INSNS (6), /* DI */
198 COSTS_N_INSNS (6)}, /* other */
199 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
200 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
201 COSTS_N_INSNS (23), /* HI */
202 COSTS_N_INSNS (23), /* SI */
203 COSTS_N_INSNS (23), /* DI */
204 COSTS_N_INSNS (23)}, /* other */
205 COSTS_N_INSNS (3), /* cost of movsx */
206 COSTS_N_INSNS (2), /* cost of movzx */
207 15, /* "large" insn */
209 4, /* cost for loading QImode using movzbl */
210 {2, 4, 2}, /* cost of loading integer registers
211 in QImode, HImode and SImode.
212 Relative to reg-reg move (2). */
213 {2, 4, 2}, /* cost of storing integer registers */
214 2, /* cost of reg,reg fld/fst */
215 {8, 8, 8}, /* cost of loading fp registers
216 in SFmode, DFmode and XFmode */
217 {8, 8, 8}, /* cost of storing fp registers
218 in SFmode, DFmode and XFmode */
219 2, /* cost of moving MMX register */
220 {4, 8}, /* cost of loading MMX registers
221 in SImode and DImode */
222 {4, 8}, /* cost of storing MMX registers
223 in SImode and DImode */
224 2, /* cost of moving SSE register */
225 {4, 8, 16}, /* cost of loading SSE registers
226 in SImode, DImode and TImode */
227 {4, 8, 16}, /* cost of storing SSE registers
228 in SImode, DImode and TImode */
229 3, /* MMX or SSE register to integer */
230 0, /* size of l1 cache */
231 0, /* size of l2 cache */
232 0, /* size of prefetch block */
233 0, /* number of parallel prefetches */
235 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
236 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
237 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
238 COSTS_N_INSNS (22), /* cost of FABS instruction. */
239 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
240 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
243 1, /* scalar_stmt_cost. */
244 1, /* scalar load_cost. */
245 1, /* scalar_store_cost. */
246 1, /* vec_stmt_cost. */
247 1, /* vec_to_scalar_cost. */
248 1, /* scalar_to_vec_cost. */
249 1, /* vec_align_load_cost. */
250 2, /* vec_unalign_load_cost. */
251 1, /* vec_store_cost. */
252 3, /* cond_taken_branch_cost. */
253 1, /* cond_not_taken_branch_cost. */
/* 486 stringop strategies: four-byte "rep" string ops for all sizes;
   the second slot is the DUMMY library-call placeholder.  */
256 static stringop_algs i486_memcpy[2] = {
257 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
258 DUMMY_STRINGOP_ALGS};
259 static stringop_algs i486_memset[2] = {
260 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
261 DUMMY_STRINGOP_ALGS};
264 struct processor_costs i486_cost = { /* 486 specific costs */
265 COSTS_N_INSNS (1), /* cost of an add instruction */
266 COSTS_N_INSNS (1), /* cost of a lea instruction */
267 COSTS_N_INSNS (3), /* variable shift costs */
268 COSTS_N_INSNS (2), /* constant shift costs */
269 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
270 COSTS_N_INSNS (12), /* HI */
271 COSTS_N_INSNS (12), /* SI */
272 COSTS_N_INSNS (12), /* DI */
273 COSTS_N_INSNS (12)}, /* other */
274 1, /* cost of multiply per each bit set */
275 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
276 COSTS_N_INSNS (40), /* HI */
277 COSTS_N_INSNS (40), /* SI */
278 COSTS_N_INSNS (40), /* DI */
279 COSTS_N_INSNS (40)}, /* other */
280 COSTS_N_INSNS (3), /* cost of movsx */
281 COSTS_N_INSNS (2), /* cost of movzx */
282 15, /* "large" insn */
284 4, /* cost for loading QImode using movzbl */
285 {2, 4, 2}, /* cost of loading integer registers
286 in QImode, HImode and SImode.
287 Relative to reg-reg move (2). */
288 {2, 4, 2}, /* cost of storing integer registers */
289 2, /* cost of reg,reg fld/fst */
290 {8, 8, 8}, /* cost of loading fp registers
291 in SFmode, DFmode and XFmode */
292 {8, 8, 8}, /* cost of storing fp registers
293 in SFmode, DFmode and XFmode */
294 2, /* cost of moving MMX register */
295 {4, 8}, /* cost of loading MMX registers
296 in SImode and DImode */
297 {4, 8}, /* cost of storing MMX registers
298 in SImode and DImode */
299 2, /* cost of moving SSE register */
300 {4, 8, 16}, /* cost of loading SSE registers
301 in SImode, DImode and TImode */
302 {4, 8, 16}, /* cost of storing SSE registers
303 in SImode, DImode and TImode */
304 3, /* MMX or SSE register to integer */
305 4, /* size of l1 cache. 486 has 8kB cache
306 shared for code and data, so 4kB is
307 not really precise. */
308 4, /* size of l2 cache */
309 0, /* size of prefetch block */
310 0, /* number of parallel prefetches */
312 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
313 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
314 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
315 COSTS_N_INSNS (3), /* cost of FABS instruction. */
316 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
317 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
320 1, /* scalar_stmt_cost. */
321 1, /* scalar load_cost. */
322 1, /* scalar_store_cost. */
323 1, /* vec_stmt_cost. */
324 1, /* vec_to_scalar_cost. */
325 1, /* scalar_to_vec_cost. */
326 1, /* vec_align_load_cost. */
327 2, /* vec_unalign_load_cost. */
328 1, /* vec_store_cost. */
329 3, /* cond_taken_branch_cost. */
330 1, /* cond_not_taken_branch_cost. */
/* Pentium stringop strategies: memcpy uses rep movsl up to 256 bytes
   and a library call beyond; memset uses rep stosl for every size.  */
333 static stringop_algs pentium_memcpy[2] = {
334 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
335 DUMMY_STRINGOP_ALGS};
336 static stringop_algs pentium_memset[2] = {
337 {libcall, {{-1, rep_prefix_4_byte, false}}},
338 DUMMY_STRINGOP_ALGS};
341 struct processor_costs pentium_cost = {
342 COSTS_N_INSNS (1), /* cost of an add instruction */
343 COSTS_N_INSNS (1), /* cost of a lea instruction */
344 COSTS_N_INSNS (4), /* variable shift costs */
345 COSTS_N_INSNS (1), /* constant shift costs */
346 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
347 COSTS_N_INSNS (11), /* HI */
348 COSTS_N_INSNS (11), /* SI */
349 COSTS_N_INSNS (11), /* DI */
350 COSTS_N_INSNS (11)}, /* other */
351 0, /* cost of multiply per each bit set */
352 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
353 COSTS_N_INSNS (25), /* HI */
354 COSTS_N_INSNS (25), /* SI */
355 COSTS_N_INSNS (25), /* DI */
356 COSTS_N_INSNS (25)}, /* other */
357 COSTS_N_INSNS (3), /* cost of movsx */
358 COSTS_N_INSNS (2), /* cost of movzx */
359 8, /* "large" insn */
361 6, /* cost for loading QImode using movzbl */
362 {2, 4, 2}, /* cost of loading integer registers
363 in QImode, HImode and SImode.
364 Relative to reg-reg move (2). */
365 {2, 4, 2}, /* cost of storing integer registers */
366 2, /* cost of reg,reg fld/fst */
367 {2, 2, 6}, /* cost of loading fp registers
368 in SFmode, DFmode and XFmode */
369 {4, 4, 6}, /* cost of storing fp registers
370 in SFmode, DFmode and XFmode */
371 8, /* cost of moving MMX register */
372 {8, 8}, /* cost of loading MMX registers
373 in SImode and DImode */
374 {8, 8}, /* cost of storing MMX registers
375 in SImode and DImode */
376 2, /* cost of moving SSE register */
377 {4, 8, 16}, /* cost of loading SSE registers
378 in SImode, DImode and TImode */
379 {4, 8, 16}, /* cost of storing SSE registers
380 in SImode, DImode and TImode */
381 3, /* MMX or SSE register to integer */
382 8, /* size of l1 cache. */
383 8, /* size of l2 cache */
384 0, /* size of prefetch block */
385 0, /* number of parallel prefetches */
387 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
388 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
389 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
390 COSTS_N_INSNS (1), /* cost of FABS instruction. */
391 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
392 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
395 1, /* scalar_stmt_cost. */
396 1, /* scalar load_cost. */
397 1, /* scalar_store_cost. */
398 1, /* vec_stmt_cost. */
399 1, /* vec_to_scalar_cost. */
400 1, /* scalar_to_vec_cost. */
401 1, /* vec_align_load_cost. */
402 2, /* vec_unalign_load_cost. */
403 1, /* vec_store_cost. */
404 3, /* cond_taken_branch_cost. */
405 1, /* cond_not_taken_branch_cost. */
409 struct processor_costs lakemont_cost = {
410 COSTS_N_INSNS (1), /* cost of an add instruction */
411 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
412 COSTS_N_INSNS (1), /* variable shift costs */
413 COSTS_N_INSNS (1), /* constant shift costs */
414 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
415 COSTS_N_INSNS (11), /* HI */
416 COSTS_N_INSNS (11), /* SI */
417 COSTS_N_INSNS (11), /* DI */
418 COSTS_N_INSNS (11)}, /* other */
419 0, /* cost of multiply per each bit set */
420 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
421 COSTS_N_INSNS (25), /* HI */
422 COSTS_N_INSNS (25), /* SI */
423 COSTS_N_INSNS (25), /* DI */
424 COSTS_N_INSNS (25)}, /* other */
425 COSTS_N_INSNS (3), /* cost of movsx */
426 COSTS_N_INSNS (2), /* cost of movzx */
427 8, /* "large" insn */
429 6, /* cost for loading QImode using movzbl */
430 {2, 4, 2}, /* cost of loading integer registers
431 in QImode, HImode and SImode.
432 Relative to reg-reg move (2). */
433 {2, 4, 2}, /* cost of storing integer registers */
434 2, /* cost of reg,reg fld/fst */
435 {2, 2, 6}, /* cost of loading fp registers
436 in SFmode, DFmode and XFmode */
437 {4, 4, 6}, /* cost of storing fp registers
438 in SFmode, DFmode and XFmode */
439 8, /* cost of moving MMX register */
440 {8, 8}, /* cost of loading MMX registers
441 in SImode and DImode */
442 {8, 8}, /* cost of storing MMX registers
443 in SImode and DImode */
444 2, /* cost of moving SSE register */
445 {4, 8, 16}, /* cost of loading SSE registers
446 in SImode, DImode and TImode */
447 {4, 8, 16}, /* cost of storing SSE registers
448 in SImode, DImode and TImode */
449 3, /* MMX or SSE register to integer */
450 8, /* size of l1 cache. */
451 8, /* size of l2 cache */
452 0, /* size of prefetch block */
453 0, /* number of parallel prefetches */
455 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
456 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
457 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
458 COSTS_N_INSNS (1), /* cost of FABS instruction. */
459 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
460 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
463 1, /* scalar_stmt_cost. */
464 1, /* scalar load_cost. */
465 1, /* scalar_store_cost. */
466 1, /* vec_stmt_cost. */
467 1, /* vec_to_scalar_cost. */
468 1, /* scalar_to_vec_cost. */
469 1, /* vec_align_load_cost. */
470 2, /* vec_unalign_load_cost. */
471 1, /* vec_store_cost. */
472 3, /* cond_taken_branch_cost. */
473 1, /* cond_not_taken_branch_cost. */
476 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
477 (we ensure the alignment). For small blocks inline loop is still a
478 noticeable win, for bigger blocks either rep movsl or rep movsb is
479 way to go. Rep movsb has apparently more expensive startup time in CPU,
480 but after 4K the difference is down in the noise. */
/* memcpy: inline loop to 128B, unrolled loop to 1KB, rep movsl to 8KB,
   rep movsb beyond.  */
481 static stringop_algs pentiumpro_memcpy[2] = {
482 {rep_prefix_4_byte, {{128, loop, false}, {1024, unrolled_loop, false},
483 {8192, rep_prefix_4_byte, false},
484 {-1, rep_prefix_1_byte, false}}},
485 DUMMY_STRINGOP_ALGS};
/* memset: unrolled loop to 1KB, rep stosl to 8KB, library call beyond.  */
486 static stringop_algs pentiumpro_memset[2] = {
487 {rep_prefix_4_byte, {{1024, unrolled_loop, false},
488 {8192, rep_prefix_4_byte, false},
489 {-1, libcall, false}}},
490 DUMMY_STRINGOP_ALGS};
492 struct processor_costs pentiumpro_cost = {
493 COSTS_N_INSNS (1), /* cost of an add instruction */
494 COSTS_N_INSNS (1), /* cost of a lea instruction */
495 COSTS_N_INSNS (1), /* variable shift costs */
496 COSTS_N_INSNS (1), /* constant shift costs */
497 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
498 COSTS_N_INSNS (4), /* HI */
499 COSTS_N_INSNS (4), /* SI */
500 COSTS_N_INSNS (4), /* DI */
501 COSTS_N_INSNS (4)}, /* other */
502 0, /* cost of multiply per each bit set */
503 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
504 COSTS_N_INSNS (17), /* HI */
505 COSTS_N_INSNS (17), /* SI */
506 COSTS_N_INSNS (17), /* DI */
507 COSTS_N_INSNS (17)}, /* other */
508 COSTS_N_INSNS (1), /* cost of movsx */
509 COSTS_N_INSNS (1), /* cost of movzx */
510 8, /* "large" insn */
512 2, /* cost for loading QImode using movzbl */
513 {4, 4, 4}, /* cost of loading integer registers
514 in QImode, HImode and SImode.
515 Relative to reg-reg move (2). */
516 {2, 2, 2}, /* cost of storing integer registers */
517 2, /* cost of reg,reg fld/fst */
518 {2, 2, 6}, /* cost of loading fp registers
519 in SFmode, DFmode and XFmode */
520 {4, 4, 6}, /* cost of storing fp registers
521 in SFmode, DFmode and XFmode */
522 2, /* cost of moving MMX register */
523 {2, 2}, /* cost of loading MMX registers
524 in SImode and DImode */
525 {2, 2}, /* cost of storing MMX registers
526 in SImode and DImode */
527 2, /* cost of moving SSE register */
528 {2, 2, 8}, /* cost of loading SSE registers
529 in SImode, DImode and TImode */
530 {2, 2, 8}, /* cost of storing SSE registers
531 in SImode, DImode and TImode */
532 3, /* MMX or SSE register to integer */
533 8, /* size of l1 cache. */
534 256, /* size of l2 cache */
535 32, /* size of prefetch block */
536 6, /* number of parallel prefetches */
538 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
539 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
540 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
541 COSTS_N_INSNS (2), /* cost of FABS instruction. */
542 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
543 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
546 1, /* scalar_stmt_cost. */
547 1, /* scalar load_cost. */
548 1, /* scalar_store_cost. */
549 1, /* vec_stmt_cost. */
550 1, /* vec_to_scalar_cost. */
551 1, /* scalar_to_vec_cost. */
552 1, /* vec_align_load_cost. */
553 2, /* vec_unalign_load_cost. */
554 1, /* vec_store_cost. */
555 3, /* cond_taken_branch_cost. */
556 1, /* cond_not_taken_branch_cost. */
/* Geode stringop strategies: rep movsl/stosl up to 256 bytes, library
   call beyond; the second slot is the DUMMY placeholder.  */
559 static stringop_algs geode_memcpy[2] = {
560 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
561 DUMMY_STRINGOP_ALGS};
562 static stringop_algs geode_memset[2] = {
563 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
564 DUMMY_STRINGOP_ALGS};
566 struct processor_costs geode_cost = {
567 COSTS_N_INSNS (1), /* cost of an add instruction */
568 COSTS_N_INSNS (1), /* cost of a lea instruction */
569 COSTS_N_INSNS (2), /* variable shift costs */
570 COSTS_N_INSNS (1), /* constant shift costs */
571 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
572 COSTS_N_INSNS (4), /* HI */
573 COSTS_N_INSNS (7), /* SI */
574 COSTS_N_INSNS (7), /* DI */
575 COSTS_N_INSNS (7)}, /* other */
576 0, /* cost of multiply per each bit set */
577 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
578 COSTS_N_INSNS (23), /* HI */
579 COSTS_N_INSNS (39), /* SI */
580 COSTS_N_INSNS (39), /* DI */
581 COSTS_N_INSNS (39)}, /* other */
582 COSTS_N_INSNS (1), /* cost of movsx */
583 COSTS_N_INSNS (1), /* cost of movzx */
584 8, /* "large" insn */
586 1, /* cost for loading QImode using movzbl */
587 {1, 1, 1}, /* cost of loading integer registers
588 in QImode, HImode and SImode.
589 Relative to reg-reg move (2). */
590 {1, 1, 1}, /* cost of storing integer registers */
591 1, /* cost of reg,reg fld/fst */
592 {1, 1, 1}, /* cost of loading fp registers
593 in SFmode, DFmode and XFmode */
594 {4, 6, 6}, /* cost of storing fp registers
595 in SFmode, DFmode and XFmode */
597 1, /* cost of moving MMX register */
598 {1, 1}, /* cost of loading MMX registers
599 in SImode and DImode */
600 {1, 1}, /* cost of storing MMX registers
601 in SImode and DImode */
602 1, /* cost of moving SSE register */
603 {1, 1, 1}, /* cost of loading SSE registers
604 in SImode, DImode and TImode */
605 {1, 1, 1}, /* cost of storing SSE registers
606 in SImode, DImode and TImode */
607 1, /* MMX or SSE register to integer */
608 64, /* size of l1 cache. */
609 128, /* size of l2 cache. */
610 32, /* size of prefetch block */
611 1, /* number of parallel prefetches */
613 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
614 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
615 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
616 COSTS_N_INSNS (1), /* cost of FABS instruction. */
617 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
618 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
621 1, /* scalar_stmt_cost. */
622 1, /* scalar load_cost. */
623 1, /* scalar_store_cost. */
624 1, /* vec_stmt_cost. */
625 1, /* vec_to_scalar_cost. */
626 1, /* scalar_to_vec_cost. */
627 1, /* vec_align_load_cost. */
628 2, /* vec_unalign_load_cost. */
629 1, /* vec_store_cost. */
630 3, /* cond_taken_branch_cost. */
631 1, /* cond_not_taken_branch_cost. */
/* K6 stringop strategies: rep movsl/stosl up to 256 bytes, library
   call beyond; the second slot is the DUMMY placeholder.  */
634 static stringop_algs k6_memcpy[2] = {
635 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
636 DUMMY_STRINGOP_ALGS};
637 static stringop_algs k6_memset[2] = {
638 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
639 DUMMY_STRINGOP_ALGS};
641 struct processor_costs k6_cost = {
642 COSTS_N_INSNS (1), /* cost of an add instruction */
643 COSTS_N_INSNS (2), /* cost of a lea instruction */
644 COSTS_N_INSNS (1), /* variable shift costs */
645 COSTS_N_INSNS (1), /* constant shift costs */
646 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
647 COSTS_N_INSNS (3), /* HI */
648 COSTS_N_INSNS (3), /* SI */
649 COSTS_N_INSNS (3), /* DI */
650 COSTS_N_INSNS (3)}, /* other */
651 0, /* cost of multiply per each bit set */
652 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
653 COSTS_N_INSNS (18), /* HI */
654 COSTS_N_INSNS (18), /* SI */
655 COSTS_N_INSNS (18), /* DI */
656 COSTS_N_INSNS (18)}, /* other */
657 COSTS_N_INSNS (2), /* cost of movsx */
658 COSTS_N_INSNS (2), /* cost of movzx */
659 8, /* "large" insn */
661 3, /* cost for loading QImode using movzbl */
662 {4, 5, 4}, /* cost of loading integer registers
663 in QImode, HImode and SImode.
664 Relative to reg-reg move (2). */
665 {2, 3, 2}, /* cost of storing integer registers */
666 4, /* cost of reg,reg fld/fst */
667 {6, 6, 6}, /* cost of loading fp registers
668 in SFmode, DFmode and XFmode */
669 {4, 4, 4}, /* cost of storing fp registers
670 in SFmode, DFmode and XFmode */
671 2, /* cost of moving MMX register */
672 {2, 2}, /* cost of loading MMX registers
673 in SImode and DImode */
674 {2, 2}, /* cost of storing MMX registers
675 in SImode and DImode */
676 2, /* cost of moving SSE register */
677 {2, 2, 8}, /* cost of loading SSE registers
678 in SImode, DImode and TImode */
679 {2, 2, 8}, /* cost of storing SSE registers
680 in SImode, DImode and TImode */
681 6, /* MMX or SSE register to integer */
682 32, /* size of l1 cache. */
683 32, /* size of l2 cache. Some models
684 have integrated l2 cache, but
685 optimizing for k6 is not important
686 enough to worry about that. */
687 32, /* size of prefetch block */
688 1, /* number of parallel prefetches */
690 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
691 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
692 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
693 COSTS_N_INSNS (2), /* cost of FABS instruction. */
694 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
695 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
698 1, /* scalar_stmt_cost. */
699 1, /* scalar load_cost. */
700 1, /* scalar_store_cost. */
701 1, /* vec_stmt_cost. */
702 1, /* vec_to_scalar_cost. */
703 1, /* scalar_to_vec_cost. */
704 1, /* vec_align_load_cost. */
705 2, /* vec_unalign_load_cost. */
706 1, /* vec_store_cost. */
707 3, /* cond_taken_branch_cost. */
708 1, /* cond_not_taken_branch_cost. */
711 /* For some reason, Athlon deals better with REP prefix (relative to loops)
712 compared to K8. Alignment becomes important after 8 bytes for memcpy and
713 128 bytes for memset. */
/* Both memcpy and memset: rep movsl/stosl up to 2KB, library call
   beyond.  Second slot is the DUMMY placeholder.  */
714 static stringop_algs athlon_memcpy[2] = {
715 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
716 DUMMY_STRINGOP_ALGS};
717 static stringop_algs athlon_memset[2] = {
718 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
719 DUMMY_STRINGOP_ALGS};
721 struct processor_costs athlon_cost = {
722 COSTS_N_INSNS (1), /* cost of an add instruction */
723 COSTS_N_INSNS (2), /* cost of a lea instruction */
724 COSTS_N_INSNS (1), /* variable shift costs */
725 COSTS_N_INSNS (1), /* constant shift costs */
726 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
727 COSTS_N_INSNS (5), /* HI */
728 COSTS_N_INSNS (5), /* SI */
729 COSTS_N_INSNS (5), /* DI */
730 COSTS_N_INSNS (5)}, /* other */
731 0, /* cost of multiply per each bit set */
732 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
733 COSTS_N_INSNS (26), /* HI */
734 COSTS_N_INSNS (42), /* SI */
735 COSTS_N_INSNS (74), /* DI */
736 COSTS_N_INSNS (74)}, /* other */
737 COSTS_N_INSNS (1), /* cost of movsx */
738 COSTS_N_INSNS (1), /* cost of movzx */
739 8, /* "large" insn */
741 4, /* cost for loading QImode using movzbl */
742 {3, 4, 3}, /* cost of loading integer registers
743 in QImode, HImode and SImode.
744 Relative to reg-reg move (2). */
745 {3, 4, 3}, /* cost of storing integer registers */
746 4, /* cost of reg,reg fld/fst */
747 {4, 4, 12}, /* cost of loading fp registers
748 in SFmode, DFmode and XFmode */
749 {6, 6, 8}, /* cost of storing fp registers
750 in SFmode, DFmode and XFmode */
751 2, /* cost of moving MMX register */
752 {4, 4}, /* cost of loading MMX registers
753 in SImode and DImode */
754 {4, 4}, /* cost of storing MMX registers
755 in SImode and DImode */
756 2, /* cost of moving SSE register */
757 {4, 4, 6}, /* cost of loading SSE registers
758 in SImode, DImode and TImode */
759 {4, 4, 5}, /* cost of storing SSE registers
760 in SImode, DImode and TImode */
761 5, /* MMX or SSE register to integer */
762 64, /* size of l1 cache. */
763 256, /* size of l2 cache. */
764 64, /* size of prefetch block */
765 6, /* number of parallel prefetches */
767 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
768 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
769 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
770 COSTS_N_INSNS (2), /* cost of FABS instruction. */
771 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
772 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
775 1, /* scalar_stmt_cost. */
776 1, /* scalar load_cost. */
777 1, /* scalar_store_cost. */
778 1, /* vec_stmt_cost. */
779 1, /* vec_to_scalar_cost. */
780 1, /* scalar_to_vec_cost. */
781 1, /* vec_align_load_cost. */
782 2, /* vec_unalign_load_cost. */
783 1, /* vec_store_cost. */
784 3, /* cond_taken_branch_cost. */
785 1, /* cond_not_taken_branch_cost. */
788 /* K8 has optimized REP instruction for medium sized blocks, but for very
789 small blocks it is better to use loop. For large blocks, libcall can
790 do nontemporary accesses and beat inline considerably. */
/* First entry: loop for tiny blocks, unrolled loop for small ones,
   then rep movsl.  Second entry (uses 8-byte rep, so presumably the
   64-bit variant -- confirm): loop to 16B, rep movsq to 8KB, then
   library call.  */
791 static stringop_algs k8_memcpy[2] = {
792 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
793 {-1, rep_prefix_4_byte, false}}},
794 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
795 {-1, libcall, false}}}};
796 static stringop_algs k8_memset[2] = {
797 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
798 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
799 {libcall, {{48, unrolled_loop, false},
800 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
802 struct processor_costs k8_cost = {
803 COSTS_N_INSNS (1), /* cost of an add instruction */
804 COSTS_N_INSNS (2), /* cost of a lea instruction */
805 COSTS_N_INSNS (1), /* variable shift costs */
806 COSTS_N_INSNS (1), /* constant shift costs */
807 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
808 COSTS_N_INSNS (4), /* HI */
809 COSTS_N_INSNS (3), /* SI */
810 COSTS_N_INSNS (4), /* DI */
811 COSTS_N_INSNS (5)}, /* other */
812 0, /* cost of multiply per each bit set */
813 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
814 COSTS_N_INSNS (26), /* HI */
815 COSTS_N_INSNS (42), /* SI */
816 COSTS_N_INSNS (74), /* DI */
817 COSTS_N_INSNS (74)}, /* other */
818 COSTS_N_INSNS (1), /* cost of movsx */
819 COSTS_N_INSNS (1), /* cost of movzx */
820 8, /* "large" insn */
822 4, /* cost for loading QImode using movzbl */
823 {3, 4, 3}, /* cost of loading integer registers
824 in QImode, HImode and SImode.
825 Relative to reg-reg move (2). */
826 {3, 4, 3}, /* cost of storing integer registers */
827 4, /* cost of reg,reg fld/fst */
828 {4, 4, 12}, /* cost of loading fp registers
829 in SFmode, DFmode and XFmode */
830 {6, 6, 8}, /* cost of storing fp registers
831 in SFmode, DFmode and XFmode */
832 2, /* cost of moving MMX register */
833 {3, 3}, /* cost of loading MMX registers
834 in SImode and DImode */
835 {4, 4}, /* cost of storing MMX registers
836 in SImode and DImode */
837 2, /* cost of moving SSE register */
838 {4, 3, 6}, /* cost of loading SSE registers
839 in SImode, DImode and TImode */
840 {4, 4, 5}, /* cost of storing SSE registers
841 in SImode, DImode and TImode */
842 5, /* MMX or SSE register to integer */
843 64, /* size of l1 cache. */
844 512, /* size of l2 cache. */
845 64, /* size of prefetch block */
846 /* New AMD processors never drop prefetches; if they cannot be performed
847 immediately, they are queued. We set number of simultaneous prefetches
848 to a large constant to reflect this (it probably is not a good idea not
849 to limit number of prefetches at all, as their execution also takes some
851 100, /* number of parallel prefetches */
853 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
854 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
855 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
856 COSTS_N_INSNS (2), /* cost of FABS instruction. */
857 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
858 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
862 4, /* scalar_stmt_cost. */
863 2, /* scalar load_cost. */
864 2, /* scalar_store_cost. */
865 5, /* vec_stmt_cost. */
866 0, /* vec_to_scalar_cost. */
867 2, /* scalar_to_vec_cost. */
868 2, /* vec_align_load_cost. */
869 3, /* vec_unalign_load_cost. */
870 3, /* vec_store_cost. */
871 3, /* cond_taken_branch_cost. */
872 2, /* cond_not_taken_branch_cost. */
875 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
876 very small blocks it is better to use loop. For large blocks, libcall can
877 do nontemporary accesses and beat inline considerably. */
/* Same shape as the K8 tables: loop for tiny blocks, unrolled loop for
   small ones, rep string ops in the middle, library call for large
   blocks.  Second entry uses 8-byte rep (presumably the 64-bit
   variant -- confirm).  */
878 static stringop_algs amdfam10_memcpy[2] = {
879 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
880 {-1, rep_prefix_4_byte, false}}},
881 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
882 {-1, libcall, false}}}};
883 static stringop_algs amdfam10_memset[2] = {
884 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
885 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
886 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
887 {-1, libcall, false}}}};
888 struct processor_costs amdfam10_cost = {
889 COSTS_N_INSNS (1), /* cost of an add instruction */
890 COSTS_N_INSNS (2), /* cost of a lea instruction */
891 COSTS_N_INSNS (1), /* variable shift costs */
892 COSTS_N_INSNS (1), /* constant shift costs */
893 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
894 COSTS_N_INSNS (4), /* HI */
895 COSTS_N_INSNS (3), /* SI */
896 COSTS_N_INSNS (4), /* DI */
897 COSTS_N_INSNS (5)}, /* other */
898 0, /* cost of multiply per each bit set */
899 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
900 COSTS_N_INSNS (35), /* HI */
901 COSTS_N_INSNS (51), /* SI */
902 COSTS_N_INSNS (83), /* DI */
903 COSTS_N_INSNS (83)}, /* other */
904 COSTS_N_INSNS (1), /* cost of movsx */
905 COSTS_N_INSNS (1), /* cost of movzx */
906 8, /* "large" insn */
908 4, /* cost for loading QImode using movzbl */
909 {3, 4, 3}, /* cost of loading integer registers
910 in QImode, HImode and SImode.
911 Relative to reg-reg move (2). */
912 {3, 4, 3}, /* cost of storing integer registers */
913 4, /* cost of reg,reg fld/fst */
914 {4, 4, 12}, /* cost of loading fp registers
915 in SFmode, DFmode and XFmode */
916 {6, 6, 8}, /* cost of storing fp registers
917 in SFmode, DFmode and XFmode */
918 2, /* cost of moving MMX register */
919 {3, 3}, /* cost of loading MMX registers
920 in SImode and DImode */
921 {4, 4}, /* cost of storing MMX registers
922 in SImode and DImode */
923 2, /* cost of moving SSE register */
924 {4, 4, 3}, /* cost of loading SSE registers
925 in SImode, DImode and TImode */
926 {4, 4, 5}, /* cost of storing SSE registers
927 in SImode, DImode and TImode */
928 3, /* MMX or SSE register to integer */
930 MOVD reg64, xmmreg Double FSTORE 4
931 MOVD reg32, xmmreg Double FSTORE 4
933 MOVD reg64, xmmreg Double FADD 3
935 MOVD reg32, xmmreg Double FADD 3
937 64, /* size of l1 cache. */
938 512, /* size of l2 cache. */
939 64, /* size of prefetch block */
940 /* New AMD processors never drop prefetches; if they cannot be performed
941 immediately, they are queued. We set number of simultaneous prefetches
942 to a large constant to reflect this (it probably is not a good idea not
943 to limit number of prefetches at all, as their execution also takes some
945 100, /* number of parallel prefetches */
947 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
948 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
949 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
950 COSTS_N_INSNS (2), /* cost of FABS instruction. */
951 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
952 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
956 4, /* scalar_stmt_cost. */
957 2, /* scalar load_cost. */
958 2, /* scalar_store_cost. */
959 6, /* vec_stmt_cost. */
960 0, /* vec_to_scalar_cost. */
961 2, /* scalar_to_vec_cost. */
962 2, /* vec_align_load_cost. */
963 2, /* vec_unalign_load_cost. */
964 2, /* vec_store_cost. */
965 2, /* cond_taken_branch_cost. */
966 1, /* cond_not_taken_branch_cost. */
969 /* BDVER1 has optimized REP instruction for medium sized blocks, but for
970 very small blocks it is better to use loop. For large blocks, libcall
971 can do nontemporary accesses and beat inline considerably. */
972 static stringop_algs bdver1_memcpy[2] = {
973 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
974 {-1, rep_prefix_4_byte, false}}},
975 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
976 {-1, libcall, false}}}};
977 static stringop_algs bdver1_memset[2] = {
978 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
979 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
980 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
981 {-1, libcall, false}}}};
983 const struct processor_costs bdver1_cost = {
984 COSTS_N_INSNS (1), /* cost of an add instruction */
985 COSTS_N_INSNS (1), /* cost of a lea instruction */
986 COSTS_N_INSNS (1), /* variable shift costs */
987 COSTS_N_INSNS (1), /* constant shift costs */
988 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
989 COSTS_N_INSNS (4), /* HI */
990 COSTS_N_INSNS (4), /* SI */
991 COSTS_N_INSNS (6), /* DI */
992 COSTS_N_INSNS (6)}, /* other */
993 0, /* cost of multiply per each bit set */
994 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
995 COSTS_N_INSNS (35), /* HI */
996 COSTS_N_INSNS (51), /* SI */
997 COSTS_N_INSNS (83), /* DI */
998 COSTS_N_INSNS (83)}, /* other */
999 COSTS_N_INSNS (1), /* cost of movsx */
1000 COSTS_N_INSNS (1), /* cost of movzx */
1001 8, /* "large" insn */
1003 4, /* cost for loading QImode using movzbl */
1004 {5, 5, 4}, /* cost of loading integer registers
1005 in QImode, HImode and SImode.
1006 Relative to reg-reg move (2). */
1007 {4, 4, 4}, /* cost of storing integer registers */
1008 2, /* cost of reg,reg fld/fst */
1009 {5, 5, 12}, /* cost of loading fp registers
1010 in SFmode, DFmode and XFmode */
1011 {4, 4, 8}, /* cost of storing fp registers
1012 in SFmode, DFmode and XFmode */
1013 2, /* cost of moving MMX register */
1014 {4, 4}, /* cost of loading MMX registers
1015 in SImode and DImode */
1016 {4, 4}, /* cost of storing MMX registers
1017 in SImode and DImode */
1018 2, /* cost of moving SSE register */
1019 {4, 4, 4}, /* cost of loading SSE registers
1020 in SImode, DImode and TImode */
1021 {4, 4, 4}, /* cost of storing SSE registers
1022 in SImode, DImode and TImode */
1023 2, /* MMX or SSE register to integer */
1025 MOVD reg64, xmmreg Double FSTORE 4
1026 MOVD reg32, xmmreg Double FSTORE 4
1028 MOVD reg64, xmmreg Double FADD 3
1030 MOVD reg32, xmmreg Double FADD 3
1032 16, /* size of l1 cache. */
1033 2048, /* size of l2 cache. */
1034 64, /* size of prefetch block */
1035 /* New AMD processors never drop prefetches; if they cannot be performed
1036 immediately, they are queued. We set number of simultaneous prefetches
1037 to a large constant to reflect this (it probably is not a good idea not
1038 to limit number of prefetches at all, as their execution also takes some
1040 100, /* number of parallel prefetches */
1041 2, /* Branch cost */
1042 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1043 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1044 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1045 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1046 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1047 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1051 6, /* scalar_stmt_cost. */
1052 4, /* scalar load_cost. */
1053 4, /* scalar_store_cost. */
1054 6, /* vec_stmt_cost. */
1055 0, /* vec_to_scalar_cost. */
1056 2, /* scalar_to_vec_cost. */
1057 4, /* vec_align_load_cost. */
1058 4, /* vec_unalign_load_cost. */
1059 4, /* vec_store_cost. */
1060 4, /* cond_taken_branch_cost. */
1061 2, /* cond_not_taken_branch_cost. */
1064 /* BDVER2 has optimized REP instruction for medium sized blocks, but for
1065 very small blocks it is better to use loop. For large blocks, libcall
1066 can do nontemporary accesses and beat inline considerably. */
1068 static stringop_algs bdver2_memcpy[2] = {
1069 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1070 {-1, rep_prefix_4_byte, false}}},
1071 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1072 {-1, libcall, false}}}};
1073 static stringop_algs bdver2_memset[2] = {
1074 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1075 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1076 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1077 {-1, libcall, false}}}};
1079 const struct processor_costs bdver2_cost = {
1080 COSTS_N_INSNS (1), /* cost of an add instruction */
1081 COSTS_N_INSNS (1), /* cost of a lea instruction */
1082 COSTS_N_INSNS (1), /* variable shift costs */
1083 COSTS_N_INSNS (1), /* constant shift costs */
1084 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1085 COSTS_N_INSNS (4), /* HI */
1086 COSTS_N_INSNS (4), /* SI */
1087 COSTS_N_INSNS (6), /* DI */
1088 COSTS_N_INSNS (6)}, /* other */
1089 0, /* cost of multiply per each bit set */
1090 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1091 COSTS_N_INSNS (35), /* HI */
1092 COSTS_N_INSNS (51), /* SI */
1093 COSTS_N_INSNS (83), /* DI */
1094 COSTS_N_INSNS (83)}, /* other */
1095 COSTS_N_INSNS (1), /* cost of movsx */
1096 COSTS_N_INSNS (1), /* cost of movzx */
1097 8, /* "large" insn */
1099 4, /* cost for loading QImode using movzbl */
1100 {5, 5, 4}, /* cost of loading integer registers
1101 in QImode, HImode and SImode.
1102 Relative to reg-reg move (2). */
1103 {4, 4, 4}, /* cost of storing integer registers */
1104 2, /* cost of reg,reg fld/fst */
1105 {5, 5, 12}, /* cost of loading fp registers
1106 in SFmode, DFmode and XFmode */
1107 {4, 4, 8}, /* cost of storing fp registers
1108 in SFmode, DFmode and XFmode */
1109 2, /* cost of moving MMX register */
1110 {4, 4}, /* cost of loading MMX registers
1111 in SImode and DImode */
1112 {4, 4}, /* cost of storing MMX registers
1113 in SImode and DImode */
1114 2, /* cost of moving SSE register */
1115 {4, 4, 4}, /* cost of loading SSE registers
1116 in SImode, DImode and TImode */
1117 {4, 4, 4}, /* cost of storing SSE registers
1118 in SImode, DImode and TImode */
1119 2, /* MMX or SSE register to integer */
1121 MOVD reg64, xmmreg Double FSTORE 4
1122 MOVD reg32, xmmreg Double FSTORE 4
1124 MOVD reg64, xmmreg Double FADD 3
1126 MOVD reg32, xmmreg Double FADD 3
1128 16, /* size of l1 cache. */
1129 2048, /* size of l2 cache. */
1130 64, /* size of prefetch block */
1131 /* New AMD processors never drop prefetches; if they cannot be performed
1132 immediately, they are queued. We set number of simultaneous prefetches
1133 to a large constant to reflect this (it probably is not a good idea not
1134 to limit number of prefetches at all, as their execution also takes some
1136 100, /* number of parallel prefetches */
1137 2, /* Branch cost */
1138 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1139 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1140 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1141 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1142 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1143 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1147 6, /* scalar_stmt_cost. */
1148 4, /* scalar load_cost. */
1149 4, /* scalar_store_cost. */
1150 6, /* vec_stmt_cost. */
1151 0, /* vec_to_scalar_cost. */
1152 2, /* scalar_to_vec_cost. */
1153 4, /* vec_align_load_cost. */
1154 4, /* vec_unalign_load_cost. */
1155 4, /* vec_store_cost. */
1156 4, /* cond_taken_branch_cost. */
1157 2, /* cond_not_taken_branch_cost. */
1161 /* BDVER3 has optimized REP instruction for medium sized blocks, but for
1162 very small blocks it is better to use loop. For large blocks, libcall
1163 can do nontemporary accesses and beat inline considerably. */
1164 static stringop_algs bdver3_memcpy[2] = {
1165 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1166 {-1, rep_prefix_4_byte, false}}},
1167 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1168 {-1, libcall, false}}}};
1169 static stringop_algs bdver3_memset[2] = {
1170 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1171 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1172 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1173 {-1, libcall, false}}}};
1174 struct processor_costs bdver3_cost = {
1175 COSTS_N_INSNS (1), /* cost of an add instruction */
1176 COSTS_N_INSNS (1), /* cost of a lea instruction */
1177 COSTS_N_INSNS (1), /* variable shift costs */
1178 COSTS_N_INSNS (1), /* constant shift costs */
1179 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1180 COSTS_N_INSNS (4), /* HI */
1181 COSTS_N_INSNS (4), /* SI */
1182 COSTS_N_INSNS (6), /* DI */
1183 COSTS_N_INSNS (6)}, /* other */
1184 0, /* cost of multiply per each bit set */
1185 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1186 COSTS_N_INSNS (35), /* HI */
1187 COSTS_N_INSNS (51), /* SI */
1188 COSTS_N_INSNS (83), /* DI */
1189 COSTS_N_INSNS (83)}, /* other */
1190 COSTS_N_INSNS (1), /* cost of movsx */
1191 COSTS_N_INSNS (1), /* cost of movzx */
1192 8, /* "large" insn */
1194 4, /* cost for loading QImode using movzbl */
1195 {5, 5, 4}, /* cost of loading integer registers
1196 in QImode, HImode and SImode.
1197 Relative to reg-reg move (2). */
1198 {4, 4, 4}, /* cost of storing integer registers */
1199 2, /* cost of reg,reg fld/fst */
1200 {5, 5, 12}, /* cost of loading fp registers
1201 in SFmode, DFmode and XFmode */
1202 {4, 4, 8}, /* cost of storing fp registers
1203 in SFmode, DFmode and XFmode */
1204 2, /* cost of moving MMX register */
1205 {4, 4}, /* cost of loading MMX registers
1206 in SImode and DImode */
1207 {4, 4}, /* cost of storing MMX registers
1208 in SImode and DImode */
1209 2, /* cost of moving SSE register */
1210 {4, 4, 4}, /* cost of loading SSE registers
1211 in SImode, DImode and TImode */
1212 {4, 4, 4}, /* cost of storing SSE registers
1213 in SImode, DImode and TImode */
1214 2, /* MMX or SSE register to integer */
1215 16, /* size of l1 cache. */
1216 2048, /* size of l2 cache. */
1217 64, /* size of prefetch block */
1218 /* New AMD processors never drop prefetches; if they cannot be performed
1219 immediately, they are queued. We set number of simultaneous prefetches
1220 to a large constant to reflect this (it probably is not a good idea not
1221 to limit number of prefetches at all, as their execution also takes some
1223 100, /* number of parallel prefetches */
1224 2, /* Branch cost */
1225 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1226 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1227 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1228 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1229 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1230 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1234 6, /* scalar_stmt_cost. */
1235 4, /* scalar load_cost. */
1236 4, /* scalar_store_cost. */
1237 6, /* vec_stmt_cost. */
1238 0, /* vec_to_scalar_cost. */
1239 2, /* scalar_to_vec_cost. */
1240 4, /* vec_align_load_cost. */
1241 4, /* vec_unalign_load_cost. */
1242 4, /* vec_store_cost. */
1243 4, /* cond_taken_branch_cost. */
1244 2, /* cond_not_taken_branch_cost. */
1247 /* BDVER4 has optimized REP instruction for medium sized blocks, but for
1248 very small blocks it is better to use loop. For large blocks, libcall
1249 can do nontemporary accesses and beat inline considerably. */
1250 static stringop_algs bdver4_memcpy[2] = {
1251 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1252 {-1, rep_prefix_4_byte, false}}},
1253 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1254 {-1, libcall, false}}}};
1255 static stringop_algs bdver4_memset[2] = {
1256 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1257 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1258 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1259 {-1, libcall, false}}}};
1260 struct processor_costs bdver4_cost = {
1261 COSTS_N_INSNS (1), /* cost of an add instruction */
1262 COSTS_N_INSNS (1), /* cost of a lea instruction */
1263 COSTS_N_INSNS (1), /* variable shift costs */
1264 COSTS_N_INSNS (1), /* constant shift costs */
1265 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1266 COSTS_N_INSNS (4), /* HI */
1267 COSTS_N_INSNS (4), /* SI */
1268 COSTS_N_INSNS (6), /* DI */
1269 COSTS_N_INSNS (6)}, /* other */
1270 0, /* cost of multiply per each bit set */
1271 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1272 COSTS_N_INSNS (35), /* HI */
1273 COSTS_N_INSNS (51), /* SI */
1274 COSTS_N_INSNS (83), /* DI */
1275 COSTS_N_INSNS (83)}, /* other */
1276 COSTS_N_INSNS (1), /* cost of movsx */
1277 COSTS_N_INSNS (1), /* cost of movzx */
1278 8, /* "large" insn */
1280 4, /* cost for loading QImode using movzbl */
1281 {5, 5, 4}, /* cost of loading integer registers
1282 in QImode, HImode and SImode.
1283 Relative to reg-reg move (2). */
1284 {4, 4, 4}, /* cost of storing integer registers */
1285 2, /* cost of reg,reg fld/fst */
1286 {5, 5, 12}, /* cost of loading fp registers
1287 in SFmode, DFmode and XFmode */
1288 {4, 4, 8}, /* cost of storing fp registers
1289 in SFmode, DFmode and XFmode */
1290 2, /* cost of moving MMX register */
1291 {4, 4}, /* cost of loading MMX registers
1292 in SImode and DImode */
1293 {4, 4}, /* cost of storing MMX registers
1294 in SImode and DImode */
1295 2, /* cost of moving SSE register */
1296 {4, 4, 4}, /* cost of loading SSE registers
1297 in SImode, DImode and TImode */
1298 {4, 4, 4}, /* cost of storing SSE registers
1299 in SImode, DImode and TImode */
1300 2, /* MMX or SSE register to integer */
1301 16, /* size of l1 cache. */
1302 2048, /* size of l2 cache. */
1303 64, /* size of prefetch block */
1304 /* New AMD processors never drop prefetches; if they cannot be performed
1305 immediately, they are queued. We set number of simultaneous prefetches
1306 to a large constant to reflect this (it probably is not a good idea not
1307 to limit number of prefetches at all, as their execution also takes some
1309 100, /* number of parallel prefetches */
1310 2, /* Branch cost */
1311 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1312 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1313 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1314 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1315 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1316 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1320 6, /* scalar_stmt_cost. */
1321 4, /* scalar load_cost. */
1322 4, /* scalar_store_cost. */
1323 6, /* vec_stmt_cost. */
1324 0, /* vec_to_scalar_cost. */
1325 2, /* scalar_to_vec_cost. */
1326 4, /* vec_align_load_cost. */
1327 4, /* vec_unalign_load_cost. */
1328 4, /* vec_store_cost. */
1329 4, /* cond_taken_branch_cost. */
1330 2, /* cond_not_taken_branch_cost. */
1334 /* ZNVER1 has optimized REP instruction for medium sized blocks, but for
1335 very small blocks it is better to use loop. For large blocks, libcall
1336 can do nontemporary accesses and beat inline considerably. */
1337 static stringop_algs znver1_memcpy[2] = {
1338 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1339 {-1, rep_prefix_4_byte, false}}},
1340 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1341 {-1, libcall, false}}}};
1342 static stringop_algs znver1_memset[2] = {
1343 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1344 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1345 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1346 {-1, libcall, false}}}};
1347 struct processor_costs znver1_cost = {
1348 COSTS_N_INSNS (1), /* cost of an add instruction. */
1349 COSTS_N_INSNS (1), /* cost of a lea instruction. */
1350 COSTS_N_INSNS (1), /* variable shift costs. */
1351 COSTS_N_INSNS (1), /* constant shift costs. */
1352 {COSTS_N_INSNS (4), /* cost of starting multiply for QI. */
1353 COSTS_N_INSNS (4), /* HI. */
1354 COSTS_N_INSNS (4), /* SI. */
1355 COSTS_N_INSNS (6), /* DI. */
1356 COSTS_N_INSNS (6)}, /* other. */
1357 0, /* cost of multiply per each bit
1359 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI. */
1360 COSTS_N_INSNS (35), /* HI. */
1361 COSTS_N_INSNS (51), /* SI. */
1362 COSTS_N_INSNS (83), /* DI. */
1363 COSTS_N_INSNS (83)}, /* other. */
1364 COSTS_N_INSNS (1), /* cost of movsx. */
1365 COSTS_N_INSNS (1), /* cost of movzx. */
1366 8, /* "large" insn. */
1367 9, /* MOVE_RATIO. */
1368 4, /* cost for loading QImode using
1370 {5, 5, 4}, /* cost of loading integer registers
1371 in QImode, HImode and SImode.
1372 Relative to reg-reg move (2). */
1373 {4, 4, 4}, /* cost of storing integer
1375 2, /* cost of reg,reg fld/fst. */
1376 {5, 5, 12}, /* cost of loading fp registers
1377 in SFmode, DFmode and XFmode. */
1378 {4, 4, 8}, /* cost of storing fp registers
1379 in SFmode, DFmode and XFmode. */
1380 2, /* cost of moving MMX register. */
1381 {4, 4}, /* cost of loading MMX registers
1382 in SImode and DImode. */
1383 {4, 4}, /* cost of storing MMX registers
1384 in SImode and DImode. */
1385 2, /* cost of moving SSE register. */
1386 {4, 4, 4}, /* cost of loading SSE registers
1387 in SImode, DImode and TImode. */
1388 {4, 4, 4}, /* cost of storing SSE registers
1389 in SImode, DImode and TImode. */
1390 2, /* MMX or SSE register to integer. */
1391 32, /* size of l1 cache. */
1392 512, /* size of l2 cache. */
1393 64, /* size of prefetch block. */
1394 /* New AMD processors never drop prefetches; if they cannot be performed
1395 immediately, they are queued. We set number of simultaneous prefetches
1396 to a large constant to reflect this (it probably is not a good idea not
1397 to limit number of prefetches at all, as their execution also takes some
1399 100, /* number of parallel prefetches. */
1400 2, /* Branch cost. */
1401 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1402 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1403 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1404 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1405 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1406 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1410 6, /* scalar_stmt_cost. */
1411 4, /* scalar load_cost. */
1412 4, /* scalar_store_cost. */
1413 6, /* vec_stmt_cost. */
1414 0, /* vec_to_scalar_cost. */
1415 2, /* scalar_to_vec_cost. */
1416 4, /* vec_align_load_cost. */
1417 4, /* vec_unalign_load_cost. */
1418 4, /* vec_store_cost. */
1419 4, /* cond_taken_branch_cost. */
1420 2, /* cond_not_taken_branch_cost. */
1423 /* BTVER1 has optimized REP instruction for medium sized blocks, but for
1424 very small blocks it is better to use loop. For large blocks, libcall can
1425 do nontemporary accesses and beat inline considerably. */
1426 static stringop_algs btver1_memcpy[2] = {
1427 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1428 {-1, rep_prefix_4_byte, false}}},
1429 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1430 {-1, libcall, false}}}};
1431 static stringop_algs btver1_memset[2] = {
1432 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1433 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1434 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1435 {-1, libcall, false}}}};
1436 const struct processor_costs btver1_cost = {
1437 COSTS_N_INSNS (1), /* cost of an add instruction */
1438 COSTS_N_INSNS (2), /* cost of a lea instruction */
1439 COSTS_N_INSNS (1), /* variable shift costs */
1440 COSTS_N_INSNS (1), /* constant shift costs */
1441 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1442 COSTS_N_INSNS (4), /* HI */
1443 COSTS_N_INSNS (3), /* SI */
1444 COSTS_N_INSNS (4), /* DI */
1445 COSTS_N_INSNS (5)}, /* other */
1446 0, /* cost of multiply per each bit set */
1447 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1448 COSTS_N_INSNS (35), /* HI */
1449 COSTS_N_INSNS (51), /* SI */
1450 COSTS_N_INSNS (83), /* DI */
1451 COSTS_N_INSNS (83)}, /* other */
1452 COSTS_N_INSNS (1), /* cost of movsx */
1453 COSTS_N_INSNS (1), /* cost of movzx */
1454 8, /* "large" insn */
1456 4, /* cost for loading QImode using movzbl */
1457 {3, 4, 3}, /* cost of loading integer registers
1458 in QImode, HImode and SImode.
1459 Relative to reg-reg move (2). */
1460 {3, 4, 3}, /* cost of storing integer registers */
1461 4, /* cost of reg,reg fld/fst */
1462 {4, 4, 12}, /* cost of loading fp registers
1463 in SFmode, DFmode and XFmode */
1464 {6, 6, 8}, /* cost of storing fp registers
1465 in SFmode, DFmode and XFmode */
1466 2, /* cost of moving MMX register */
1467 {3, 3}, /* cost of loading MMX registers
1468 in SImode and DImode */
1469 {4, 4}, /* cost of storing MMX registers
1470 in SImode and DImode */
1471 2, /* cost of moving SSE register */
1472 {4, 4, 3}, /* cost of loading SSE registers
1473 in SImode, DImode and TImode */
1474 {4, 4, 5}, /* cost of storing SSE registers
1475 in SImode, DImode and TImode */
1476 3, /* MMX or SSE register to integer */
1478 MOVD reg64, xmmreg Double FSTORE 4
1479 MOVD reg32, xmmreg Double FSTORE 4
1481 MOVD reg64, xmmreg Double FADD 3
1483 MOVD reg32, xmmreg Double FADD 3
1485 32, /* size of l1 cache. */
1486 512, /* size of l2 cache. */
1487 64, /* size of prefetch block */
1488 100, /* number of parallel prefetches */
1489 2, /* Branch cost */
1490 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1491 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1492 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1493 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1494 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1495 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1499 4, /* scalar_stmt_cost. */
1500 2, /* scalar load_cost. */
1501 2, /* scalar_store_cost. */
1502 6, /* vec_stmt_cost. */
1503 0, /* vec_to_scalar_cost. */
1504 2, /* scalar_to_vec_cost. */
1505 2, /* vec_align_load_cost. */
1506 2, /* vec_unalign_load_cost. */
1507 2, /* vec_store_cost. */
1508 2, /* cond_taken_branch_cost. */
1509 1, /* cond_not_taken_branch_cost. */
1512 static stringop_algs btver2_memcpy[2] = {
1513 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1514 {-1, rep_prefix_4_byte, false}}},
1515 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1516 {-1, libcall, false}}}};
1517 static stringop_algs btver2_memset[2] = {
1518 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1519 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1520 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1521 {-1, libcall, false}}}};
1522 const struct processor_costs btver2_cost = {
1523 COSTS_N_INSNS (1), /* cost of an add instruction */
1524 COSTS_N_INSNS (2), /* cost of a lea instruction */
1525 COSTS_N_INSNS (1), /* variable shift costs */
1526 COSTS_N_INSNS (1), /* constant shift costs */
1527 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1528 COSTS_N_INSNS (4), /* HI */
1529 COSTS_N_INSNS (3), /* SI */
1530 COSTS_N_INSNS (4), /* DI */
1531 COSTS_N_INSNS (5)}, /* other */
1532 0, /* cost of multiply per each bit set */
1533 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1534 COSTS_N_INSNS (35), /* HI */
1535 COSTS_N_INSNS (51), /* SI */
1536 COSTS_N_INSNS (83), /* DI */
1537 COSTS_N_INSNS (83)}, /* other */
1538 COSTS_N_INSNS (1), /* cost of movsx */
1539 COSTS_N_INSNS (1), /* cost of movzx */
1540 8, /* "large" insn */
1542 4, /* cost for loading QImode using movzbl */
1543 {3, 4, 3}, /* cost of loading integer registers
1544 in QImode, HImode and SImode.
1545 Relative to reg-reg move (2). */
1546 {3, 4, 3}, /* cost of storing integer registers */
1547 4, /* cost of reg,reg fld/fst */
1548 {4, 4, 12}, /* cost of loading fp registers
1549 in SFmode, DFmode and XFmode */
1550 {6, 6, 8}, /* cost of storing fp registers
1551 in SFmode, DFmode and XFmode */
1552 2, /* cost of moving MMX register */
1553 {3, 3}, /* cost of loading MMX registers
1554 in SImode and DImode */
1555 {4, 4}, /* cost of storing MMX registers
1556 in SImode and DImode */
1557 2, /* cost of moving SSE register */
1558 {4, 4, 3}, /* cost of loading SSE registers
1559 in SImode, DImode and TImode */
1560 {4, 4, 5}, /* cost of storing SSE registers
1561 in SImode, DImode and TImode */
1562 3, /* MMX or SSE register to integer */
1564 MOVD reg64, xmmreg Double FSTORE 4
1565 MOVD reg32, xmmreg Double FSTORE 4
1567 MOVD reg64, xmmreg Double FADD 3
1569 MOVD reg32, xmmreg Double FADD 3
1571 32, /* size of l1 cache. */
1572 2048, /* size of l2 cache. */
1573 64, /* size of prefetch block */
1574 100, /* number of parallel prefetches */
1575 2, /* Branch cost */
1576 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1577 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1578 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1579 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1580 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1581 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1584 4, /* scalar_stmt_cost. */
1585 2, /* scalar load_cost. */
1586 2, /* scalar_store_cost. */
1587 6, /* vec_stmt_cost. */
1588 0, /* vec_to_scalar_cost. */
1589 2, /* scalar_to_vec_cost. */
1590 2, /* vec_align_load_cost. */
1591 2, /* vec_unalign_load_cost. */
1592 2, /* vec_store_cost. */
1593 2, /* cond_taken_branch_cost. */
1594 1, /* cond_not_taken_branch_cost. */
1597 static stringop_algs pentium4_memcpy[2] = {
1598 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1599 DUMMY_STRINGOP_ALGS};
1600 static stringop_algs pentium4_memset[2] = {
1601 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1602 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1603 DUMMY_STRINGOP_ALGS};
1606 struct processor_costs pentium4_cost = {
1607 COSTS_N_INSNS (1), /* cost of an add instruction */
1608 COSTS_N_INSNS (3), /* cost of a lea instruction */
1609 COSTS_N_INSNS (4), /* variable shift costs */
1610 COSTS_N_INSNS (4), /* constant shift costs */
1611 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1612 COSTS_N_INSNS (15), /* HI */
1613 COSTS_N_INSNS (15), /* SI */
1614 COSTS_N_INSNS (15), /* DI */
1615 COSTS_N_INSNS (15)}, /* other */
1616 0, /* cost of multiply per each bit set */
1617 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1618 COSTS_N_INSNS (56), /* HI */
1619 COSTS_N_INSNS (56), /* SI */
1620 COSTS_N_INSNS (56), /* DI */
1621 COSTS_N_INSNS (56)}, /* other */
1622 COSTS_N_INSNS (1), /* cost of movsx */
1623 COSTS_N_INSNS (1), /* cost of movzx */
1624 16, /* "large" insn */
1626 2, /* cost for loading QImode using movzbl */
1627 {4, 5, 4}, /* cost of loading integer registers
1628 in QImode, HImode and SImode.
1629 Relative to reg-reg move (2). */
1630 {2, 3, 2}, /* cost of storing integer registers */
1631 2, /* cost of reg,reg fld/fst */
1632 {2, 2, 6}, /* cost of loading fp registers
1633 in SFmode, DFmode and XFmode */
1634 {4, 4, 6}, /* cost of storing fp registers
1635 in SFmode, DFmode and XFmode */
1636 2, /* cost of moving MMX register */
1637 {2, 2}, /* cost of loading MMX registers
1638 in SImode and DImode */
1639 {2, 2}, /* cost of storing MMX registers
1640 in SImode and DImode */
1641 12, /* cost of moving SSE register */
1642 {12, 12, 12}, /* cost of loading SSE registers
1643 in SImode, DImode and TImode */
1644 {2, 2, 8}, /* cost of storing SSE registers
1645 in SImode, DImode and TImode */
1646 10, /* MMX or SSE register to integer */
1647 8, /* size of l1 cache. */
1648 256, /* size of l2 cache. */
1649 64, /* size of prefetch block */
1650 6, /* number of parallel prefetches */
1651 2, /* Branch cost */
1652 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1653 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1654 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1655 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1656 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1657 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1660 1, /* scalar_stmt_cost. */
1661 1, /* scalar load_cost. */
1662 1, /* scalar_store_cost. */
1663 1, /* vec_stmt_cost. */
1664 1, /* vec_to_scalar_cost. */
1665 1, /* scalar_to_vec_cost. */
1666 1, /* vec_align_load_cost. */
1667 2, /* vec_unalign_load_cost. */
1668 1, /* vec_store_cost. */
1669 3, /* cond_taken_branch_cost. */
1670 1, /* cond_not_taken_branch_cost. */
1673 static stringop_algs nocona_memcpy[2] = {
1674 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1675 {libcall, {{32, loop, false}, {20000, rep_prefix_8_byte, false},
1676 {100000, unrolled_loop, false}, {-1, libcall, false}}}};
/* Per-size memset strategy table for Nocona; same layout as
   nocona_memcpy above (the -1 entry terminates each list).  */
1678 static stringop_algs nocona_memset[2] = {
1679 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1680 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1681 {libcall, {{24, loop, false}, {64, unrolled_loop, false},
1682 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1685 struct processor_costs nocona_cost = {
1686 COSTS_N_INSNS (1), /* cost of an add instruction */
1687 COSTS_N_INSNS (1), /* cost of a lea instruction */
1688 COSTS_N_INSNS (1), /* variable shift costs */
1689 COSTS_N_INSNS (1), /* constant shift costs */
1690 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1691 COSTS_N_INSNS (10), /* HI */
1692 COSTS_N_INSNS (10), /* SI */
1693 COSTS_N_INSNS (10), /* DI */
1694 COSTS_N_INSNS (10)}, /* other */
1695 0, /* cost of multiply per each bit set */
1696 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1697 COSTS_N_INSNS (66), /* HI */
1698 COSTS_N_INSNS (66), /* SI */
1699 COSTS_N_INSNS (66), /* DI */
1700 COSTS_N_INSNS (66)}, /* other */
1701 COSTS_N_INSNS (1), /* cost of movsx */
1702 COSTS_N_INSNS (1), /* cost of movzx */
1703 16, /* "large" insn */
1704 17, /* MOVE_RATIO */
1705 4, /* cost for loading QImode using movzbl */
1706 {4, 4, 4}, /* cost of loading integer registers
1707 in QImode, HImode and SImode.
1708 Relative to reg-reg move (2). */
1709 {4, 4, 4}, /* cost of storing integer registers */
1710 3, /* cost of reg,reg fld/fst */
1711 {12, 12, 12}, /* cost of loading fp registers
1712 in SFmode, DFmode and XFmode */
1713 {4, 4, 4}, /* cost of storing fp registers
1714 in SFmode, DFmode and XFmode */
1715 6, /* cost of moving MMX register */
1716 {12, 12}, /* cost of loading MMX registers
1717 in SImode and DImode */
1718 {12, 12}, /* cost of storing MMX registers
1719 in SImode and DImode */
1720 6, /* cost of moving SSE register */
1721 {12, 12, 12}, /* cost of loading SSE registers
1722 in SImode, DImode and TImode */
1723 {12, 12, 12}, /* cost of storing SSE registers
1724 in SImode, DImode and TImode */
1725 8, /* MMX or SSE register to integer */
1726 8, /* size of l1 cache. */
1727 1024, /* size of l2 cache. */
1728 64, /* size of prefetch block */
1729 8, /* number of parallel prefetches */
1730 1, /* Branch cost */
1731 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1732 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1733 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1734 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1735 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1736 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1739 1, /* scalar_stmt_cost. */
1740 1, /* scalar load_cost. */
1741 1, /* scalar_store_cost. */
1742 1, /* vec_stmt_cost. */
1743 1, /* vec_to_scalar_cost. */
1744 1, /* scalar_to_vec_cost. */
1745 1, /* vec_align_load_cost. */
1746 2, /* vec_unalign_load_cost. */
1747 1, /* vec_store_cost. */
1748 3, /* cond_taken_branch_cost. */
1749 1, /* cond_not_taken_branch_cost. */
/* Per-size memcpy strategy table for Bonnell/Atom; {N, alg, flag}
   entries, -1 terminating each list.  */
1752 static stringop_algs atom_memcpy[2] = {
1753 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1754 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1755 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
/* Per-size memset strategy table for Bonnell/Atom; same layout as
   atom_memcpy.  */
1756 static stringop_algs atom_memset[2] = {
1757 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1758 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1759 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1760 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1762 struct processor_costs atom_cost = {
1763 COSTS_N_INSNS (1), /* cost of an add instruction */
1764 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1765 COSTS_N_INSNS (1), /* variable shift costs */
1766 COSTS_N_INSNS (1), /* constant shift costs */
1767 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1768 COSTS_N_INSNS (4), /* HI */
1769 COSTS_N_INSNS (3), /* SI */
1770 COSTS_N_INSNS (4), /* DI */
1771 COSTS_N_INSNS (2)}, /* other */
1772 0, /* cost of multiply per each bit set */
1773 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1774 COSTS_N_INSNS (26), /* HI */
1775 COSTS_N_INSNS (42), /* SI */
1776 COSTS_N_INSNS (74), /* DI */
1777 COSTS_N_INSNS (74)}, /* other */
1778 COSTS_N_INSNS (1), /* cost of movsx */
1779 COSTS_N_INSNS (1), /* cost of movzx */
1780 8, /* "large" insn */
1781 17, /* MOVE_RATIO */
1782 4, /* cost for loading QImode using movzbl */
1783 {4, 4, 4}, /* cost of loading integer registers
1784 in QImode, HImode and SImode.
1785 Relative to reg-reg move (2). */
1786 {4, 4, 4}, /* cost of storing integer registers */
1787 4, /* cost of reg,reg fld/fst */
1788 {12, 12, 12}, /* cost of loading fp registers
1789 in SFmode, DFmode and XFmode */
1790 {6, 6, 8}, /* cost of storing fp registers
1791 in SFmode, DFmode and XFmode */
1792 2, /* cost of moving MMX register */
1793 {8, 8}, /* cost of loading MMX registers
1794 in SImode and DImode */
1795 {8, 8}, /* cost of storing MMX registers
1796 in SImode and DImode */
1797 2, /* cost of moving SSE register */
1798 {8, 8, 8}, /* cost of loading SSE registers
1799 in SImode, DImode and TImode */
1800 {8, 8, 8}, /* cost of storing SSE registers
1801 in SImode, DImode and TImode */
1802 5, /* MMX or SSE register to integer */
1803 32, /* size of l1 cache. */
1804 256, /* size of l2 cache. */
1805 64, /* size of prefetch block */
1806 6, /* number of parallel prefetches */
1807 3, /* Branch cost */
1808 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1809 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1810 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1811 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1812 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1813 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1816 1, /* scalar_stmt_cost. */
1817 1, /* scalar load_cost. */
1818 1, /* scalar_store_cost. */
1819 1, /* vec_stmt_cost. */
1820 1, /* vec_to_scalar_cost. */
1821 1, /* scalar_to_vec_cost. */
1822 1, /* vec_align_load_cost. */
1823 2, /* vec_unalign_load_cost. */
1824 1, /* vec_store_cost. */
1825 3, /* cond_taken_branch_cost. */
1826 1, /* cond_not_taken_branch_cost. */
/* Per-size memcpy strategy table for Silvermont.  Identical values to
   the Bonnell tables above — if these stay in sync intentionally,
   consider sharing one table.  */
1829 static stringop_algs slm_memcpy[2] = {
1830 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1831 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1832 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
/* Per-size memset strategy table for Silvermont; same values as
   atom_memset.  */
1833 static stringop_algs slm_memset[2] = {
1834 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1835 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1836 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1837 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1839 struct processor_costs slm_cost = {
1840 COSTS_N_INSNS (1), /* cost of an add instruction */
1841 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1842 COSTS_N_INSNS (1), /* variable shift costs */
1843 COSTS_N_INSNS (1), /* constant shift costs */
1844 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1845 COSTS_N_INSNS (3), /* HI */
1846 COSTS_N_INSNS (3), /* SI */
1847 COSTS_N_INSNS (4), /* DI */
1848 COSTS_N_INSNS (2)}, /* other */
1849 0, /* cost of multiply per each bit set */
1850 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1851 COSTS_N_INSNS (26), /* HI */
1852 COSTS_N_INSNS (42), /* SI */
1853 COSTS_N_INSNS (74), /* DI */
1854 COSTS_N_INSNS (74)}, /* other */
1855 COSTS_N_INSNS (1), /* cost of movsx */
1856 COSTS_N_INSNS (1), /* cost of movzx */
1857 8, /* "large" insn */
1858 17, /* MOVE_RATIO */
1859 4, /* cost for loading QImode using movzbl */
1860 {4, 4, 4}, /* cost of loading integer registers
1861 in QImode, HImode and SImode.
1862 Relative to reg-reg move (2). */
1863 {4, 4, 4}, /* cost of storing integer registers */
1864 4, /* cost of reg,reg fld/fst */
1865 {12, 12, 12}, /* cost of loading fp registers
1866 in SFmode, DFmode and XFmode */
1867 {6, 6, 8}, /* cost of storing fp registers
1868 in SFmode, DFmode and XFmode */
1869 2, /* cost of moving MMX register */
1870 {8, 8}, /* cost of loading MMX registers
1871 in SImode and DImode */
1872 {8, 8}, /* cost of storing MMX registers
1873 in SImode and DImode */
1874 2, /* cost of moving SSE register */
1875 {8, 8, 8}, /* cost of loading SSE registers
1876 in SImode, DImode and TImode */
1877 {8, 8, 8}, /* cost of storing SSE registers
1878 in SImode, DImode and TImode */
1879 5, /* MMX or SSE register to integer */
1880 32, /* size of l1 cache. */
1881 256, /* size of l2 cache. */
1882 64, /* size of prefetch block */
1883 6, /* number of parallel prefetches */
1884 3, /* Branch cost */
1885 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1886 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1887 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1888 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1889 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1890 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1893 1, /* scalar_stmt_cost. */
1894 1, /* scalar load_cost. */
1895 1, /* scalar_store_cost. */
1896 1, /* vec_stmt_cost. */
1897 4, /* vec_to_scalar_cost. */
1898 1, /* scalar_to_vec_cost. */
1899 1, /* vec_align_load_cost. */
1900 2, /* vec_unalign_load_cost. */
1901 1, /* vec_store_cost. */
1902 3, /* cond_taken_branch_cost. */
1903 1, /* cond_not_taken_branch_cost. */
/* Per-size memcpy strategy table for the generic "intel" tuning; same
   values as the Bonnell/Silvermont tables.  */
1906 static stringop_algs intel_memcpy[2] = {
1907 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1908 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1909 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
/* Per-size memset strategy table for the generic "intel" tuning; same
   values as atom_memset/slm_memset.  */
1910 static stringop_algs intel_memset[2] = {
1911 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1912 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1913 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1914 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1916 struct processor_costs intel_cost = {
1917 COSTS_N_INSNS (1), /* cost of an add instruction */
1918 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1919 COSTS_N_INSNS (1), /* variable shift costs */
1920 COSTS_N_INSNS (1), /* constant shift costs */
1921 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1922 COSTS_N_INSNS (3), /* HI */
1923 COSTS_N_INSNS (3), /* SI */
1924 COSTS_N_INSNS (4), /* DI */
1925 COSTS_N_INSNS (2)}, /* other */
1926 0, /* cost of multiply per each bit set */
1927 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1928 COSTS_N_INSNS (26), /* HI */
1929 COSTS_N_INSNS (42), /* SI */
1930 COSTS_N_INSNS (74), /* DI */
1931 COSTS_N_INSNS (74)}, /* other */
1932 COSTS_N_INSNS (1), /* cost of movsx */
1933 COSTS_N_INSNS (1), /* cost of movzx */
1934 8, /* "large" insn */
1935 17, /* MOVE_RATIO */
1936 4, /* cost for loading QImode using movzbl */
1937 {4, 4, 4}, /* cost of loading integer registers
1938 in QImode, HImode and SImode.
1939 Relative to reg-reg move (2). */
1940 {4, 4, 4}, /* cost of storing integer registers */
1941 4, /* cost of reg,reg fld/fst */
1942 {12, 12, 12}, /* cost of loading fp registers
1943 in SFmode, DFmode and XFmode */
1944 {6, 6, 8}, /* cost of storing fp registers
1945 in SFmode, DFmode and XFmode */
1946 2, /* cost of moving MMX register */
1947 {8, 8}, /* cost of loading MMX registers
1948 in SImode and DImode */
1949 {8, 8}, /* cost of storing MMX registers
1950 in SImode and DImode */
1951 2, /* cost of moving SSE register */
1952 {8, 8, 8}, /* cost of loading SSE registers
1953 in SImode, DImode and TImode */
1954 {8, 8, 8}, /* cost of storing SSE registers
1955 in SImode, DImode and TImode */
1956 5, /* MMX or SSE register to integer */
1957 32, /* size of l1 cache. */
1958 256, /* size of l2 cache. */
1959 64, /* size of prefetch block */
1960 6, /* number of parallel prefetches */
1961 3, /* Branch cost */
1962 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1963 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1964 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1965 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1966 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1967 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1970 1, /* scalar_stmt_cost. */
1971 1, /* scalar load_cost. */
1972 1, /* scalar_store_cost. */
1973 1, /* vec_stmt_cost. */
1974 4, /* vec_to_scalar_cost. */
1975 1, /* scalar_to_vec_cost. */
1976 1, /* vec_align_load_cost. */
1977 2, /* vec_unalign_load_cost. */
1978 1, /* vec_store_cost. */
1979 3, /* cond_taken_branch_cost. */
1980 1, /* cond_not_taken_branch_cost. */
1983 /* Generic should produce code tuned for Core-i7 (and newer chips)
1984 and btver1 (and newer chips). */
/* Per-size memcpy strategy table for -mtune=generic: loop up to 32
   bytes, rep prefix up to 8k, then a library call.  */
1986 static stringop_algs generic_memcpy[2] = {
1987 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1988 {-1, libcall, false}}},
1989 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1990 {-1, libcall, false}}}};
/* Per-size memset strategy table for -mtune=generic; same shape and
   thresholds as generic_memcpy.  */
1991 static stringop_algs generic_memset[2] = {
1992 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1993 {-1, libcall, false}}},
1994 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1995 {-1, libcall, false}}}};
1997 struct processor_costs generic_cost = {
1998 COSTS_N_INSNS (1), /* cost of an add instruction */
1999 /* On all chips taken into consideration lea is 2 cycles and more. With
2000 this cost however our current implementation of synth_mult results in
2001 use of unnecessary temporary registers causing regression on several
2002 SPECfp benchmarks. */
2003 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
2004 COSTS_N_INSNS (1), /* variable shift costs */
2005 COSTS_N_INSNS (1), /* constant shift costs */
2006 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
2007 COSTS_N_INSNS (4), /* HI */
2008 COSTS_N_INSNS (3), /* SI */
2009 COSTS_N_INSNS (4), /* DI */
2010 COSTS_N_INSNS (2)}, /* other */
2011 0, /* cost of multiply per each bit set */
2012 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
2013 COSTS_N_INSNS (26), /* HI */
2014 COSTS_N_INSNS (42), /* SI */
2015 COSTS_N_INSNS (74), /* DI */
2016 COSTS_N_INSNS (74)}, /* other */
2017 COSTS_N_INSNS (1), /* cost of movsx */
2018 COSTS_N_INSNS (1), /* cost of movzx */
2019 8, /* "large" insn */
2020 17, /* MOVE_RATIO */
2021 4, /* cost for loading QImode using movzbl */
2022 {4, 4, 4}, /* cost of loading integer registers
2023 in QImode, HImode and SImode.
2024 Relative to reg-reg move (2). */
2025 {4, 4, 4}, /* cost of storing integer registers */
2026 4, /* cost of reg,reg fld/fst */
2027 {12, 12, 12}, /* cost of loading fp registers
2028 in SFmode, DFmode and XFmode */
2029 {6, 6, 8}, /* cost of storing fp registers
2030 in SFmode, DFmode and XFmode */
2031 2, /* cost of moving MMX register */
2032 {8, 8}, /* cost of loading MMX registers
2033 in SImode and DImode */
2034 {8, 8}, /* cost of storing MMX registers
2035 in SImode and DImode */
2036 2, /* cost of moving SSE register */
2037 {8, 8, 8}, /* cost of loading SSE registers
2038 in SImode, DImode and TImode */
2039 {8, 8, 8}, /* cost of storing SSE registers
2040 in SImode, DImode and TImode */
2041 5, /* MMX or SSE register to integer */
2042 32, /* size of l1 cache. */
2043 512, /* size of l2 cache. */
2044 64, /* size of prefetch block */
2045 6, /* number of parallel prefetches */
2046 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
2047 value is increased to the perhaps more appropriate value of 5. */
2048 3, /* Branch cost */
2049 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
2050 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
2051 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
2052 COSTS_N_INSNS (8), /* cost of FABS instruction. */
2053 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
2054 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
2057 1, /* scalar_stmt_cost. */
2058 1, /* scalar load_cost. */
2059 1, /* scalar_store_cost. */
2060 1, /* vec_stmt_cost. */
2061 1, /* vec_to_scalar_cost. */
2062 1, /* scalar_to_vec_cost. */
2063 1, /* vec_align_load_cost. */
2064 2, /* vec_unalign_load_cost. */
2065 1, /* vec_store_cost. */
2066 3, /* cond_taken_branch_cost. */
2067 1, /* cond_not_taken_branch_cost. */
2070 /* core_cost should produce code tuned for the Core family of CPUs. */
/* Per-size memcpy strategy table for the Core tuning.  Note the third
   field is TRUE on most entries here, unlike the other CPU tables —
   NOTE(review): presumably the "noalign" flag; confirm against the
   stringop_algs declaration.  */
2071 static stringop_algs core_memcpy[2] = {
2072 {libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}},
2073 {libcall, {{24, loop, true}, {128, rep_prefix_8_byte, true},
2074 {-1, libcall, false}}}};
/* Per-size memset strategy table for the Core tuning; same layout and
   flag usage as core_memcpy.  */
2075 static stringop_algs core_memset[2] = {
2076 {libcall, {{6, loop_1_byte, true},
2078 {8192, rep_prefix_4_byte, true},
2079 {-1, libcall, false}}},
2080 {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, true},
2081 {-1, libcall, false}}}};
2084 struct processor_costs core_cost = {
2085 COSTS_N_INSNS (1), /* cost of an add instruction */
2086 /* On all chips taken into consideration lea is 2 cycles and more. With
2087 this cost however our current implementation of synth_mult results in
2088 use of unnecessary temporary registers causing regression on several
2089 SPECfp benchmarks. */
2090 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
2091 COSTS_N_INSNS (1), /* variable shift costs */
2092 COSTS_N_INSNS (1), /* constant shift costs */
2093 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
2094 COSTS_N_INSNS (4), /* HI */
2095 COSTS_N_INSNS (3), /* SI */
2096 COSTS_N_INSNS (4), /* DI */
2097 COSTS_N_INSNS (2)}, /* other */
2098 0, /* cost of multiply per each bit set */
2099 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
2100 COSTS_N_INSNS (26), /* HI */
2101 COSTS_N_INSNS (42), /* SI */
2102 COSTS_N_INSNS (74), /* DI */
2103 COSTS_N_INSNS (74)}, /* other */
2104 COSTS_N_INSNS (1), /* cost of movsx */
2105 COSTS_N_INSNS (1), /* cost of movzx */
2106 8, /* "large" insn */
2107 17, /* MOVE_RATIO */
2108 4, /* cost for loading QImode using movzbl */
2109 {4, 4, 4}, /* cost of loading integer registers
2110 in QImode, HImode and SImode.
2111 Relative to reg-reg move (2). */
2112 {4, 4, 4}, /* cost of storing integer registers */
2113 4, /* cost of reg,reg fld/fst */
2114 {12, 12, 12}, /* cost of loading fp registers
2115 in SFmode, DFmode and XFmode */
2116 {6, 6, 8}, /* cost of storing fp registers
2117 in SFmode, DFmode and XFmode */
2118 2, /* cost of moving MMX register */
2119 {8, 8}, /* cost of loading MMX registers
2120 in SImode and DImode */
2121 {8, 8}, /* cost of storing MMX registers
2122 in SImode and DImode */
2123 2, /* cost of moving SSE register */
2124 {8, 8, 8}, /* cost of loading SSE registers
2125 in SImode, DImode and TImode */
2126 {8, 8, 8}, /* cost of storing SSE registers
2127 in SImode, DImode and TImode */
2128 5, /* MMX or SSE register to integer */
2129 64, /* size of l1 cache. */
2130 512, /* size of l2 cache. */
2131 64, /* size of prefetch block */
2132 6, /* number of parallel prefetches */
2133 /* FIXME perhaps more appropriate value is 5. */
2134 3, /* Branch cost */
2135 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
2136 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
2137 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
2138 COSTS_N_INSNS (8), /* cost of FABS instruction. */
2139 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
2140 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
2143 1, /* scalar_stmt_cost. */
2144 1, /* scalar load_cost. */
2145 1, /* scalar_store_cost. */
2146 1, /* vec_stmt_cost. */
2147 1, /* vec_to_scalar_cost. */
2148 1, /* scalar_to_vec_cost. */
2149 1, /* vec_align_load_cost. */
2150 2, /* vec_unalign_load_cost. */
2151 1, /* vec_store_cost. */
2152 3, /* cond_taken_branch_cost. */
2153 1, /* cond_not_taken_branch_cost. */
/* Currently active cost tables.  Both default to pentium_cost here and
   are repointed during option processing.  */
2157 /* Set by -mtune. */
2158 const struct processor_costs *ix86_tune_cost = &pentium_cost;
2160 /* Set by -mtune or -Os. */
2161 const struct processor_costs *ix86_cost = &pentium_cost;
2163 /* Processor feature/optimization bitmasks. */
2164 #define m_386 (1<<PROCESSOR_I386)
2165 #define m_486 (1<<PROCESSOR_I486)
2166 #define m_PENT (1<<PROCESSOR_PENTIUM)
2167 #define m_LAKEMONT (1<<PROCESSOR_LAKEMONT)
2168 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
2169 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
2170 #define m_NOCONA (1<<PROCESSOR_NOCONA)
2171 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
2172 #define m_CORE2 (1<<PROCESSOR_CORE2)
2173 #define m_NEHALEM (1<<PROCESSOR_NEHALEM)
2174 #define m_SANDYBRIDGE (1<<PROCESSOR_SANDYBRIDGE)
2175 #define m_HASWELL (1<<PROCESSOR_HASWELL)
2176 #define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_HASWELL)
2177 #define m_BONNELL (1<<PROCESSOR_BONNELL)
2178 #define m_SILVERMONT (1<<PROCESSOR_SILVERMONT)
2179 #define m_KNL (1<<PROCESSOR_KNL)
2180 #define m_SKYLAKE_AVX512 (1<<PROCESSOR_SKYLAKE_AVX512)
2181 #define m_INTEL (1<<PROCESSOR_INTEL)
2183 #define m_GEODE (1<<PROCESSOR_GEODE)
2184 #define m_K6 (1<<PROCESSOR_K6)
2185 #define m_K6_GEODE (m_K6 | m_GEODE)
2186 #define m_K8 (1<<PROCESSOR_K8)
2187 #define m_ATHLON (1<<PROCESSOR_ATHLON)
2188 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
2189 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
2190 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
2191 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
2192 #define m_BDVER3 (1<<PROCESSOR_BDVER3)
2193 #define m_BDVER4 (1<<PROCESSOR_BDVER4)
2194 #define m_ZNVER1 (1<<PROCESSOR_ZNVER1)
2195 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
2196 #define m_BTVER2 (1<<PROCESSOR_BTVER2)
2197 #define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4)
2198 #define m_BTVER (m_BTVER1 | m_BTVER2)
2199 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER \
2202 #define m_GENERIC (1<<PROCESSOR_GENERIC)
2204 const char* ix86_tune_feature_names[X86_TUNE_LAST] = {
2206 #define DEF_TUNE(tune, name, selector) name,
2207 #include "x86-tune.def"
2211 /* Feature tests against the various tunings. */
2212 unsigned char ix86_tune_features[X86_TUNE_LAST];
2214 /* Feature tests against the various tunings used to create ix86_tune_features
2215 based on the processor mask. */
2216 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
2218 #define DEF_TUNE(tune, name, selector) selector,
2219 #include "x86-tune.def"
2223 /* Feature tests against the various architecture variations. */
2224 unsigned char ix86_arch_features[X86_ARCH_LAST];
2226 /* Feature tests against the various architecture variations, used to create
2227 ix86_arch_features based on the processor mask. */
2228 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2229 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
2230 ~(m_386 | m_486 | m_PENT | m_LAKEMONT | m_K6),
2232 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2235 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2238 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2241 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2245 /* In case the average insn count for single function invocation is
2246 lower than this constant, emit fast (but longer) prologue and
2248 #define FAST_PROLOGUE_INSN_COUNT 20
2250 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
2251 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2252 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2253 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2255 /* Array of the smallest class containing reg number REGNO, indexed by
2256 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2258 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2260 /* ax, dx, cx, bx */
2261 AREG, DREG, CREG, BREG,
2262 /* si, di, bp, sp */
2263 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2265 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2266 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2269 /* flags, fpsr, fpcr, frame */
2270 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2272 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2275 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2278 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2279 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2280 /* SSE REX registers */
2281 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2283 /* AVX-512 SSE registers */
2284 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2285 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2286 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2287 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2288 /* Mask registers. */
2289 MASK_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2290 MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2291 /* MPX bound registers */
2292 BND_REGS, BND_REGS, BND_REGS, BND_REGS,
2295 /* The "default" register map used in 32bit mode. */
2297 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2299 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2300 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2301 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2302 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2303 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2304 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2305 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2306 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2307 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2308 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2309 101, 102, 103, 104, /* bound registers */
2312 /* The "default" register map used in 64bit mode. */
2314 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2316 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2317 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2318 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2319 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2320 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2321 8,9,10,11,12,13,14,15, /* extended integer registers */
2322 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2323 67, 68, 69, 70, 71, 72, 73, 74, /* AVX-512 registers 16-23 */
2324 75, 76, 77, 78, 79, 80, 81, 82, /* AVX-512 registers 24-31 */
2325 118, 119, 120, 121, 122, 123, 124, 125, /* Mask registers */
2326 126, 127, 128, 129, /* bound registers */
2329 /* Define the register numbers to be used in Dwarf debugging information.
2330 The SVR4 reference port C compiler uses the following register numbers
2331 in its Dwarf output code:
2332 0 for %eax (gcc regno = 0)
2333 1 for %ecx (gcc regno = 2)
2334 2 for %edx (gcc regno = 1)
2335 3 for %ebx (gcc regno = 3)
2336 4 for %esp (gcc regno = 7)
2337 5 for %ebp (gcc regno = 6)
2338 6 for %esi (gcc regno = 4)
2339 7 for %edi (gcc regno = 5)
2340 The following three DWARF register numbers are never generated by
2341 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2342 believes these numbers have these meanings.
2343 8 for %eip (no gcc equivalent)
2344 9 for %eflags (gcc regno = 17)
2345 10 for %trapno (no gcc equivalent)
2346 It is not at all clear how we should number the FP stack registers
2347 for the x86 architecture. If the version of SDB on x86/svr4 were
2348 a bit less brain dead with respect to floating-point then we would
2349 have a precedent to follow with respect to DWARF register numbers
2350 for x86 FP registers, but the SDB on x86/svr4 is so completely
2351 broken with respect to FP registers that it is hardly worth thinking
2352 of it as something to strive for compatibility with.
2353 The version of x86/svr4 SDB I have at the moment does (partially)
2354 seem to believe that DWARF register number 11 is associated with
2355 the x86 register %st(0), but that's about all. Higher DWARF
2356 register numbers don't seem to be associated with anything in
2357 particular, and even for DWARF regno 11, SDB only seems to under-
2358 stand that it should say that a variable lives in %st(0) (when
2359 asked via an `=' command) if we said it was in DWARF regno 11,
2360 but SDB still prints garbage when asked for the value of the
2361 variable in question (via a `/' command).
2362 (Also note that the labels SDB prints for various FP stack regs
2363 when doing an `x' command are all wrong.)
2364 Note that these problems generally don't affect the native SVR4
2365 C compiler because it doesn't allow the use of -O with -g and
2366 because when it is *not* optimizing, it allocates a memory
2367 location for each floating-point variable, and the memory
2368 location is what gets described in the DWARF AT_location
2369 attribute for the variable in question.
2370 Regardless of the severe mental illness of the x86/svr4 SDB, we
2371 do something sensible here and we use the following DWARF
2372 register numbers. Note that these are all stack-top-relative
2374 11 for %st(0) (gcc regno = 8)
2375 12 for %st(1) (gcc regno = 9)
2376 13 for %st(2) (gcc regno = 10)
2377 14 for %st(3) (gcc regno = 11)
2378 15 for %st(4) (gcc regno = 12)
2379 16 for %st(5) (gcc regno = 13)
2380 17 for %st(6) (gcc regno = 14)
2381 18 for %st(7) (gcc regno = 15)
2383 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2385 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2386 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2387 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2388 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2389 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2390 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2391 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2392 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2393 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2394 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2395 101, 102, 103, 104, /* bound registers */
2398 /* Define parameter passing and return registers. */
2400 static int const x86_64_int_parameter_registers[6] =
2402 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2405 static int const x86_64_ms_abi_int_parameter_registers[4] =
2407 CX_REG, DX_REG, R8_REG, R9_REG
2410 static int const x86_64_int_return_registers[4] =
2412 AX_REG, DX_REG, DI_REG, SI_REG
2415 /* Additional registers that are clobbered by SYSV calls. */
2417 int const x86_64_ms_sysv_extra_clobbered_registers[12] =
2421 XMM8_REG, XMM9_REG, XMM10_REG, XMM11_REG,
2422 XMM12_REG, XMM13_REG, XMM14_REG, XMM15_REG
2425 /* Define the structure for the machine field in struct function. */
2427 struct GTY(()) stack_local_entry {
2428 unsigned short mode;
2431 struct stack_local_entry *next;
2434 /* Structure describing stack frame layout.
2435 Stack grows downward:
2441 saved static chain if ix86_static_chain_on_stack
2443 saved frame pointer if frame_pointer_needed
2444 <- HARD_FRAME_POINTER
2450 <- sse_regs_save_offset
2453 [va_arg registers] |
2457 [padding2] | = to_allocate
2466 int outgoing_arguments_size;
2468 /* The offsets relative to ARG_POINTER. */
2469 HOST_WIDE_INT frame_pointer_offset;
2470 HOST_WIDE_INT hard_frame_pointer_offset;
2471 HOST_WIDE_INT stack_pointer_offset;
2472 HOST_WIDE_INT hfp_save_offset;
2473 HOST_WIDE_INT reg_save_offset;
2474 HOST_WIDE_INT sse_reg_save_offset;
2476 /* When save_regs_using_mov is set, emit prologue using
2477 move instead of push instructions. */
2478 bool save_regs_using_mov;
/* NOTE(review): file-scope state for the i386 back end: scheduling/tuning/arch
   selections, ISA-dependent insn generator function pointers (selected once
   per target so 32/64-bit variants need no runtime test at each call site),
   stack-boundary settings, va_list type nodes and internal-label prefix.  */
2481 /* Which cpu are we scheduling for. */
2482 enum attr_cpu ix86_schedule;
2484 /* Which cpu are we optimizing for. */
2485 enum processor_type ix86_tune;
2487 /* Which instruction set architecture to use. */
2488 enum processor_type ix86_arch;
2490 /* True if processor has SSE prefetch instruction. */
2491 unsigned char x86_prefetch_sse;
2493 /* -mstackrealign option */
2494 static const char ix86_force_align_arg_pointer_string[]
2495 = "force_align_arg_pointer";
/* Generator hooks; each is a pointer to a gen_* pattern function chosen
   elsewhere (presumably by word size / ISA) — initialization not visible.  */
2497 static rtx (*ix86_gen_leave) (void);
2498 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2499 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2500 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2501 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2502 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2503 static rtx (*ix86_gen_monitorx) (rtx, rtx, rtx);
2504 static rtx (*ix86_gen_clzero) (rtx);
2505 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2506 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2507 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2508 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2509 static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
2510 static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);
2512 /* Preferred alignment for stack boundary in bits. */
2513 unsigned int ix86_preferred_stack_boundary;
2515 /* Alignment for incoming stack boundary in bits specified at
2517 static unsigned int ix86_user_incoming_stack_boundary;
2519 /* Default alignment for incoming stack boundary in bits. */
2520 static unsigned int ix86_default_incoming_stack_boundary;
2522 /* Alignment for incoming stack boundary in bits. */
2523 unsigned int ix86_incoming_stack_boundary;
2525 /* Calling abi specific va_list type nodes. */
2526 static GTY(()) tree sysv_va_list_type_node;
2527 static GTY(()) tree ms_va_list_type_node;
2529 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2530 char internal_label_prefix[16];
2531 int internal_label_prefix_len;
/* NOTE(review): x86-64 PS-ABI argument classification enum — only some
   enumerators survive the sampling (NO_CLASS, SSE*, X87*, MEMORY etc. are
   elided).  MAX_CLASSES bounds the per-argument class array.  */
2533 /* Fence to use after loop using movnt. */
2536 /* Register class used for passing given 64bit part of the argument.
2537 These represent classes as documented by the PS ABI, with the exception
2538 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
2539 use SF or DFmode move instead of DImode to avoid reformatting penalties.
2541 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2542 whenever possible (upper half does contain padding). */
2543 enum x86_64_reg_class
2546 X86_64_INTEGER_CLASS,
2547 X86_64_INTEGERSI_CLASS,
2554 X86_64_COMPLEX_X87_CLASS,
2558 #define MAX_CLASSES 8
/* Lazily-initialized table of x87 constants (fldpi, fldln2, ...).  */
2560 /* Table of constants used by fldpi, fldln2, etc.... */
2561 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2562 static bool ext_80387_constants_init = 0;
/* NOTE(review): forward declarations for static helpers defined later in the
   file, plus the function-specific (target attribute) option-string enum.
   Some parameter lists are cut mid-declaration by the sampling.  */
2565 static struct machine_function * ix86_init_machine_status (void);
2566 static rtx ix86_function_value (const_tree, const_tree, bool);
2567 static bool ix86_function_value_regno_p (const unsigned int);
2568 static unsigned int ix86_function_arg_boundary (machine_mode,
2570 static rtx ix86_static_chain (const_tree, bool);
2571 static int ix86_function_regparm (const_tree, const_tree);
2572 static void ix86_compute_frame_layout (struct ix86_frame *);
2573 static bool ix86_expand_vector_init_one_nonzero (bool, machine_mode,
2575 static void ix86_add_new_builtins (HOST_WIDE_INT);
2576 static tree ix86_canonical_va_list_type (tree);
2577 static void predict_jump (int);
2578 static unsigned int split_stack_prologue_scratch_regno (void);
2579 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
/* Indices into the per-function saved arch/tune option strings.  */
2581 enum ix86_function_specific_strings
2583 IX86_FUNCTION_SPECIFIC_ARCH,
2584 IX86_FUNCTION_SPECIFIC_TUNE,
2585 IX86_FUNCTION_SPECIFIC_MAX
2588 static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
2589 const char *, enum fpmath_unit, bool);
2590 static void ix86_function_specific_save (struct cl_target_option *,
2591 struct gcc_options *opts);
2592 static void ix86_function_specific_restore (struct gcc_options *opts,
2593 struct cl_target_option *);
2594 static void ix86_function_specific_post_stream_in (struct cl_target_option *);
2595 static void ix86_function_specific_print (FILE *, int,
2596 struct cl_target_option *);
2597 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2598 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2599 struct gcc_options *,
2600 struct gcc_options *,
2601 struct gcc_options *);
2602 static bool ix86_can_inline_p (tree, tree);
2603 static void ix86_set_current_function (tree);
2604 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2606 static enum calling_abi ix86_function_abi (const_tree);
2609 #ifndef SUBTARGET32_DEFAULT_CPU
2610 #define SUBTARGET32_DEFAULT_CPU "i386"
/* Whether -mtune=/-march= came from the user or were defaulted.  */
2613 /* Whether -mtune= or -march= were specified */
2614 static int ix86_tune_defaulted;
2615 static int ix86_arch_specified;
2617 /* Vectorization library interface and handlers. */
2618 static tree (*ix86_veclib_handler) (combined_fn, tree, tree);
2620 static tree ix86_veclibabi_svml (combined_fn, tree, tree);
2621 static tree ix86_veclibabi_acml (combined_fn, tree, tree);
/* NOTE(review): per-processor cost/alignment table.  Must stay in order with
   enum processor_type in i386.h (per the original comment) — reordering rows
   here silently mis-tunes every processor after the change.  The struct's tag
   line (`struct ptt {`) is elided by the sampling.  */
2623 /* Processor target table, indexed by processor number */
2626 const char *const name; /* processor name */
2627 const struct processor_costs *cost; /* Processor costs */
2628 const int align_loop; /* Default alignments. */
2629 const int align_loop_max_skip;
2630 const int align_jump;
2631 const int align_jump_max_skip;
2632 const int align_func;
2635 /* This table must be in sync with enum processor_type in i386.h. */
2636 static const struct ptt processor_target_table[PROCESSOR_max] =
2638 {"generic", &generic_cost, 16, 10, 16, 10, 16},
2639 {"i386", &i386_cost, 4, 3, 4, 3, 4},
2640 {"i486", &i486_cost, 16, 15, 16, 15, 16},
2641 {"pentium", &pentium_cost, 16, 7, 16, 7, 16},
2642 {"lakemont", &lakemont_cost, 16, 7, 16, 7, 16},
2643 {"pentiumpro", &pentiumpro_cost, 16, 15, 16, 10, 16},
2644 {"pentium4", &pentium4_cost, 0, 0, 0, 0, 0},
2645 {"nocona", &nocona_cost, 0, 0, 0, 0, 0},
2646 {"core2", &core_cost, 16, 10, 16, 10, 16},
2647 {"nehalem", &core_cost, 16, 10, 16, 10, 16},
2648 {"sandybridge", &core_cost, 16, 10, 16, 10, 16},
2649 {"haswell", &core_cost, 16, 10, 16, 10, 16},
2650 {"bonnell", &atom_cost, 16, 15, 16, 7, 16},
2651 {"silvermont", &slm_cost, 16, 15, 16, 7, 16},
2652 {"knl", &slm_cost, 16, 15, 16, 7, 16},
2653 {"skylake-avx512", &core_cost, 16, 10, 16, 10, 16},
2654 {"intel", &intel_cost, 16, 15, 16, 7, 16},
2655 {"geode", &geode_cost, 0, 0, 0, 0, 0},
2656 {"k6", &k6_cost, 32, 7, 32, 7, 32},
2657 {"athlon", &athlon_cost, 16, 7, 16, 7, 16},
2658 {"k8", &k8_cost, 16, 7, 16, 7, 16},
2659 {"amdfam10", &amdfam10_cost, 32, 24, 32, 7, 32},
2660 {"bdver1", &bdver1_cost, 16, 10, 16, 7, 11},
2661 {"bdver2", &bdver2_cost, 16, 10, 16, 7, 11},
2662 {"bdver3", &bdver3_cost, 16, 10, 16, 7, 11},
2663 {"bdver4", &bdver4_cost, 16, 10, 16, 7, 11},
2664 {"znver1", &znver1_cost, 16, 10, 16, 7, 11},
2665 {"btver1", &btver1_cost, 16, 10, 16, 7, 11},
2666 {"btver2", &btver2_cost, 16, 10, 16, 7, 11}
/* NOTE(review): body of the vzeroupper insertion pass.  It clears all mode
   switching entities, enables only AVX_U128, and re-runs the mode-switching
   pass.  Return type/braces/return value are elided by the sampling.  */
2670 rest_of_handle_insert_vzeroupper (void)
2674 /* vzeroupper instructions are inserted immediately after reload to
2675 account for possible spills from 256bit registers. The pass
2676 reuses mode switching infrastructure by re-running mode insertion
2677 pass, so disable entities that have already been processed. */
2678 for (i = 0; i < MAX_386_ENTITIES; i++)
2679 ix86_optimize_mode_switching[i] = 0;
2681 ix86_optimize_mode_switching[AVX_U128] = 1;
2683 /* Call optimize_mode_switching. */
2684 g->get_passes ()->execute_pass_mode_switching ();
2688 /* Return 1 if INSN uses or defines a hard register.
2689 Hard register uses in a memory address are ignored.
2690 Clobbers and flags definitions are ignored. */
2693 has_non_address_hard_reg (rtx_insn *insn)
/* Defs: hard regs count unless they are must-clobbers or the flags reg.  */
2696 FOR_EACH_INSN_DEF (ref, insn)
2697 if (HARD_REGISTER_P (DF_REF_REAL_REG (ref))
2698 && !DF_REF_FLAGS_IS_SET (ref, DF_REF_MUST_CLOBBER)
2699 && DF_REF_REGNO (ref) != FLAGS_REG)
/* Uses: hard regs count unless the use is inside a memory address.  */
2702 FOR_EACH_INSN_USE (ref, insn)
2703 if (!DF_REF_REG_MEM_P (ref) && HARD_REGISTER_P (DF_REF_REAL_REG (ref)))
2709 /* Check if comparison INSN may be transformed
2710 into vector comparison. Currently we transform
2711 zero checks only which look like:
2713 (set (reg:CCZ 17 flags)
2714 (compare:CCZ (ior:SI (subreg:SI (reg:DI x) 4)
2715 (subreg:SI (reg:DI x) 0))
2716 (const_int 0 [0]))) */
2719 convertible_comparison_p (rtx_insn *insn)
/* NOTE(review): early TARGET_SSE4_1 guard and several `return false` lines
   are elided by the sampling.  */
2724 rtx def_set = single_set (insn);
2726 gcc_assert (def_set);
2728 rtx src = SET_SRC (def_set);
2729 rtx dst = SET_DEST (def_set);
2731 gcc_assert (GET_CODE (src) == COMPARE);
/* Destination must be the CCZ flags register.  */
2733 if (GET_CODE (dst) != REG
2734 || REGNO (dst) != FLAGS_REG
2735 || GET_MODE (dst) != CCZmode)
2738 rtx op1 = XEXP (src, 0);
2739 rtx op2 = XEXP (src, 1);
/* Only compares against zero are handled.  */
2741 if (op2 != CONST0_RTX (GET_MODE (op2)))
2744 if (GET_CODE (op1) != IOR)
2747 op2 = XEXP (op1, 1);
2748 op1 = XEXP (op1, 0);
/* The IOR operands must be the low and high SImode halves (bytes 0 and 4)
   of the same DImode pseudo, in either order.  */
2752 || GET_MODE (op1) != SImode
2753 || GET_MODE (op2) != SImode
2754 || ((SUBREG_BYTE (op1) != 0
2755 || SUBREG_BYTE (op2) != GET_MODE_SIZE (SImode))
2756 && (SUBREG_BYTE (op2) != 0
2757 || SUBREG_BYTE (op1) != GET_MODE_SIZE (SImode))))
2760 op1 = SUBREG_REG (op1);
2761 op2 = SUBREG_REG (op2);
2765 || GET_MODE (op1) != DImode)
2771 /* Return 1 if INSN may be converted into vector
2775 scalar_to_vector_candidate_p (rtx_insn *insn)
2777 rtx def_set = single_set (insn);
/* Insns touching hard registers (outside addresses) are not convertible.  */
2782 if (has_non_address_hard_reg (insn))
2785 rtx src = SET_SRC (def_set);
2786 rtx dst = SET_DEST (def_set);
/* Zero-check compares get their own, stricter test.  */
2788 if (GET_CODE (src) == COMPARE)
2789 return convertible_comparison_p (insn);
2791 /* We are interested in DImode promotion only. */
2792 if (GET_MODE (src) != DImode
2793 || GET_MODE (dst) != DImode)
2796 if (!REG_P (dst) && !MEM_P (dst))
/* NOTE(review): the case labels of this switch (presumably PLUS/MINUS/IOR/
   XOR/AND per compute_convert_gain below) are elided by the sampling.  */
2799 switch (GET_CODE (src))
2818 if (!REG_P (XEXP (src, 0)) && !MEM_P (XEXP (src, 0)))
2821 if (!REG_P (XEXP (src, 1)) && !MEM_P (XEXP (src, 1)))
2824 if (GET_MODE (XEXP (src, 0)) != DImode
2825 || GET_MODE (XEXP (src, 1)) != DImode)
2831 /* For a given bitmap of insn UIDs scans all instruction and
2832 remove insn from CANDIDATES in case it has both convertible
2833 and not convertible definitions.
2835 All insns in a bitmap are conversion candidates according to
2836 scalar_to_vector_candidate_p. Currently it implies all insns
2840 remove_non_convertible_regs (bitmap candidates)
2844 bitmap regs = BITMAP_ALLOC (NULL);
/* Pass 1: collect pseudos that also have a definition outside CANDIDATES.  */
2846 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, id, bi)
2848 rtx def_set = single_set (DF_INSN_UID_GET (id)->insn);
2849 rtx reg = SET_DEST (def_set);
2852 || bitmap_bit_p (regs, REGNO (reg))
2853 || HARD_REGISTER_P (reg))
2856 for (df_ref def = DF_REG_DEF_CHAIN (REGNO (reg));
2858 def = DF_REF_NEXT_REG (def))
2860 if (!bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
2864 "r%d has non convertible definition in insn %d\n",
2865 REGNO (reg), DF_REF_INSN_UID (def));
2867 bitmap_set_bit (regs, REGNO (reg));
/* Pass 2: drop every candidate insn that defines such a mixed pseudo.
   NOTE(review): BITMAP_FREE (regs) at the end is elided by the sampling.  */
2873 EXECUTE_IF_SET_IN_BITMAP (regs, 0, id, bi)
2875 for (df_ref def = DF_REG_DEF_CHAIN (id);
2877 def = DF_REF_NEXT_REG (def))
2878 if (bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
2881 fprintf (dump_file, "Removing insn %d from candidates list\n",
2882 DF_REF_INSN_UID (def));
2884 bitmap_clear_bit (candidates, DF_REF_INSN_UID (def));
/* NOTE(review): fragment of the scalar_chain class declaration (the `class
   scalar_chain` header line and the insns/defs/defs_conv bitmap members'
   declarations are elided).  A chain groups the DImode insns that will be
   converted to V2DImode together.  */
2897 static unsigned max_id;
2899 /* ID of a chain. */
2900 unsigned int chain_id;
2901 /* A queue of instructions to be included into a chain. */
2903 /* Instructions included into a chain. */
2905 /* All registers defined by a chain. */
2907 /* Registers used in both vector and sclar modes. */
/* Public driver entry points.  */
2910 void build (bitmap candidates, unsigned insn_uid);
2911 int compute_convert_gain ();
/* Private helpers used during chain construction and conversion.  */
2915 void add_insn (bitmap candidates, unsigned insn_uid);
2916 void add_to_queue (unsigned insn_uid);
2917 void mark_dual_mode_def (df_ref def);
2918 void analyze_register_chain (bitmap candidates, df_ref ref);
2919 rtx replace_with_subreg (rtx x, rtx reg, rtx subreg);
2920 void emit_conversion_insns (rtx insns, rtx_insn *pos);
2921 void replace_with_subreg_in_insn (rtx_insn *insn, rtx reg, rtx subreg);
2922 void convert_insn (rtx_insn *insn);
2923 void convert_op (rtx *op, rtx_insn *insn);
2924 void convert_reg (unsigned regno);
2925 void make_vector_copies (unsigned regno);
/* Static chain-ID counter shared by all chains.  */
2928 unsigned scalar_chain::max_id = 0;
2930 /* Initialize new chain. */
2932 scalar_chain::scalar_chain ()
/* Each chain gets a fresh, globally unique id for dump output.  */
2934 chain_id = ++max_id;
2937 fprintf (dump_file, "Created a new instruction chain #%d\n", chain_id);
/* Allocate the per-chain bitmaps on the default obstack.  */
2939 bitmap_obstack_initialize (NULL);
2940 insns = BITMAP_ALLOC (NULL);
2941 defs = BITMAP_ALLOC (NULL);
2942 defs_conv = BITMAP_ALLOC (NULL);
2946 /* Free chain's data. */
2948 scalar_chain::~scalar_chain ()
/* NOTE(review): BITMAP_FREE (defs) between these lines is elided by the
   sampling — confirm against the full source.  */
2950 BITMAP_FREE (insns);
2952 BITMAP_FREE (defs_conv);
2953 bitmap_obstack_release (NULL);
2956 /* Add instruction into chains' queue. */
2959 scalar_chain::add_to_queue (unsigned insn_uid)
/* Skip insns already in the chain or already queued.  */
2961 if (bitmap_bit_p (insns, insn_uid)
2962 || bitmap_bit_p (queue, insn_uid))
2966 fprintf (dump_file, " Adding insn %d into chain's #%d queue\n",
2967 insn_uid, chain_id);
2968 bitmap_set_bit (queue, insn_uid);
2971 /* Mark register defined by DEF as requiring conversion. */
2974 scalar_chain::mark_dual_mode_def (df_ref def)
2976 gcc_assert (DF_REF_REG_DEF_P (def));
/* Idempotent: a pseudo already in defs_conv needs no second note.  */
2978 if (bitmap_bit_p (defs_conv, DF_REF_REGNO (def)))
2983 " Mark r%d def in insn %d as requiring both modes in chain #%d\n",
2984 DF_REF_REGNO (def), DF_REF_INSN_UID (def), chain_id);
2986 bitmap_set_bit (defs_conv, DF_REF_REGNO (def));
2989 /* Check REF's chain to add new insns into a queue
2990 and find registers requiring conversion. */
2993 scalar_chain::analyze_register_chain (bitmap candidates, df_ref ref)
2997 gcc_assert (bitmap_bit_p (insns, DF_REF_INSN_UID (ref))
2998 || bitmap_bit_p (candidates, DF_REF_INSN_UID (ref)));
2999 add_to_queue (DF_REF_INSN_UID (ref));
/* Walk the def-use / use-def chain of REF; every linked candidate insn joins
   the queue, every non-convertible link forces a dual-mode register.  */
3001 for (chain = DF_REF_CHAIN (ref); chain; chain = chain->next)
3003 unsigned uid = DF_REF_INSN_UID (chain->ref);
3005 if (!NONDEBUG_INSN_P (DF_REF_INSN (chain->ref)))
3008 if (!DF_REF_REG_MEM_P (chain->ref))
3010 if (bitmap_bit_p (insns, uid))
3013 if (bitmap_bit_p (candidates, uid))
/* A def reached from a non-convertible insn must exist in both modes.  */
3020 if (DF_REF_REG_DEF_P (chain->ref))
3023 fprintf (dump_file, " r%d def in insn %d isn't convertible\n",
3024 DF_REF_REGNO (chain->ref), uid);
3025 mark_dual_mode_def (chain->ref);
3030 fprintf (dump_file, " r%d use in insn %d isn't convertible\n",
3031 DF_REF_REGNO (chain->ref), uid);
3032 mark_dual_mode_def (ref);
3037 /* Add instruction into a chain. */
3040 scalar_chain::add_insn (bitmap candidates, unsigned int insn_uid)
/* Already a member — nothing to do.  */
3042 if (bitmap_bit_p (insns, insn_uid))
3046 fprintf (dump_file, " Adding insn %d to chain #%d\n", insn_uid, chain_id);
3048 bitmap_set_bit (insns, insn_uid);
/* Record the pseudo this insn defines (hard regs are never chain defs).  */
3050 rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn;
3051 rtx def_set = single_set (insn);
3052 if (def_set && REG_P (SET_DEST (def_set))
3053 && !HARD_REGISTER_P (SET_DEST (def_set)))
3054 bitmap_set_bit (defs, REGNO (SET_DEST (def_set)));
/* Recurse over all defs of each defined pseudo and all non-memory uses to
   pull dependent insns into the chain.  */
3058 for (ref = DF_INSN_UID_DEFS (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref))
3059 if (!HARD_REGISTER_P (DF_REF_REG (ref)))
3060 for (def = DF_REG_DEF_CHAIN (DF_REF_REGNO (ref));
3062 def = DF_REF_NEXT_REG (def))
3063 analyze_register_chain (candidates, def);
3064 for (ref = DF_INSN_UID_USES (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref))
3065 if (!DF_REF_REG_MEM_P (ref))
3066 analyze_register_chain (candidates, ref);
3069 /* Build new chain starting from insn INSN_UID recursively
3070 adding all dependent uses and definitions. */
3073 scalar_chain::build (bitmap candidates, unsigned insn_uid)
3075 queue = BITMAP_ALLOC (NULL);
3076 bitmap_set_bit (queue, insn_uid);
3079 fprintf (dump_file, "Building chain #%d...\n", chain_id);
/* Worklist loop: each processed insn is removed from both the queue and the
   global candidate set, and may enqueue further dependent insns.  */
3081 while (!bitmap_empty_p (queue))
3083 insn_uid = bitmap_first_set_bit (queue);
3084 bitmap_clear_bit (queue, insn_uid);
3085 bitmap_clear_bit (candidates, insn_uid);
3086 add_insn (candidates, insn_uid);
/* Dump the collected chain and the registers needing both modes.  */
3091 fprintf (dump_file, "Collected chain #%d...\n", chain_id);
3092 fprintf (dump_file, " insns: ");
3093 dump_bitmap (dump_file, insns);
3094 if (!bitmap_empty_p (defs_conv))
3098 const char *comma = "";
3099 fprintf (dump_file, " defs to convert: ");
3100 EXECUTE_IF_SET_IN_BITMAP (defs_conv, 0, id, bi)
3102 fprintf (dump_file, "%sr%d", comma, id);
3105 fprintf (dump_file, "\n");
3109 BITMAP_FREE (queue);
3112 /* Compute a gain for chain conversion. */
3115 scalar_chain::compute_convert_gain ()
3123 fprintf (dump_file, "Computing gain for chain #%d...\n", chain_id);
/* Sum per-insn gains: scalar cost (two SImode halves) minus the SSE cost of
   the equivalent vector operation.  */
3125 EXECUTE_IF_SET_IN_BITMAP (insns, 0, insn_uid, bi)
3127 rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn;
3128 rtx def_set = single_set (insn);
3129 rtx src = SET_SRC (def_set);
3130 rtx dst = SET_DEST (def_set);
3132 if (REG_P (src) && REG_P (dst))
3133 gain += COSTS_N_INSNS (2) - ix86_cost->sse_move;
3134 else if (REG_P (src) && MEM_P (dst))
3135 gain += 2 * ix86_cost->int_store[2] - ix86_cost->sse_store[1];
3136 else if (MEM_P (src) && REG_P (dst))
3137 gain += 2 * ix86_cost->int_load[2] - ix86_cost->sse_load[1];
3138 else if (GET_CODE (src) == PLUS
3139 || GET_CODE (src) == MINUS
3140 || GET_CODE (src) == IOR
3141 || GET_CODE (src) == XOR
3142 || GET_CODE (src) == AND)
3143 gain += ix86_cost->add;
3144 else if (GET_CODE (src) == COMPARE)
3146 /* Assume comparison cost is the same. */
3153 fprintf (dump_file, " Instruction convertion gain: %d\n", gain);
/* Subtract the cost of moving dual-mode registers between units, once per
   definition of each register in defs_conv.  */
3155 EXECUTE_IF_SET_IN_BITMAP (defs_conv, 0, insn_uid, bi)
3156 cost += DF_REG_DEF_COUNT (insn_uid) * ix86_cost->mmxsse_to_integer;
3159 fprintf (dump_file, " Registers convertion cost: %d\n", cost);
3164 fprintf (dump_file, " Total gain: %d\n", gain);
3169 /* Replace REG in X with a V2DI subreg of NEW_REG. */
3172 scalar_chain::replace_with_subreg (rtx x, rtx reg, rtx new_reg)
/* Base case (condition line elided): X itself is REG.  */
3175 return gen_rtx_SUBREG (V2DImode, new_reg, 0);
/* Otherwise recurse over X's operands and vector elements.  */
3177 const char *fmt = GET_RTX_FORMAT (GET_CODE (x));
3179 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
3182 XEXP (x, i) = replace_with_subreg (XEXP (x, i), reg, new_reg);
3183 else if (fmt[i] == 'E')
3184 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3185 XVECEXP (x, i, j) = replace_with_subreg (XVECEXP (x, i, j),
3192 /* Replace REG in INSN with a V2DI subreg of NEW_REG. */
3195 scalar_chain::replace_with_subreg_in_insn (rtx_insn *insn, rtx reg, rtx new_reg)
3197 replace_with_subreg (single_set (insn), reg, new_reg);
3200 /* Insert generated conversion instruction sequence INSNS
3201 after instruction AFTER. New BB may be required in case
3202 instruction has EH region attached. */
3205 scalar_chain::emit_conversion_insns (rtx insns, rtx_insn *after)
/* Simple case: AFTER cannot end a basic block, emit directly after it.  */
3207 if (!control_flow_insn_p (after))
3209 emit_insn_after (insns, after);
/* Otherwise split the fallthru edge and emit at the head of the new block.  */
3213 basic_block bb = BLOCK_FOR_INSN (after);
3214 edge e = find_fallthru_edge (bb->succs);
3217 basic_block new_bb = split_edge (e);
3218 emit_insn_after (insns, BB_HEAD (new_bb));
3221 /* Make vector copies for all register REGNO definitions
3222 and replace its uses in a chain. */
3225 scalar_chain::make_vector_copies (unsigned regno)
3227 rtx reg = regno_reg_rtx[regno];
3228 rtx vreg = gen_reg_rtx (DImode);
/* After each definition of REGNO outside the chain, emit code building the
   vector copy VREG from the two SImode halves of REG.  Three strategies,
   fastest first (TARGET_SSE4_1 branch condition is elided by the sampling).  */
3231 for (ref = DF_REG_DEF_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref))
3232 if (!bitmap_bit_p (insns, DF_REF_INSN_UID (ref)))
3234 rtx_insn *insn = DF_REF_INSN (ref);
/* SSE4.1: loadld for the low half, pinsrd for the high half.  */
3239 emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, vreg, 0),
3240 CONST0_RTX (V4SImode),
3241 gen_rtx_SUBREG (SImode, reg, 0)));
3242 emit_insn (gen_sse4_1_pinsrd (gen_rtx_SUBREG (V4SImode, vreg, 0),
3243 gen_rtx_SUBREG (V4SImode, vreg, 0),
3244 gen_rtx_SUBREG (SImode, reg, 4),
/* SSE2 with cheap GPR->XMM moves: two loadld plus punpckldq.  */
3247 else if (TARGET_INTER_UNIT_MOVES_TO_VEC)
3249 rtx tmp = gen_reg_rtx (DImode);
3250 emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, vreg, 0),
3251 CONST0_RTX (V4SImode),
3252 gen_rtx_SUBREG (SImode, reg, 0)));
3253 emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, tmp, 0),
3254 CONST0_RTX (V4SImode),
3255 gen_rtx_SUBREG (SImode, reg, 4)));
3256 emit_insn (gen_vec_interleave_lowv4si
3257 (gen_rtx_SUBREG (V4SImode, vreg, 0),
3258 gen_rtx_SUBREG (V4SImode, vreg, 0),
3259 gen_rtx_SUBREG (V4SImode, tmp, 0)));
/* Fallback: bounce both halves through a stack slot.  */
3263 rtx tmp = assign_386_stack_local (DImode, SLOT_TEMP);
3264 emit_move_insn (adjust_address (tmp, SImode, 0),
3265 gen_rtx_SUBREG (SImode, reg, 0));
3266 emit_move_insn (adjust_address (tmp, SImode, 4),
3267 gen_rtx_SUBREG (SImode, reg, 4));
3268 emit_move_insn (vreg, tmp);
3270 emit_conversion_insns (get_insns (), insn);
3275 " Copied r%d to a vector register r%d for insn %d\n",
3276 regno, REGNO (vreg), DF_REF_INSN_UID (ref));
/* Finally rewrite every in-chain use of REG to a V2DI subreg of VREG.  */
3279 for (ref = DF_REG_USE_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref))
3280 if (bitmap_bit_p (insns, DF_REF_INSN_UID (ref)))
3282 replace_with_subreg_in_insn (DF_REF_INSN (ref), reg, vreg);
3285 fprintf (dump_file, " Replaced r%d with r%d in insn %d\n",
3286 regno, REGNO (vreg), DF_REF_INSN_UID (ref));
3290 /* Convert all definitions of register REGNO
3291 and fix its uses. Scalar copies may be created
3292 in case register is used in not convertible insn. */
3295 scalar_chain::convert_reg (unsigned regno)
3297 bool scalar_copy = bitmap_bit_p (defs_conv, regno);
3298 rtx reg = regno_reg_rtx[regno];
3299 rtx scopy = NULL_RTX;
/* CONV tracks chain insns whose use of REG is not yet rewritten.  */
3303 conv = BITMAP_ALLOC (NULL);
3304 bitmap_copy (conv, insns);
3307 scopy = gen_reg_rtx (DImode);
/* Rewrite each in-chain definition of REGNO; if a scalar copy is needed,
   materialize SCOPY right after the (now vector) definition.  */
3309 for (ref = DF_REG_DEF_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref))
3311 rtx_insn *insn = DF_REF_INSN (ref);
3312 rtx def_set = single_set (insn);
3313 rtx src = SET_SRC (def_set);
3314 rtx reg = DF_REF_REG (ref);
3318 replace_with_subreg_in_insn (insn, reg, reg);
3319 bitmap_clear_bit (conv, INSN_UID (insn));
3324 rtx vcopy = gen_reg_rtx (V2DImode);
/* Extract the two SImode halves either via direct XMM->GPR moves ... */
3327 if (TARGET_INTER_UNIT_MOVES_FROM_VEC)
3329 emit_move_insn (vcopy, gen_rtx_SUBREG (V2DImode, reg, 0));
3330 emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 0),
3331 gen_rtx_SUBREG (SImode, vcopy, 0));
3332 emit_move_insn (vcopy,
3333 gen_rtx_LSHIFTRT (V2DImode, vcopy, GEN_INT (32)));
3334 emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 4),
3335 gen_rtx_SUBREG (SImode, vcopy, 0));
/* ... or by spilling through a stack slot when such moves are slow.  */
3339 rtx tmp = assign_386_stack_local (DImode, SLOT_TEMP);
3340 emit_move_insn (tmp, reg);
3341 emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 0),
3342 adjust_address (tmp, SImode, 0));
3343 emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 4),
3344 adjust_address (tmp, SImode, 4));
3346 emit_conversion_insns (get_insns (), insn);
3351 " Copied r%d to a scalar register r%d for insn %d\n",
3352 regno, REGNO (scopy), INSN_UID (insn));
/* Fix up uses: in-chain uses become V2DI subregs; remaining scalar uses
   outside the chain are redirected to SCOPY.  */
3356 for (ref = DF_REG_USE_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref))
3357 if (bitmap_bit_p (insns, DF_REF_INSN_UID (ref)))
3359 if (bitmap_bit_p (conv, DF_REF_INSN_UID (ref)))
3361 rtx def_set = single_set (DF_REF_INSN (ref));
3362 if (!MEM_P (SET_DEST (def_set))
3363 || !REG_P (SET_SRC (def_set)))
3364 replace_with_subreg_in_insn (DF_REF_INSN (ref), reg, reg);
3365 bitmap_clear_bit (conv, DF_REF_INSN_UID (ref));
3368 else if (NONDEBUG_INSN_P (DF_REF_INSN (ref)))
3370 replace_rtx (DF_REF_INSN (ref), reg, scopy);
3371 df_insn_rescan (DF_REF_INSN (ref));
3377 /* Convert operand OP in INSN. All register uses
3378 are converted during registers conversion.
3379 Therefore we should just handle memory operands. */
3382 scalar_chain::convert_op (rtx *op, rtx_insn *insn)
3384 *op = copy_rtx_if_shared (*op);
/* Memory operand (condition line elided): preload into a fresh DImode pseudo
   and use its V2DI subreg in the converted insn.  */
3388 rtx tmp = gen_reg_rtx (DImode);
3390 emit_insn_before (gen_move_insn (tmp, *op), insn);
3391 *op = gen_rtx_SUBREG (V2DImode, tmp, 0);
3394 fprintf (dump_file, " Preloading operand for insn %d into r%d\n",
3395 INSN_UID (insn), REGNO (tmp));
/* Anything else must already be a V2DI subreg from register conversion.  */
3399 gcc_assert (SUBREG_P (*op));
3400 gcc_assert (GET_MODE (*op) == V2DImode);
3404 /* Convert INSN to vector mode. */
3407 scalar_chain::convert_insn (rtx_insn *insn)
3409 rtx def_set = single_set (insn);
3410 rtx src = SET_SRC (def_set);
3411 rtx dst = SET_DEST (def_set);
/* A computed value stored to memory must pass through a temporary register:
   store the vector result into TMP's V2DI subreg, then move TMP to DST.  */
3414 if (MEM_P (dst) && !REG_P (src))
3416 /* There are no scalar integer instructions and therefore
3417 temporary register usage is required. */
3418 rtx tmp = gen_reg_rtx (DImode);
3419 emit_conversion_insns (gen_move_insn (dst, tmp), insn);
3420 dst = gen_rtx_SUBREG (V2DImode, tmp, 0);
/* NOTE(review): case labels of this switch (arithmetic ops, MEM/REG,
   COMPARE) are elided by the sampling.  */
3423 switch (GET_CODE (src))
/* Binary ops: convert both operands and retag the operation as V2DI.  */
3430 convert_op (&XEXP (src, 0), insn);
3431 convert_op (&XEXP (src, 1), insn);
3432 PUT_MODE (src, V2DImode);
3437 convert_op (&src, insn);
3444 gcc_assert (GET_MODE (src) == V2DImode);
/* COMPARE: rebuild the zero test as ptest-style UNSPEC on the flags reg.  */
3448 src = SUBREG_REG (XEXP (XEXP (src, 0), 0));
3450 gcc_assert ((REG_P (src) && GET_MODE (src) == DImode)
3451 || (SUBREG_P (src) && GET_MODE (src) == V2DImode));
3454 subreg = gen_rtx_SUBREG (V2DImode, src, 0);
3456 subreg = copy_rtx_if_shared (src);
3457 emit_insn_before (gen_vec_interleave_lowv2di (copy_rtx_if_shared (subreg),
3458 copy_rtx_if_shared (subreg),
3459 copy_rtx_if_shared (subreg)),
3461 dst = gen_rtx_REG (CCmode, FLAGS_REG);
3462 src = gen_rtx_UNSPEC (CCmode, gen_rtvec (2, copy_rtx_if_shared (src),
3463 copy_rtx_if_shared (src)),
/* Install the converted SET and re-recognize the insn.  */
3471 SET_SRC (def_set) = src;
3472 SET_DEST (def_set) = dst;
3474 /* Drop possible dead definitions. */
3475 PATTERN (insn) = def_set;
3477 INSN_CODE (insn) = -1;
3478 recog_memoized (insn);
3479 df_insn_rescan (insn);
3482 /* Convert whole chain creating required register
3483 conversions and copies. */
3486 scalar_chain::convert ()
3490 int converted_insns = 0;
/* Debug counter allows bisecting miscompiles by limiting conversions.  */
3492 if (!dbg_cnt (stv_conversion))
3496 fprintf (dump_file, "Converting chain #%d...\n", chain_id);
/* Convert chain-defined registers, then make vector copies for registers
   needed in both modes but not defined in the chain, then each insn.  */
3498 EXECUTE_IF_SET_IN_BITMAP (defs, 0, id, bi)
3501 EXECUTE_IF_AND_COMPL_IN_BITMAP (defs_conv, defs, 0, id, bi)
3502 make_vector_copies (id);
3504 EXECUTE_IF_SET_IN_BITMAP (insns, 0, id, bi)
3506 convert_insn (DF_INSN_UID_GET (id)->insn);
3510 return converted_insns;
3513 /* Main STV pass function. Find and convert scalar
3514 instructions into vector mode when profitable. */
3517 convert_scalars_to_vector ()
3521 int converted_insns = 0;
3523 bitmap_obstack_initialize (NULL);
3524 candidates = BITMAP_ALLOC (NULL);
/* Dataflow setup: def-use/use-def chains are required by the chain walk.  */
3526 calculate_dominance_info (CDI_DOMINATORS);
3527 df_set_flags (DF_DEFER_INSN_RESCAN);
3528 df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
3529 df_md_add_problem ();
3532 /* Find all instructions we want to convert into vector mode. */
3534 fprintf (dump_file, "Searching for mode convertion candidates...\n");
3536 FOR_EACH_BB_FN (bb, cfun)
3539 FOR_BB_INSNS (bb, insn)
3540 if (scalar_to_vector_candidate_p (insn))
3543 fprintf (dump_file, " insn %d is marked as a candidate\n",
3546 bitmap_set_bit (candidates, INSN_UID (insn));
3550 remove_non_convertible_regs (candidates);
3552 if (bitmap_empty_p (candidates))
3554 fprintf (dump_file, "There are no candidates for optimization.\n");
/* Greedily build a chain from each remaining candidate and convert it only
   when the estimated gain is positive.  */
3556 while (!bitmap_empty_p (candidates))
3558 unsigned uid = bitmap_first_set_bit (candidates);
3561 /* Find instructions chain we want to convert to vector mode.
3562 Check all uses and definitions to estimate all required
3564 chain.build (candidates, uid);
3566 if (chain.compute_convert_gain () > 0)
3567 converted_insns += chain.convert ();
3570 fprintf (dump_file, "Chain #%d conversion is not profitable\n",
3575 fprintf (dump_file, "Total insns converted: %d\n", converted_insns);
3577 BITMAP_FREE (candidates);
3578 bitmap_obstack_release (NULL);
3579 df_process_deferred_rescans ();
3581 /* Conversion means we may have 128bit register spills/fills
3582 which require aligned stack. */
3583 if (converted_insns)
3585 if (crtl->stack_alignment_needed < 128)
3586 crtl->stack_alignment_needed = 128;
3587 if (crtl->stack_alignment_estimated < 128)
3588 crtl->stack_alignment_estimated = 128;
/* Pass descriptor and opt_pass wrapper for the vzeroupper insertion pass.  */
3596 const pass_data pass_data_insert_vzeroupper =
3598 RTL_PASS, /* type */
3599 "vzeroupper", /* name */
3600 OPTGROUP_NONE, /* optinfo_flags */
3601 TV_NONE, /* tv_id */
3602 0, /* properties_required */
3603 0, /* properties_provided */
3604 0, /* properties_destroyed */
3605 0, /* todo_flags_start */
3606 TODO_df_finish, /* todo_flags_finish */
3609 class pass_insert_vzeroupper : public rtl_opt_pass
3612 pass_insert_vzeroupper(gcc::context *ctxt)
3613 : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt)
3616 /* opt_pass methods: */
/* Run only for AVX (not AVX512F, whose ops don't need vzeroupper) when
   -mvzeroupper and expensive optimizations are on; the final clause of the
   gate condition is elided by the sampling.  */
3617 virtual bool gate (function *)
3619 return TARGET_AVX && !TARGET_AVX512F
3620 && TARGET_VZEROUPPER && flag_expensive_optimizations
3624 virtual unsigned int execute (function *)
3626 return rest_of_handle_insert_vzeroupper ();
3629 }; // class pass_insert_vzeroupper
/* Pass descriptor, opt_pass wrapper and factory functions for the scalar-
   to-vector (STV) pass.  The "name" field line of pass_data_stv is elided
   by the sampling.  */
3631 const pass_data pass_data_stv =
3633 RTL_PASS, /* type */
3635 OPTGROUP_NONE, /* optinfo_flags */
3636 TV_NONE, /* tv_id */
3637 0, /* properties_required */
3638 0, /* properties_provided */
3639 0, /* properties_destroyed */
3640 0, /* todo_flags_start */
3641 TODO_df_finish, /* todo_flags_finish */
3644 class pass_stv : public rtl_opt_pass
3647 pass_stv (gcc::context *ctxt)
3648 : rtl_opt_pass (pass_data_stv, ctxt)
3651 /* opt_pass methods: */
/* STV only applies to 32-bit code with SSE2 and -mstv at -O2+.  */
3652 virtual bool gate (function *)
3654 return !TARGET_64BIT && TARGET_STV && TARGET_SSE2 && optimize > 1;
3657 virtual unsigned int execute (function *)
3659 return convert_scalars_to_vector ();
3662 }; // class pass_stv
/* Factory functions referenced from the pass manager.  */
3667 make_pass_insert_vzeroupper (gcc::context *ctxt)
3669 return new pass_insert_vzeroupper (ctxt);
3673 make_pass_stv (gcc::context *ctxt)
3675 return new pass_stv (ctxt);
3678 /* Return true if a red-zone is in use. */
3681 ix86_using_red_zone (void)
/* Red zone exists unless disabled or under the 64-bit MS ABI.  */
3683 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
3686 /* Return a string that documents the current -m options. The caller is
3687 responsible for freeing the string. */
3690 ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
3691 const char *tune, enum fpmath_unit fpmath,
3694 struct ix86_target_opts
3696 const char *option; /* option string */
3697 HOST_WIDE_INT mask; /* isa mask options */
3700 /* This table is ordered so that options like -msse4.2 that imply
3701 preceding options while match those first. */
3702 static struct ix86_target_opts isa_opts[] =
3704 { "-mfma4", OPTION_MASK_ISA_FMA4 },
3705 { "-mfma", OPTION_MASK_ISA_FMA },
3706 { "-mxop", OPTION_MASK_ISA_XOP },
3707 { "-mlwp", OPTION_MASK_ISA_LWP },
3708 { "-mavx512f", OPTION_MASK_ISA_AVX512F },
3709 { "-mavx512er", OPTION_MASK_ISA_AVX512ER },
3710 { "-mavx512cd", OPTION_MASK_ISA_AVX512CD },
3711 { "-mavx512pf", OPTION_MASK_ISA_AVX512PF },
3712 { "-mavx512dq", OPTION_MASK_ISA_AVX512DQ },
3713 { "-mavx512bw", OPTION_MASK_ISA_AVX512BW },
3714 { "-mavx512vl", OPTION_MASK_ISA_AVX512VL },
3715 { "-mavx512ifma", OPTION_MASK_ISA_AVX512IFMA },
3716 { "-mavx512vbmi", OPTION_MASK_ISA_AVX512VBMI },
3717 { "-msse4a", OPTION_MASK_ISA_SSE4A },
3718 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
3719 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
3720 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
3721 { "-msse3", OPTION_MASK_ISA_SSE3 },
3722 { "-msse2", OPTION_MASK_ISA_SSE2 },
3723 { "-msse", OPTION_MASK_ISA_SSE },
3724 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
3725 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
3726 { "-mmmx", OPTION_MASK_ISA_MMX },
3727 { "-mabm", OPTION_MASK_ISA_ABM },
3728 { "-mbmi", OPTION_MASK_ISA_BMI },
3729 { "-mbmi2", OPTION_MASK_ISA_BMI2 },
3730 { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
3731 { "-mhle", OPTION_MASK_ISA_HLE },
3732 { "-mfxsr", OPTION_MASK_ISA_FXSR },
3733 { "-mrdseed", OPTION_MASK_ISA_RDSEED },
3734 { "-mprfchw", OPTION_MASK_ISA_PRFCHW },
3735 { "-madx", OPTION_MASK_ISA_ADX },
3736 { "-mtbm", OPTION_MASK_ISA_TBM },
3737 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
3738 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
3739 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
3740 { "-maes", OPTION_MASK_ISA_AES },
3741 { "-msha", OPTION_MASK_ISA_SHA },
3742 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
3743 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
3744 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
3745 { "-mf16c", OPTION_MASK_ISA_F16C },
3746 { "-mrtm", OPTION_MASK_ISA_RTM },
3747 { "-mxsave", OPTION_MASK_ISA_XSAVE },
3748 { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT },
3749 { "-mprefetchwt1", OPTION_MASK_ISA_PREFETCHWT1 },
3750 { "-mclflushopt", OPTION_MASK_ISA_CLFLUSHOPT },
3751 { "-mxsavec", OPTION_MASK_ISA_XSAVEC },
3752 { "-mxsaves", OPTION_MASK_ISA_XSAVES },
3753 { "-mmpx", OPTION_MASK_ISA_MPX },
3754 { "-mclwb", OPTION_MASK_ISA_CLWB },
3755 { "-mpcommit", OPTION_MASK_ISA_PCOMMIT },
3756 { "-mmwaitx", OPTION_MASK_ISA_MWAITX },
3757 { "-mclzero", OPTION_MASK_ISA_CLZERO },
3758 { "-mpku", OPTION_MASK_ISA_PKU },
3762 static struct ix86_target_opts flag_opts[] =
3764 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
3765 { "-mlong-double-128", MASK_LONG_DOUBLE_128 },
3766 { "-mlong-double-64", MASK_LONG_DOUBLE_64 },
3767 { "-m80387", MASK_80387 },
3768 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
3769 { "-malign-double", MASK_ALIGN_DOUBLE },
3770 { "-mcld", MASK_CLD },
3771 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
3772 { "-mieee-fp", MASK_IEEE_FP },
3773 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
3774 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
3775 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
3776 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
3777 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
3778 { "-mno-push-args", MASK_NO_PUSH_ARGS },
3779 { "-mno-red-zone", MASK_NO_RED_ZONE },
3780 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
3781 { "-mrecip", MASK_RECIP },
3782 { "-mrtd", MASK_RTD },
3783 { "-msseregparm", MASK_SSEREGPARM },
3784 { "-mstack-arg-probe", MASK_STACK_PROBE },
3785 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
3786 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
3787 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
3788 { "-mvzeroupper", MASK_VZEROUPPER },
3789 { "-mstv", MASK_STV},
3790 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
3791 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
3792 { "-mprefer-avx128", MASK_PREFER_AVX128},
3795 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
3798 char target_other[40];
3808 memset (opts, '\0', sizeof (opts));
3810 /* Add -march= option. */
3813 opts[num][0] = "-march=";
3814 opts[num++][1] = arch;
3817 /* Add -mtune= option. */
3820 opts[num][0] = "-mtune=";
3821 opts[num++][1] = tune;
3824 /* Add -m32/-m64/-mx32. */
3825 if ((isa & OPTION_MASK_ISA_64BIT) != 0)
3827 if ((isa & OPTION_MASK_ABI_64) != 0)
3831 isa &= ~ (OPTION_MASK_ISA_64BIT
3832 | OPTION_MASK_ABI_64
3833 | OPTION_MASK_ABI_X32);
3837 opts[num++][0] = abi;
3839 /* Pick out the options in isa options. */
3840 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
3842 if ((isa & isa_opts[i].mask) != 0)
3844 opts[num++][0] = isa_opts[i].option;
3845 isa &= ~ isa_opts[i].mask;
3849 if (isa && add_nl_p)
3851 opts[num++][0] = isa_other;
3852 sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
3856 /* Add flag options. */
3857 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
3859 if ((flags & flag_opts[i].mask) != 0)
3861 opts[num++][0] = flag_opts[i].option;
3862 flags &= ~ flag_opts[i].mask;
3866 if (flags && add_nl_p)
3868 opts[num++][0] = target_other;
3869 sprintf (target_other, "(other flags: %#x)", flags);
3872 /* Add -fpmath= option. */
3875 opts[num][0] = "-mfpmath=";
3876 switch ((int) fpmath)
3879 opts[num++][1] = "387";
3883 opts[num++][1] = "sse";
3886 case FPMATH_387 | FPMATH_SSE:
3887 opts[num++][1] = "sse+387";
3899 gcc_assert (num < ARRAY_SIZE (opts));
3901 /* Size the string. */
3903 sep_len = (add_nl_p) ? 3 : 1;
3904 for (i = 0; i < num; i++)
3907 for (j = 0; j < 2; j++)
3909 len += strlen (opts[i][j]);
3912 /* Build the string. */
3913 ret = ptr = (char *) xmalloc (len);
3916 for (i = 0; i < num; i++)
3920 for (j = 0; j < 2; j++)
3921 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
3928 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
3936 for (j = 0; j < 2; j++)
3939 memcpy (ptr, opts[i][j], len2[j]);
3941 line_len += len2[j];
3946 gcc_assert (ret + len >= ptr);
3951 /* Return true if the profiling call should be emitted before the
3952 function prologue; return false to emit it after the prologue.
3953 True exactly when flag_fentry is nonzero (the "hotfix" style). */
3955 ix86_profile_before_prologue (void)
3957 return flag_fentry != 0;
3960 /* Function that is callable from the debugger to print the current
/* options.  Intended to be invoked by hand from a debugger session;
   ATTRIBUTE_UNUSED silences the unused-function warning since nothing
   in the source calls it.  */
3962 void ATTRIBUTE_UNUSED
3963 ix86_debug_options (void)
/* Build a human-readable option string from the current global ISA
   flags, target flags and arch/tune strings.  ix86_target_string
   returns xmalloc'd storage; NOTE(review): the matching free is on an
   elided line -- confirm OPTS is released after printing.  */
3965 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
3966 ix86_arch_string, ix86_tune_string,
3971 fprintf (stderr, "%s\n\n", opts);
/* Reached when ix86_target_string produced no string at all.  */
3975 fputs ("<no options>\n\n", stderr);
3980 /* Return true if T is one of the bytes we should avoid with
/* the ROP mitigation.  The listed values are the x86 return opcodes:
   0xc2 = ret imm16, 0xc3 = ret, 0xca = retf imm16, 0xcb = retf --
   i.e. the bytes that return-oriented-programming gadgets end in.  */
3984 ix86_rop_should_change_byte_p (int t)
3986 return t == 0xc2 || t == 0xc3 || t == 0xca || t == 0xcb;
3989 static const char *stringop_alg_names[] = {
/* X-macro expansion: stringop.def invokes DEF_ALG (alg, name) once per
   algorithm, so this table holds each NAME stringized, in definition
   order.  The strategy-string parser below looks names up here and uses
   the index as the corresponding stringop_alg enumerator.  */
3991 #define DEF_ALG(alg, name) #name,
3992 #include "stringop.def"
3997 /* Parse parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=.
3998 The string is of the following form (or comma separated list of it):
4000 strategy_alg:max_size:[align|noalign]
4002 where the full size range for the strategy is either [0, max_size] or
4003 [min_size, max_size], in which min_size is the max_size + 1 of the
4004 preceding range. The last size range must have max_size == -1.
4009 -mmemcpy-strategy=libcall:-1:noalign
4011 this is equivalent to (for known size memcpy) -mstringop-strategy=libcall
4015 -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign
4017 This is to tell the compiler to use the following strategy for memset
4018 1) when the expected size is between [1, 16], use rep_8byte strategy;
4019 2) when the size is between [17, 2048], use vector_loop;
4020 3) when the size is > 2048, use libcall. */
/* One parsed range: its upper size bound, chosen algorithm and whether
   the destination should be aligned first.  (Member declarations are on
   elided lines.)  */
4022 struct stringop_size_range
/* Destructively parses STRATEGY_STR (commas are overwritten with NULs)
   and overrides the per-target default memset/memcpy cost table in
   place.  IS_MEMSET selects which table -- and which option name is
   used in diagnostics.  */
4030 ix86_parse_stringop_strategy_string (char *strategy_str, bool is_memset)
4032 const struct stringop_algs *default_algs;
4033 stringop_size_range input_ranges[MAX_STRINGOP_ALGS];
4034 char *curr_range_str, *next_range_str;
/* Pick the 32-bit or 64-bit variant of the cost table to override.  */
4038 default_algs = &ix86_cost->memset[TARGET_64BIT != 0];
4040 default_algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
4042 curr_range_str = strategy_str;
/* Split off the next comma-separated range, terminating the current
   one in place.  */
4049 next_range_str = strchr (curr_range_str, ',');
4051 *next_range_str++ = '\0';
/* Parse "name:max:align" -- the %20/%10 widths bound the writes into
   the alg_name/align buffers (declared on elided lines).  */
4053 if (3 != sscanf (curr_range_str, "%20[^:]:%d:%10s",
4054 alg_name, &maxs, align))
4056 error ("wrong arg %s to option %s", curr_range_str,
4057 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
/* Each range's max must strictly grow (the sentinel -1 is exempt).  */
4061 if (n > 0 && (maxs < (input_ranges[n - 1].max + 1) && maxs != -1))
4063 error ("size ranges of option %s should be increasing",
4064 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=")<|fim_suffix|>;
4068 for (i = 0; i < last_alg; i++)
4069 if (!strcmp (alg_name, stringop_alg_names[i]))
/* No name matched: i == last_alg after the scan.  */
4074 error ("wrong stringop strategy name %s specified for option %s",
4076 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
4080 if ((stringop_alg) i == rep_prefix_8_byte
4083 /* rep; movq isn't available in 32-bit code. */
4084 error ("stringop strategy name %s specified for option %s "
4085 "not supported for 32-bit code",
4087 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
/* Record the validated range.  NOTE(review): this store happens before
   the n > MAX_STRINGOP_ALGS check below, so an input with more than
   MAX_STRINGOP_ALGS ranges writes past input_ranges before the error
   fires -- verify against the elided n++ line.  */
4091 input_ranges[n].max = maxs;
4092 input_ranges[n].alg = (stringop_alg) i;
4093 if (!strcmp (align, "align"))
4094 input_ranges[n].noalign = false;
4095 else if (!strcmp (align, "noalign"))
4096 input_ranges[n].noalign = true;
4099 error ("unknown alignment %s specified for option %s",
4100 align, is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
4104 curr_range_str = next_range_str;
4106 while (curr_range_str);
/* The final range must be open-ended (max == -1) so every size is
   covered.  */
4108 if (input_ranges[n - 1].max != -1)
4110 error ("the max value for the last size range should be -1"
4112 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
4116 if (n > MAX_STRINGOP_ALGS)
4118 error ("too many size ranges specified in option %s",
4119 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
4123 /* Now override the default algs array. */
/* The table is declared const; const_cast is used to patch the three
   fields of each entry in place with the user-specified strategy.  */
4124 for (i = 0; i < n; i++)
4126 *const_cast<int *>(&default_algs->size[i].max) = input_ranges[i].max;
4127 *const_cast<stringop_alg *>(&default_algs->size[i].alg)
4128 = input_ranges[i].alg;
4129 *const_cast<int *>(&default_algs->size[i].noalign)
4130 = input_ranges[i].noalign;
4135 /* parse -mtune-ctrl= option. When DUMP is true,
4136 print the features that are explicitly set. */
/* The option value is a comma-separated list of tuning-feature names;
   a leading '^' on a name clears the feature instead of setting it.  */
4139 parse_mtune_ctrl_str (bool dump)
/* Nothing to do when -mtune-ctrl= was not given.  */
4141 if (!ix86_tune_ctrl_string)
4144 char *next_feature_string = NULL;
/* Work on a writable copy so the commas can be NUL-terminated in
   place; ORIG keeps the start of the copy.  NOTE(review): the matching
   free (orig) is on an elided line -- confirm it is present.  */
4145 char *curr_feature_string = xstrdup (ix86_tune_ctrl_string);
4146 char *orig = curr_feature_string;
4152 next_feature_string = strchr (curr_feature_string, ',');
4153 if (next_feature_string)
4154 *next_feature_string++ = '\0';
/* A '^' prefix means "clear this feature"; skip past it.  */
4155 if (*curr_feature_string == '^')
4157 curr_feature_string++;
4160 for (i = 0; i < X86_TUNE_LAST; i++)
4162 if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
4164 ix86_tune_features[i] = !clear;
4166 fprintf (stderr, "Explicitly %s feature %s\n",
4167 clear ? "clear" : "set", ix86_tune_feature_names[i]);
/* Loop ran off the end: the name was not recognized.  The "- 1"
   re-includes the stripped '^' in the message.  NOTE(review): GCC
   diagnostics conventionally start lowercase; "Unknown" violates
   that -- consider "unknown parameter ...".  */
4171 if (i == X86_TUNE_LAST)
4172 error ("Unknown parameter to option -mtune-ctrl: %s",
4173 clear ? curr_feature_string - 1 : curr_feature_string);
4174 curr_feature_string = next_feature_string;
4176 while (curr_feature_string);
4180 /* Helper function to set ix86_tune_features. IX86_TUNE is the
/* processor to tune for; DUMP requests a listing of the resulting
   settings on stderr.  Explicit -mtune-ctrl= overrides are applied
   last (parse_mtune_ctrl_str below) so they win over the defaults.  */
4184 set_ix86_tune_features (enum processor_type ix86_tune, bool dump)
/* One bit per processor.  NOTE(review): this assumes processor_type
   enumerators fit in 32 bits (value < 32); shifting by more is
   undefined -- verify against the enum's size.  */
4186 unsigned int ix86_tune_mask = 1u << ix86_tune;
4189 for (i = 0; i < X86_TUNE_LAST; ++i)
/* With -mtune-ctrl=... and no defaults requested, start all-off.  */
4191 if (ix86_tune_no_default)
4192 ix86_tune_features[i] = 0;
/* Otherwise a feature is on iff its row in the initial table has the
   bit for this processor set.  */
4194 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
4199 fprintf (stderr, "List of x86 specific tuning parameter names:\n");
4200 for (i = 0; i < X86_TUNE_LAST; i++)
4201 fprintf (stderr, "%s : %s\n", ix86_tune_feature_names[i],
4202 ix86_tune_features[i] ? "on" : "off");
/* Apply user-specified per-feature overrides on top of the defaults.  */
4205 parse_mtune_ctrl_str (dump);
4209 /* Default align_* from the processor table. */
/* Fill in loop/jump/function alignment only where the user left the
   option at 0 (i.e. unset), taking the value appropriate for the
   processor selected by ix86_tune.  */
4212 ix86_default_align (struct gcc_options *opts)
4214 if (opts->x_align_loops == 0)
4216 opts->x_align_loops = processor_target_table[ix86_tune].align_loop;
4217 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
4219 if (opts->x_align_jumps == 0)
4221 opts->x_align_jumps = processor_target_table[ix86_tune].align_jump;
4222 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
4224 if (opts->x_align_functions == 0)
4226 opts->x_align_functions = processor_target_table[ix86_tune].align_func;
4230 /* Implement TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE hook. */
/* Re-derive the alignment defaults against the global option set after
   options have been changed (e.g. by optimize attributes/pragmas).  */
4233 ix86_override_options_after_change (void)
4235 ix86_default_align (&global_options);
4238 /* Override various settings based on options. If MAIN_ARGS_P, the
4239 options are from the command line, otherwise they are from
4243 ix86_option_override_internal (bool main_args_p,
4244 struct gcc_options *opts,
4245 struct gcc_options *opts_set)
4248 unsigned int ix86_arch_mask;
4249 const bool ix86_tune_specified = (opts->x_ix86_tune_string != NULL);
4254 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
4255 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
4256 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
4257 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
4258 #define PTA_AES (HOST_WIDE_INT_1 << 4)
4259 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
4260 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
4261 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
4262 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
4263 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
4264 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
4265 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
4266 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
4267 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
4268 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
4269 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
4270 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
4271 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
4272 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
4273 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
4274 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
4275 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
4276 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
4277 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
4278 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
4279 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
4280 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
4281 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
4282 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
4283 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
4284 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
4285 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
4286 #define PTA_RTM (HOST_WIDE_INT_1 << 32)
4287 #define PTA_HLE (HOST_WIDE_INT_1 << 33)
4288 #define PTA_PRFCHW (HOST_WIDE_INT_1 << 34)
4289 #define PTA_RDSEED (HOST_WIDE_INT_1 << 35)
4290 #define PTA_ADX (HOST_WIDE_INT_1 << 36)
4291 #define PTA_FXSR (HOST_WIDE_INT_1 << 37)
4292 #define PTA_XSAVE (HOST_WIDE_INT_1 << 38)
4293 #define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39)
4294 #define PTA_AVX512F (HOST_WIDE_INT_1 << 40)
4295 #define PTA_AVX512ER (HOST_WIDE_INT_1 << 41)
4296 #define PTA_AVX512PF (HOST_WIDE_INT_1 << 42)
4297 #define PTA_AVX512CD (HOST_WIDE_INT_1 << 43)
4298 #define PTA_MPX (HOST_WIDE_INT_1 << 44)
4299 #define PTA_SHA (HOST_WIDE_INT_1 << 45)
4300 #define PTA_PREFETCHWT1 (HOST_WIDE_INT_1 << 46)
4301 #define PTA_CLFLUSHOPT (HOST_WIDE_INT_1 << 47)
4302 #define PTA_XSAVEC (HOST_WIDE_INT_1 << 48)
4303 #define PTA_XSAVES (HOST_WIDE_INT_1 << 49)
4304 #define PTA_AVX512DQ (HOST_WIDE_INT_1 << 50)
4305 #define PTA_AVX512BW (HOST_WIDE_INT_1 << 51)
4306 #define PTA_AVX512VL (HOST_WIDE_INT_1 << 52)
4307 #define PTA_AVX512IFMA (HOST_WIDE_INT_1 << 53)
4308 #define PTA_AVX512VBMI (HOST_WIDE_INT_1 << 54)
4309 #define PTA_CLWB (HOST_WIDE_INT_1 << 55)
4310 #define PTA_PCOMMIT (HOST_WIDE_INT_1 << 56)
4311 #define PTA_MWAITX (HOST_WIDE_INT_1 << 57)
4312 #define PTA_CLZERO (HOST_WIDE_INT_1 << 58)
4313 #define PTA_NO_80387 (HOST_WIDE_INT_1 << 59)
4314 #define PTA_PKU (HOST_WIDE_INT_1 << 60)
4317 (PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 \
4318 | PTA_CX16 | PTA_FXSR)
4319 #define PTA_NEHALEM \
4320 (PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_POPCNT)
4321 #define PTA_WESTMERE \
4322 (PTA_NEHALEM | PTA_AES | PTA_PCLMUL)
4323 #define PTA_SANDYBRIDGE \
4324 (PTA_WESTMERE | PTA_AVX | PTA_XSAVE | PTA_XSAVEOPT)
4325 #define PTA_IVYBRIDGE \
4326 (PTA_SANDYBRIDGE | PTA_FSGSBASE | PTA_RDRND | PTA_F16C)
4327 #define PTA_HASWELL \
4328 (PTA_IVYBRIDGE | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_LZCNT \
4329 | PTA_FMA | PTA_MOVBE | PTA_HLE)
4330 #define PTA_BROADWELL \
4331 (PTA_HASWELL | PTA_ADX | PTA_PRFCHW | PTA_RDSEED)
4332 #define PTA_SKYLAKE \
4333 (PTA_BROADWELL | PTA_CLFLUSHOPT | PTA_XSAVEC | PTA_XSAVES)
4334 #define PTA_SKYLAKE_AVX512 \
4335 (PTA_SKYLAKE | PTA_AVX512F | PTA_AVX512CD | PTA_AVX512VL \
4336 | PTA_AVX512BW | PTA_AVX512DQ | PTA_PKU)
4338 (PTA_BROADWELL | PTA_AVX512PF | PTA_AVX512ER | PTA_AVX512F | PTA_AVX512CD)
4339 #define PTA_BONNELL \
4340 (PTA_CORE2 | PTA_MOVBE)
4341 #define PTA_SILVERMONT \
4342 (PTA_WESTMERE | PTA_MOVBE)
4344 /* if this reaches 64, need to widen struct pta flags below */
4348 const char *const name; /* processor name or nickname. */
4349 const enum processor_type processor;
4350 const enum attr_cpu schedule;
4351 const unsigned HOST_WIDE_INT flags;
4353 const processor_alias_table[] =
4355 {"i386", PROCESSOR_I386, CPU_NONE, 0},
4356 {"i486", PROCESSOR_I486, CPU_NONE, 0},
4357 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
4358 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
4359 {"lakemont", PROCESSOR_LAKEMONT, CPU_PENTIUM, PTA_NO_80387},
4360 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
4361 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
4362 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
4363 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
4364 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
4365 PTA_MMX | PTA_SSE | PTA_FXSR},
4366 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
4367 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
4368 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_FXSR},
4369 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
4370 PTA_MMX | PTA_SSE | PTA_FXSR},
4371 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
4372 PTA_MMX | PTA_SSE | PTA_FXSR},
4373 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
4374 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
4375 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
4376 PTA_MMX |PTA_SSE | PTA_SSE2 | PTA_FXSR},
4377 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
4378 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
4379 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
4380 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
4381 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
4382 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
4383 | PTA_CX16 | PTA_NO_SAHF | PTA_FXSR},
4384 {"core2", PROCESSOR_CORE2, CPU_CORE2, PTA_CORE2},
4385 {"nehalem", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
4386 {"corei7", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
4387 {"westmere", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_WESTMERE},
4388 {"sandybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
4390 {"corei7-avx", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
4392 {"ivybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
4394 {"core-avx-i", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
4396 {"haswell", PROCESSOR_HASWELL, CPU_HASWELL, PTA_HASWELL},
4397 {"core-avx2", PROCESSOR_HASWELL, CPU_HASWELL, PTA_HASWELL},
4398 {"broadwell", PROCESSOR_HASWELL, CPU_HASWELL, PTA_BROADWELL},
4399 {"skylake", PROCESSOR_HASWELL, CPU_HASWELL, PTA_SKYLAKE},
4400 {"skylake-avx512", PROCESSOR_HASWELL, CPU_HASWELL, PTA_SKYLAKE_AVX512},
4401 {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
4402 {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
4403 {"silvermont", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
4404 {"slm", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
4405 {"knl", PROCESSOR_KNL, CPU_SLM, PTA_KNL},
4406 {"intel", PROCESSOR_INTEL, CPU_SLM, PTA_NEHALEM},
4407 {"geode", PROCESSOR_GEODE, CPU_GEODE,
4408 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
4409 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
4410 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
4411 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
4412 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
4413 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
4414 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
4415 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
4416 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
4417 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
4418 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
4419 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
4420 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
4421 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
4422 {"x86-64", PROCESSOR_K8, CPU_K8,
4423 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
4424 {"k8", PROCESSOR_K8, CPU_K8,
4425 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
4426 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
4427 {"k8-sse3", PROCESSOR_K8, CPU_K8,
4428 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
4429 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
4430 {"opteron", PROCESSOR_K8, CPU_K8,
4431 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
4432 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
4433 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
4434 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
4435 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
4436 {"athlon64", PROCESSOR_K8, CPU_K8,
4437 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
4438 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
4439 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
4440 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
4441 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
4442 {"athlon-fx", PROCESSOR_K8, CPU_K8,
4443 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
4444 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
4445 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
4446 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
4447 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
4448 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
4449 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
4450 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
4451 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
4452 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
4453 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
4454 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
4455 | PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
4456 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
4457 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
4458 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
4459 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
4460 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
4461 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
4462 {"bdver3", PROCESSOR_BDVER3, CPU_BDVER3,
4463 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
4464 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
4465 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
4466 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
4467 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE
4468 | PTA_XSAVEOPT | PTA_FSGSBASE},
4469 {"bdver4", PROCESSOR_BDVER4, CPU_BDVER4,
4470 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
4471 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
4472 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
4473 | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2
4474 | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR
4475 | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE | PTA_RDRND
4476 | PTA_MOVBE | PTA_MWAITX},
4477 {"znver1", PROCESSOR_ZNVER1, CPU_ZNVER1,
4478 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
4479 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
4480 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
4481 | PTA_BMI | PTA_BMI2 | PTA_F16C | PTA_FMA | PTA_PRFCHW
4482 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE
4483 | PTA_RDRND | PTA_MOVBE | PTA_MWAITX | PTA_ADX | PTA_RDSEED
4484 | PTA_CLZERO | PTA_CLFLUSHOPT | PTA_XSAVEC | PTA_XSAVES
4485 | PTA_SHA | PTA_LZCNT | PTA_POPCNT},
4486 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
4487 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
4488 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_PRFCHW
4489 | PTA_FXSR | PTA_XSAVE},
4490 {"btver2", PROCESSOR_BTVER2, CPU_BTVER2,
4491 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
4492 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_SSE4_1
4493 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
4494 | PTA_BMI | PTA_F16C | PTA_MOVBE | PTA_PRFCHW
4495 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT},
4497 {"generic", PROCESSOR_GENERIC, CPU_GENERIC,
4499 | PTA_HLE /* flags are only used for -march switch. */ },
4502 /* -mrecip options. */
4505 const char *string; /* option name */
4506 unsigned int mask; /* mask bits to set */
4508 const recip_options[] =
4510 { "all", RECIP_MASK_ALL },
4511 { "none", RECIP_MASK_NONE },
4512 { "div", RECIP_MASK_DIV },
4513 { "sqrt", RECIP_MASK_SQRT },
4514 { "vec-div", RECIP_MASK_VEC_DIV },
4515 { "vec-sqrt", RECIP_MASK_VEC_SQRT },
4518 int const pta_size = ARRAY_SIZE (processor_alias_table);
4520 /* Set up prefix/suffix so the error messages refer to either the command
4521 line argument, or the attribute(target). */
4530 prefix = "option(\"";
4535 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
4536 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
4537 if (TARGET_64BIT_DEFAULT && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
4538 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32);
4539 #ifdef TARGET_BI_ARCH
4542 #if TARGET_BI_ARCH == 1
4543 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
4544 is on and OPTION_MASK_ABI_X32 is off. We turn off
4545 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
4547 if (TARGET_X32_P (opts->x_ix86_isa_flags))
4548 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
4550 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
4551 on and OPTION_MASK_ABI_64 is off. We turn off
4552 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
4553 -m64 or OPTION_MASK_CODE16 is turned on by -m16. */
4554 if (TARGET_LP64_P (opts->x_ix86_isa_flags)
4555 || TARGET_16BIT_P (opts->x_ix86_isa_flags))
4556 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
4558 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
4559 && TARGET_IAMCU_P (opts->x_target_flags))
4560 sorry ("Intel MCU psABI isn%'t supported in %s mode",
4561 TARGET_X32_P (opts->x_ix86_isa_flags) ? "x32" : "64-bit");
4565 if (TARGET_X32_P (opts->x_ix86_isa_flags))
4567 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
4568 OPTION_MASK_ABI_64 for TARGET_X32. */
4569 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
4570 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
4572 else if (TARGET_16BIT_P (opts->x_ix86_isa_flags))
4573 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ISA_64BIT
4574 | OPTION_MASK_ABI_X32
4575 | OPTION_MASK_ABI_64);
4576 else if (TARGET_LP64_P (opts->x_ix86_isa_flags))
4578 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
4579 OPTION_MASK_ABI_X32 for TARGET_LP64. */
4580 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
4581 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
4584 #ifdef SUBTARGET_OVERRIDE_OPTIONS
4585 SUBTARGET_OVERRIDE_OPTIONS;
4588 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
4589 SUBSUBTARGET_OVERRIDE_OPTIONS;
4592 /* -fPIC is the default for x86_64. */
4593 if (TARGET_MACHO && TARGET_64BIT_P (opts->x_ix86_isa_flags))
4594 opts->x_flag_pic = 2;
4596 /* Need to check -mtune=generic first. */
4597 if (opts->x_ix86_tune_string)
4599 /* As special support for cross compilers we read -mtune=native
4600 as -mtune=generic. With native compilers we won't see the
4601 -mtune=native, as it was changed by the driver. */
4602 if (!strcmp (opts->x_ix86_tune_string, "native"))
4604 opts->x_ix86_tune_string = "generic";
4606 else if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
4607 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
4608 "%stune=k8%s or %stune=generic%s instead as appropriate",
4609 prefix, suffix, prefix, suffix, prefix, suffix);
4613 if (opts->x_ix86_arch_string)
4614 opts->x_ix86_tune_string = opts->x_ix86_arch_string;
4615 if (!opts->x_ix86_tune_string)
4617 opts->x_ix86_tune_string
4618 = processor_target_table[TARGET_CPU_DEFAULT].name;
4619 ix86_tune_defaulted = 1;
4622 /* opts->x_ix86_tune_string is set to opts->x_ix86_arch_string
4623 or defaulted. We need to use a sensible tune option. */
4624 if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
4626 opts->x_ix86_tune_string = "generic";
4630 if (opts->x_ix86_stringop_alg == rep_prefix_8_byte
4631 && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
4633 /* rep; movq isn't available in 32-bit code. */
4634 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
4635 opts->x_ix86_stringop_alg = no_stringop;
4638 if (!opts->x_ix86_arch_string)
4639 opts->x_ix86_arch_string
4640 = TARGET_64BIT_P (opts->x_ix86_isa_flags)
4641 ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
4643 ix86_arch_specified = 1;
4645 if (opts_set->x_ix86_pmode)
4647 if ((TARGET_LP64_P (opts->x_ix86_isa_flags)
4648 && opts->x_ix86_pmode == PMODE_SI)
4649 || (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
4650 && opts->x_ix86_pmode == PMODE_DI))
4651 error ("address mode %qs not supported in the %s bit mode",
4652 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "short" : "long",
4653 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "64" : "32");
4656 opts->x_ix86_pmode = TARGET_LP64_P (opts->x_ix86_isa_flags)
4657 ? PMODE_DI : PMODE_SI;
4659 if (!opts_set->x_ix86_abi)
4660 opts->x_ix86_abi = DEFAULT_ABI;
4662 /* For targets using ms ABI enable ms-extensions, if not
4663 explicit turned off. For non-ms ABI we turn off this
4665 if (!opts_set->x_flag_ms_extensions)
4666 opts->x_flag_ms_extensions = (MS_ABI == DEFAULT_ABI);
4668 if (opts_set->x_ix86_cmodel)
4670 switch (opts->x_ix86_cmodel)
4674 if (opts->x_flag_pic)
4675 opts->x_ix86_cmodel = CM_SMALL_PIC;
4676 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4677 error ("code model %qs not supported in the %s bit mode",
4683 if (opts->x_flag_pic)
4684 opts->x_ix86_cmodel = CM_MEDIUM_PIC;
4685 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4686 error ("code model %qs not supported in the %s bit mode",
4688 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
4689 error ("code model %qs not supported in x32 mode",
4695 if (opts->x_flag_pic)
4696 opts->x_ix86_cmodel = CM_LARGE_PIC;
4697 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4698 error ("code model %qs not supported in the %s bit mode",
4700 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
4701 error ("code model %qs not supported in x32 mode",
4706 if (opts->x_flag_pic)
4707 error ("code model %s does not support PIC mode", "32");
4708 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4709 error ("code model %qs not supported in the %s bit mode",
4714 if (opts->x_flag_pic)
4716 error ("code model %s does not support PIC mode", "kernel");
4717 opts->x_ix86_cmodel = CM_32;
4719 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4720 error ("code model %qs not supported in the %s bit mode",
4730 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
4731 use of rip-relative addressing. This eliminates fixups that
4732 would otherwise be needed if this object is to be placed in a
4733 DLL, and is essentially just as efficient as direct addressing. */
4734 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
4735 && (TARGET_RDOS || TARGET_PECOFF))
4736 opts->x_ix86_cmodel = CM_MEDIUM_PIC, opts->x_flag_pic = 1;
4737 else if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4738 opts->x_ix86_cmodel = opts->x_flag_pic ? CM_SMALL_PIC : CM_SMALL;
4740 opts->x_ix86_cmodel = CM_32;
4742 if (TARGET_MACHO && opts->x_ix86_asm_dialect == ASM_INTEL)
4744 error ("-masm=intel not supported in this configuration");
4745 opts->x_ix86_asm_dialect = ASM_ATT;
4747 if ((TARGET_64BIT_P (opts->x_ix86_isa_flags) != 0)
4748 != ((opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
4749 sorry ("%i-bit mode not compiled in",
4750 (opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
4752 for (i = 0; i < pta_size; i++)
4753 if (! strcmp (opts->x_ix86_arch_string, processor_alias_table[i].name))
4755 ix86_schedule = processor_alias_table[i].schedule;
4756 ix86_arch = processor_alias_table[i].processor;
4757 /* Default cpu tuning to the architecture. */
4758 ix86_tune = ix86_arch;
4760 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
4761 && !(processor_alias_table[i].flags & PTA_64BIT))
4762 error ("CPU you selected does not support x86-64 "
4765 if (processor_alias_table[i].flags & PTA_MMX
4766 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
4767 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX;
4768 if (processor_alias_table[i].flags & PTA_3DNOW
4769 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
4770 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
4771 if (processor_alias_table[i].flags & PTA_3DNOW_A
4772 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
4773 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
4774 if (processor_alias_table[i].flags & PTA_SSE
4775 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
4776 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE;
4777 if (processor_alias_table[i].flags & PTA_SSE2
4778 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
4779 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
4780 if (processor_alias_table[i].flags & PTA_SSE3
4781 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
4782 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
4783 if (processor_alias_table[i].flags & PTA_SSSE3
4784 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
4785 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
4786 if (processor_alias_table[i].flags & PTA_SSE4_1
4787 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
4788 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
4789 if (processor_alias_table[i].flags & PTA_SSE4_2
4790 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
4791 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
4792 if (processor_alias_table[i].flags & PTA_AVX
4793 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
4794 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX;
4795 if (processor_alias_table[i].flags & PTA_AVX2
4796 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
4797 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
4798 if (processor_alias_table[i].flags & PTA_FMA
4799 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
4800 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA;
4801 if (processor_alias_table[i].flags & PTA_SSE4A
4802 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
4803 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
4804 if (processor_alias_table[i].flags & PTA_FMA4
4805 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
4806 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
4807 if (processor_alias_table[i].flags & PTA_XOP
4808 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
4809 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP;
4810 if (processor_alias_table[i].flags & PTA_LWP
4811 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
4812 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP;
4813 if (processor_alias_table[i].flags & PTA_ABM
4814 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
4815 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM;
4816 if (processor_alias_table[i].flags & PTA_BMI
4817 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
4818 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI;
4819 if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
4820 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
4821 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
4822 if (processor_alias_table[i].flags & PTA_TBM
4823 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
4824 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM;
4825 if (processor_alias_table[i].flags & PTA_BMI2
4826 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
4827 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
4828 if (processor_alias_table[i].flags & PTA_CX16
4829 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
4830 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CX16;
4831 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
4832 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
4833 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
4834 if (!(TARGET_64BIT_P (opts->x_ix86_isa_flags)
4835 && (processor_alias_table[i].flags & PTA_NO_SAHF))
4836 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
4837 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
4838 if (processor_alias_table[i].flags & PTA_MOVBE
4839 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
4840 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
4841 if (processor_alias_table[i].flags & PTA_AES
4842 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
4843 ix86_isa_flags |= OPTION_MASK_ISA_AES;
4844 if (processor_alias_table[i].flags & PTA_SHA
4845 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SHA))
4846 ix86_isa_flags |= OPTION_MASK_ISA_SHA;
4847 if (processor_alias_table[i].flags & PTA_PCLMUL
4848 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
4849 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
4850 if (processor_alias_table[i].flags & PTA_FSGSBASE
4851 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
4852 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
4853 if (processor_alias_table[i].flags & PTA_RDRND
4854 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
4855 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
4856 if (processor_alias_table[i].flags & PTA_F16C
4857 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
4858 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C;
4859 if (processor_alias_table[i].flags & PTA_RTM
4860 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
4861 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RTM;
4862 if (processor_alias_table[i].flags & PTA_HLE
4863 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_HLE))
4864 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_HLE;
4865 if (processor_alias_table[i].flags & PTA_PRFCHW
4866 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW))
4867 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW;
4868 if (processor_alias_table[i].flags & PTA_RDSEED
4869 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDSEED))
4870 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDSEED;
4871 if (processor_alias_table[i].flags & PTA_ADX
4872 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ADX))
4873 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ADX;
4874 if (processor_alias_table[i].flags & PTA_FXSR
4875 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FXSR))
4876 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FXSR;
4877 if (processor_alias_table[i].flags & PTA_XSAVE
4878 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVE))
4879 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVE;
4880 if (processor_alias_table[i].flags & PTA_XSAVEOPT
4881 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT))
4882 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT;
4883 if (processor_alias_table[i].flags & PTA_AVX512F
4884 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F))
4885 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F;
4886 if (processor_alias_table[i].flags & PTA_AVX512ER
4887 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512ER))
4888 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER;
4889 if (processor_alias_table[i].flags & PTA_AVX512PF
4890 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512PF))
4891 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512PF;
4892 if (processor_alias_table[i].flags & PTA_AVX512CD
4893 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512CD))
4894 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD;
4895 if (processor_alias_table[i].flags & PTA_PREFETCHWT1
4896 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PREFETCHWT1))
4897 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PREFETCHWT1;
4898 if (processor_alias_table[i].flags & PTA_PCOMMIT
4899 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCOMMIT))
4900 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCOMMIT;
4901 if (processor_alias_table[i].flags & PTA_CLWB
4902 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLWB))
4903 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLWB;
4904 if (processor_alias_table[i].flags & PTA_CLFLUSHOPT
4905 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLFLUSHOPT))
4906 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLFLUSHOPT;
4907 if (processor_alias_table[i].flags & PTA_CLZERO
4908 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLZERO))
4909 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLZERO;
4910 if (processor_alias_table[i].flags & PTA_XSAVEC
4911 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEC))
4912 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEC;
4913 if (processor_alias_table[i].flags & PTA_XSAVES
4914 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVES))
4915 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVES;
4916 if (processor_alias_table[i].flags & PTA_AVX512DQ
4917 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512DQ))
4918 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512DQ;
4919 if (processor_alias_table[i].flags & PTA_AVX512BW
4920 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512BW))
4921 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512BW;
4922 if (processor_alias_table[i].flags & PTA_AVX512VL
4923 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VL))
4924 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VL;
4925 if (processor_alias_table[i].flags & PTA_MPX
4926 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MPX))
4927 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MPX;
4928 if (processor_alias_table[i].flags & PTA_AVX512VBMI
4929 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VBMI))
4930 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VBMI;
4931 if (processor_alias_table[i].flags & PTA_AVX512IFMA
4932 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512IFMA))
4933 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512IFMA;
4934 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
4935 x86_prefetch_sse = true;
4936 if (processor_alias_table[i].flags & PTA_MWAITX
4937 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MWAITX))
4938 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MWAITX;
4939 if (processor_alias_table[i].flags & PTA_PKU
4940 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PKU))
4941 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PKU;
4943 if (!(opts_set->x_target_flags & MASK_80387))
4945 if (processor_alias_table[i].flags & PTA_NO_80387)
4946 opts->x_target_flags &= ~MASK_80387;
4948 opts->x_target_flags |= MASK_80387;
4953 if (TARGET_X32 && (opts->x_ix86_isa_flags & OPTION_MASK_ISA_MPX))
4954 error ("Intel MPX does not support x32");
4956 if (TARGET_X32 && (ix86_isa_flags & OPTION_MASK_ISA_MPX))
4957 error ("Intel MPX does not support x32");
4959 if (!strcmp (opts->x_ix86_arch_string, "generic"))
4960 error ("generic CPU can be used only for %stune=%s %s",
4961 prefix, suffix, sw);
4962 else if (!strcmp (opts->x_ix86_arch_string, "intel"))
4963 error ("intel CPU can be used only for %stune=%s %s",
4964 prefix, suffix, sw);
4965 else if (i == pta_size)
4966 error ("bad value (%s) for %sarch=%s %s",
4967 opts->x_ix86_arch_string, prefix, suffix, sw);
4969 ix86_arch_mask = 1u << ix86_arch;
4970 for (i = 0; i < X86_ARCH_LAST; ++i)
4971 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4973 for (i = 0; i < pta_size; i++)
4974 if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name))
4976 ix86_schedule = processor_alias_table[i].schedule;
4977 ix86_tune = processor_alias_table[i].processor;
4978 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4980 if (!(processor_alias_table[i].flags & PTA_64BIT))
4982 if (ix86_tune_defaulted)
4984 opts->x_ix86_tune_string = "x86-64";
4985 for (i = 0; i < pta_size; i++)
4986 if (! strcmp (opts->x_ix86_tune_string,
4987 processor_alias_table[i].name))
4989 ix86_schedule = processor_alias_table[i].schedule;
4990 ix86_tune = processor_alias_table[i].processor;
4993 error ("CPU you selected does not support x86-64 "
4997 /* Intel CPUs have always interpreted SSE prefetch instructions as
4998 NOPs; so, we can enable SSE prefetch instructions even when
4999 -mtune (rather than -march) points us to a processor that has them.
5000 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
5001 higher processors. */
5003 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
5004 x86_prefetch_sse = true;
5008 if (ix86_tune_specified && i == pta_size)
5009 error ("bad value (%s) for %stune=%s %s",
5010 opts->x_ix86_tune_string, prefix, suffix, sw);
5012 set_ix86_tune_features (ix86_tune, opts->x_ix86_dump_tunes);
5014 #ifndef USE_IX86_FRAME_POINTER
5015 #define USE_IX86_FRAME_POINTER 0
5018 #ifndef USE_X86_64_FRAME_POINTER
5019 #define USE_X86_64_FRAME_POINTER 0
5022 /* Set the default values for switches whose default depends on TARGET_64BIT
5023 in case they weren't overwritten by command line options. */
5024 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
5026 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
5027 opts->x_flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
5028 if (opts->x_flag_asynchronous_unwind_tables
5029 && !opts_set->x_flag_unwind_tables
5030 && TARGET_64BIT_MS_ABI)
5031 opts->x_flag_unwind_tables = 1;
5032 if (opts->x_flag_asynchronous_unwind_tables == 2)
5033 opts->x_flag_unwind_tables
5034 = opts->x_flag_asynchronous_unwind_tables = 1;
5035 if (opts->x_flag_pcc_struct_return == 2)
5036 opts->x_flag_pcc_struct_return = 0;
5040 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
5041 opts->x_flag_omit_frame_pointer
5042 = !(USE_IX86_FRAME_POINTER || opts->x_optimize_size);
5043 if (opts->x_flag_asynchronous_unwind_tables == 2)
5044 opts->x_flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
5045 if (opts->x_flag_pcc_struct_return == 2)
5047 /* Intel MCU psABI specifies that -freg-struct-return should
5048 be on. Instead of setting DEFAULT_PCC_STRUCT_RETURN to 1,
5049 we check -miamcu so that -freg-struct-return is always
5050 turned on if -miamcu is used. */
5051 if (TARGET_IAMCU_P (opts->x_target_flags))
5052 opts->x_flag_pcc_struct_return = 0;
5054 opts->x_flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
5058 ix86_tune_cost = processor_target_table[ix86_tune].cost;
5059 /* TODO: ix86_cost should be chosen at instruction or function granuality
5060 so for cold code we use size_cost even in !optimize_size compilation. */
5061 if (opts->x_optimize_size)
5062 ix86_cost = &ix86_size_cost;
5064 ix86_cost = ix86_tune_cost;
5066 /* Arrange to set up i386_stack_locals for all functions. */
5067 init_machine_status = ix86_init_machine_status;
5069 /* Validate -mregparm= value. */
5070 if (opts_set->x_ix86_regparm)
5072 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
5073 warning (0, "-mregparm is ignored in 64-bit mode");
5074 else if (TARGET_IAMCU_P (opts->x_target_flags))
5075 warning (0, "-mregparm is ignored for Intel MCU psABI");
5076 if (opts->x_ix86_regparm > REGPARM_MAX)
5078 error ("-mregparm=%d is not between 0 and %d",
5079 opts->x_ix86_regparm, REGPARM_MAX);
5080 opts->x_ix86_regparm = 0;
5083 if (TARGET_IAMCU_P (opts->x_target_flags)
5084 || TARGET_64BIT_P (opts->x_ix86_isa_flags))
5085 opts->x_ix86_regparm = REGPARM_MAX;
5087 /* Default align_* from the processor table. */
5088 ix86_default_align (opts);
5090 /* Provide default for -mbranch-cost= value. */
5091 if (!opts_set->x_ix86_branch_cost)
5092 opts->x_ix86_branch_cost = ix86_tune_cost->branch_cost;
5094 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
5096 opts->x_target_flags
5097 |= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags;
5099 /* Enable by default the SSE and MMX builtins. Do allow the user to
5100 explicitly disable any of these. In particular, disabling SSE and
5101 MMX for kernel code is extremely useful. */
5102 if (!ix86_arch_specified)
5103 opts->x_ix86_isa_flags
5104 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
5105 | TARGET_SUBTARGET64_ISA_DEFAULT)
5106 & ~opts->x_ix86_isa_flags_explicit);
5108 if (TARGET_RTD_P (opts->x_target_flags))
5109 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
5113 opts->x_target_flags
5114 |= TARGET_SUBTARGET32_DEFAULT & ~opts_set->x_target_flags;
5116 if (!ix86_arch_specified)
5117 opts->x_ix86_isa_flags
5118 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit;
      /* The i386 (32-bit) ABI does not specify a red zone.  Using one can
	 still make sense when the programmer takes care to keep the stack
	 from being clobbered, so only default to "no red zone" when the
	 user has not chosen explicitly.  */
      if (!(opts_set->x_target_flags & MASK_NO_RED_ZONE))
	opts->x_target_flags |= MASK_NO_RED_ZONE;
5126 /* Keep nonleaf frame pointers. */
5127 if (opts->x_flag_omit_frame_pointer)
5128 opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
5129 else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags))
5130 opts->x_flag_omit_frame_pointer = 1;
5132 /* If we're doing fast math, we don't care about comparison order
5133 wrt NaNs. This lets us use a shorter comparison sequence. */
5134 if (opts->x_flag_finite_math_only)
5135 opts->x_target_flags &= ~MASK_IEEE_FP;
5137 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
5138 since the insns won't need emulation. */
5139 if (ix86_tune_features [X86_TUNE_ALWAYS_FANCY_MATH_387])
5140 opts->x_target_flags &= ~MASK_NO_FANCY_MATH_387;
5142 /* Likewise, if the target doesn't have a 387, or we've specified
5143 software floating point, don't use 387 inline intrinsics. */
5144 if (!TARGET_80387_P (opts->x_target_flags))
5145 opts->x_target_flags |= MASK_NO_FANCY_MATH_387;
5147 /* Turn on MMX builtins for -msse. */
5148 if (TARGET_SSE_P (opts->x_ix86_isa_flags))
5149 opts->x_ix86_isa_flags
5150 |= OPTION_MASK_ISA_MMX & ~opts->x_ix86_isa_flags_explicit;
5152 /* Enable SSE prefetch. */
5153 if (TARGET_SSE_P (opts->x_ix86_isa_flags)
5154 || (TARGET_PRFCHW && !TARGET_3DNOW_P (opts->x_ix86_isa_flags)))
5155 x86_prefetch_sse = true;
5157 /* Enable prefetch{,w} instructions for -m3dnow and -mprefetchwt1. */
5158 if (TARGET_3DNOW_P (opts->x_ix86_isa_flags)
5159 || TARGET_PREFETCHWT1_P (opts->x_ix86_isa_flags))
5160 opts->x_ix86_isa_flags
5161 |= OPTION_MASK_ISA_PRFCHW & ~opts->x_ix86_isa_flags_explicit;
5163 /* Enable popcnt instruction for -msse4.2 or -mabm. */
5164 if (TARGET_SSE4_2_P (opts->x_ix86_isa_flags)
5165 || TARGET_ABM_P (opts->x_ix86_isa_flags))
5166 opts->x_ix86_isa_flags
5167 |= OPTION_MASK_ISA_POPCNT & ~opts->x_ix86_isa_flags_explicit;
5169 /* Enable lzcnt instruction for -mabm. */
5170 if (TARGET_ABM_P(opts->x_ix86_isa_flags))
5171 opts->x_ix86_isa_flags
5172 |= OPTION_MASK_ISA_LZCNT & ~opts->x_ix86_isa_flags_explicit;
5174 /* Validate -mpreferred-stack-boundary= value or default it to
5175 PREFERRED_STACK_BOUNDARY_DEFAULT. */
5176 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
5177 if (opts_set->x_ix86_preferred_stack_boundary_arg)
5179 int min = (TARGET_64BIT_P (opts->x_ix86_isa_flags)
5180 ? (TARGET_SSE_P (opts->x_ix86_isa_flags) ? 4 : 3) : 2);
5181 int max = (TARGET_SEH ? 4 : 12);
5183 if (opts->x_ix86_preferred_stack_boundary_arg < min
5184 || opts->x_ix86_preferred_stack_boundary_arg > max)
5187 error ("-mpreferred-stack-boundary is not supported "
5190 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
5191 opts->x_ix86_preferred_stack_boundary_arg, min, max);
5194 ix86_preferred_stack_boundary
5195 = (1 << opts->x_ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
5198 /* Set the default value for -mstackrealign. */
5199 if (opts->x_ix86_force_align_arg_pointer == -1)
5200 opts->x_ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
5202 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
5204 /* Validate -mincoming-stack-boundary= value or default it to
5205 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
5206 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
5207 if (opts_set->x_ix86_incoming_stack_boundary_arg)
5209 int min = TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 3 : 2;
5211 if (opts->x_ix86_incoming_stack_boundary_arg < min
5212 || opts->x_ix86_incoming_stack_boundary_arg > 12)
5213 error ("-mincoming-stack-boundary=%d is not between %d and 12",
5214 opts->x_ix86_incoming_stack_boundary_arg, min);
5217 ix86_user_incoming_stack_boundary
5218 = (1 << opts->x_ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
5219 ix86_incoming_stack_boundary
5220 = ix86_user_incoming_stack_boundary;
5224 #ifndef NO_PROFILE_COUNTERS
5225 if (flag_nop_mcount)
5226 error ("-mnop-mcount is not compatible with this target");
5228 if (flag_nop_mcount && flag_pic)
5229 error ("-mnop-mcount is not implemented for -fPIC");
5231 /* Accept -msseregparm only if at least SSE support is enabled. */
5232 if (TARGET_SSEREGPARM_P (opts->x_target_flags)
5233 && ! TARGET_SSE_P (opts->x_ix86_isa_flags))
5234 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
5236 if (opts_set->x_ix86_fpmath)
5238 if (opts->x_ix86_fpmath & FPMATH_SSE)
5240 if (!TARGET_SSE_P (opts->x_ix86_isa_flags))
5242 if (TARGET_80387_P (opts->x_target_flags))
5244 warning (0, "SSE instruction set disabled, using 387 arithmetics");
5245 opts->x_ix86_fpmath = FPMATH_387;
5248 else if ((opts->x_ix86_fpmath & FPMATH_387)
5249 && !TARGET_80387_P (opts->x_target_flags))
5251 warning (0, "387 instruction set disabled, using SSE arithmetics");
5252 opts->x_ix86_fpmath = FPMATH_SSE;
5256 /* For all chips supporting SSE2, -mfpmath=sse performs better than
5257 fpmath=387. The second is however default at many targets since the
5258 extra 80bit precision of temporaries is considered to be part of ABI.
5259 Overwrite the default at least for -ffast-math.
5260 TODO: -mfpmath=both seems to produce same performing code with bit
5261 smaller binaries. It is however not clear if register allocation is
5262 ready for this setting.
5263 Also -mfpmath=387 is overall a lot more compact (bout 4-5%) than SSE
5264 codegen. We may switch to 387 with -ffast-math for size optimized
5266 else if (fast_math_flags_set_p (&global_options)
5267 && TARGET_SSE2_P (opts->x_ix86_isa_flags))
5268 opts->x_ix86_fpmath = FPMATH_SSE;
5270 opts->x_ix86_fpmath = TARGET_FPMATH_DEFAULT_P (opts->x_ix86_isa_flags);
5272 /* Use external vectorized library in vectorizing intrinsics. */
5273 if (opts_set->x_ix86_veclibabi_type)
5274 switch (opts->x_ix86_veclibabi_type)
5276 case ix86_veclibabi_type_svml:
5277 ix86_veclib_handler = ix86_veclibabi_svml;
5280 case ix86_veclibabi_type_acml:
5281 ix86_veclib_handler = ix86_veclibabi_acml;
5288 if (ix86_tune_features [X86_TUNE_ACCUMULATE_OUTGOING_ARGS]
5289 && !(opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
5290 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
5292 /* If stack probes are required, the space used for large function
5293 arguments on the stack must also be probed, so enable
5294 -maccumulate-outgoing-args so this happens in the prologue. */
5295 if (TARGET_STACK_PROBE_P (opts->x_target_flags)
5296 && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
5298 if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
5299 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
5300 "for correctness", prefix, suffix);
5301 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
5304 /* Stack realignment without -maccumulate-outgoing-args requires %ebp,
5305 so enable -maccumulate-outgoing-args when %ebp is fixed. */
5306 if (fixed_regs[BP_REG]
5307 && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
5309 if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
5310 warning (0, "fixed ebp register requires %saccumulate-outgoing-args%s",
5312 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
5315 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
5318 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
5319 p = strchr (internal_label_prefix, 'X');
5320 internal_label_prefix_len = p - internal_label_prefix;
5324 /* When scheduling description is not available, disable scheduler pass
5325 so it won't slow down the compilation and make x87 code slower. */
5326 if (!TARGET_SCHEDULE)
5327 opts->x_flag_schedule_insns_after_reload = opts->x_flag_schedule_insns = 0;
5329 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
5330 ix86_tune_cost->simultaneous_prefetches,
5331 opts->x_param_values,
5332 opts_set->x_param_values);
5333 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
5334 ix86_tune_cost->prefetch_block,
5335 opts->x_param_values,
5336 opts_set->x_param_values);
5337 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
5338 ix86_tune_cost->l1_cache_size,
5339 opts->x_param_values,
5340 opts_set->x_param_values);
5341 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
5342 ix86_tune_cost->l2_cache_size,
5343 opts->x_param_values,
5344 opts_set->x_param_values);
5346 /* Restrict number of if-converted SET insns to 1. */
5347 if (TARGET_ONE_IF_CONV_INSN)
5348 maybe_set_param_value (PARAM_MAX_RTL_IF_CONVERSION_INSNS,
5350 opts->x_param_values,
5351 opts_set->x_param_values);
5353 /* Enable sw prefetching at -O3 for CPUS that prefetching is helpful. */
5354 if (opts->x_flag_prefetch_loop_arrays < 0
5356 && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
5357 && !opts->x_optimize_size
5358 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
5359 opts->x_flag_prefetch_loop_arrays = 1;
  /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
     can be optimized to ap = __builtin_next_arg (0).  (The old comment's
     "opts->x_optimized" was search-and-replace damage for "optimized".)
     -fsplit-stack needs the full va_start expansion, so keep the hook
     there.  */
  if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && !opts->x_flag_split_stack)
    targetm.expand_builtin_va_start = NULL;
5366 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
5368 ix86_gen_leave = gen_leave_rex64;
5369 if (Pmode == DImode)
5371 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
5372 ix86_gen_tls_local_dynamic_base_64
5373 = gen_tls_local_dynamic_base_64_di;
5377 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
5378 ix86_gen_tls_local_dynamic_base_64
5379 = gen_tls_local_dynamic_base_64_si;
5383 ix86_gen_leave = gen_leave;
5385 if (Pmode == DImode)
5387 ix86_gen_add3 = gen_adddi3;
5388 ix86_gen_sub3 = gen_subdi3;
5389 ix86_gen_sub3_carry = gen_subdi3_carry;
5390 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
5391 ix86_gen_andsp = gen_anddi3;
5392 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
5393 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
5394 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
5395 ix86_gen_monitor = gen_sse3_monitor_di;
5396 ix86_gen_monitorx = gen_monitorx_di;
5397 ix86_gen_clzero = gen_clzero_di;
5401 ix86_gen_add3 = gen_addsi3;
5402 ix86_gen_sub3 = gen_subsi3;
5403 ix86_gen_sub3_carry = gen_subsi3_carry;
5404 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
5405 ix86_gen_andsp = gen_andsi3;
5406 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
5407 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
5408 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
5409 ix86_gen_monitor = gen_sse3_monitor_si;
5410 ix86_gen_monitorx = gen_monitorx_si;
5411 ix86_gen_clzero = gen_clzero_si;
5415 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
5416 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
5417 opts->x_target_flags |= MASK_CLD & ~opts_set->x_target_flags;
5420 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && opts->x_flag_pic)
5422 if (opts->x_flag_fentry > 0)
5423 sorry ("-mfentry isn%'t supported for 32-bit in combination "
5425 opts->x_flag_fentry = 0;
5427 else if (TARGET_SEH)
5429 if (opts->x_flag_fentry == 0)
5430 sorry ("-mno-fentry isn%'t compatible with SEH");
5431 opts->x_flag_fentry = 1;
5433 else if (opts->x_flag_fentry < 0)
5435 #if defined(PROFILE_BEFORE_PROLOGUE)
5436 opts->x_flag_fentry = 1;
5438 opts->x_flag_fentry = 0;
5442 if (!(opts_set->x_target_flags & MASK_VZEROUPPER))
5443 opts->x_target_flags |= MASK_VZEROUPPER;
5444 if (!(opts_set->x_target_flags & MASK_STV))
5445 opts->x_target_flags |= MASK_STV;
5446 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL]
5447 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
5448 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
5449 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL]
5450 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE))
5451 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
5452 /* Enable 128-bit AVX instruction generation
5453 for the auto-vectorizer. */
5454 if (TARGET_AVX128_OPTIMAL
5455 && !(opts_set->x_target_flags & MASK_PREFER_AVX128))
5456 opts->x_target_flags |= MASK_PREFER_AVX128;
5458 if (opts->x_ix86_recip_name)
5460 char *p = ASTRDUP (opts->x_ix86_recip_name);
5462 unsigned int mask, i;
5465 while ((q = strtok (p, ",")) != NULL)
5476 if (!strcmp (q, "default"))
5477 mask = RECIP_MASK_ALL;
5480 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
5481 if (!strcmp (q, recip_options[i].string))
5483 mask = recip_options[i].mask;
5487 if (i == ARRAY_SIZE (recip_options))
5489 error ("unknown option for -mrecip=%s", q);
5491 mask = RECIP_MASK_NONE;
5495 opts->x_recip_mask_explicit |= mask;
5497 opts->x_recip_mask &= ~mask;
5499 opts->x_recip_mask |= mask;
5503 if (TARGET_RECIP_P (opts->x_target_flags))
5504 opts->x_recip_mask |= RECIP_MASK_ALL & ~opts->x_recip_mask_explicit;
5505 else if (opts_set->x_target_flags & MASK_RECIP)
5506 opts->x_recip_mask &= ~(RECIP_MASK_ALL & ~opts->x_recip_mask_explicit);
5508 /* Default long double to 64-bit for 32-bit Bionic and to __float128
5509 for 64-bit Bionic. Also default long double to 64-bit for Intel
5511 if ((TARGET_HAS_BIONIC || TARGET_IAMCU)
5512 && !(opts_set->x_target_flags
5513 & (MASK_LONG_DOUBLE_64 | MASK_LONG_DOUBLE_128)))
5514 opts->x_target_flags |= (TARGET_64BIT
5515 ? MASK_LONG_DOUBLE_128
5516 : MASK_LONG_DOUBLE_64);
5518 /* Only one of them can be active. */
5519 gcc_assert ((opts->x_target_flags & MASK_LONG_DOUBLE_64) == 0
5520 || (opts->x_target_flags & MASK_LONG_DOUBLE_128) == 0);
5522 /* Save the initial options in case the user does function specific
5525 target_option_default_node = target_option_current_node
5526 = build_target_option_node (opts);
5528 /* Handle stack protector */
5529 if (!opts_set->x_ix86_stack_protector_guard)
5530 opts->x_ix86_stack_protector_guard
5531 = TARGET_HAS_BIONIC ? SSP_GLOBAL : SSP_TLS;
5533 /* Handle -mmemcpy-strategy= and -mmemset-strategy= */
5534 if (opts->x_ix86_tune_memcpy_strategy)
5536 char *str = xstrdup (opts->x_ix86_tune_memcpy_strategy);
5537 ix86_parse_stringop_strategy_string (str, false);
5541 if (opts->x_ix86_tune_memset_strategy)
5543 char *str = xstrdup (opts->x_ix86_tune_memset_strategy);
5544 ix86_parse_stringop_strategy_string (str, true);
/* Implement the TARGET_OPTION_OVERRIDE hook.  Runs the full i386 option
   processing on the global option state and registers the i386-specific
   RTL passes with the pass manager.  */
ix86_option_override (void)
  /* Build the machine-specific passes and say where they run:
     vzeroupper insertion immediately after register allocation
     ("reload"), and the scalar-to-vector (STV) pass immediately after
     "combine".  */
  opt_pass *pass_insert_vzeroupper = make_pass_insert_vzeroupper (g);
  struct register_pass_info insert_vzeroupper_info
    = { pass_insert_vzeroupper, "reload",
	1, PASS_POS_INSERT_AFTER

  opt_pass *pass_stv = make_pass_stv (g);
  struct register_pass_info stv_info
    = { pass_stv, "combine",
	1, PASS_POS_INSERT_AFTER

  /* MAIN_ARGS_P is true: this is the command-line invocation, operating
     on the global options.  */
  ix86_option_override_internal (true, &global_options, &global_options_set);

  /* This needs to be done at start up.  It's convenient to do it here.  */
  register_pass (&insert_vzeroupper_info);
  register_pass (&stv_info);
/* Implement the TARGET_OFFLOAD_OPTIONS hook.  Return a freshly
   xstrdup'd option string telling an offload (accelerator) compiler
   which host ABI is in effect; ownership transfers to the caller.
   NOTE(review): the condition selecting between the two returns
   (presumably TARGET_LP64) is not visible in this chunk — confirm
   against the full source.  */
ix86_offload_options (void)
  return xstrdup ("-foffload-abi=lp64");
  return xstrdup ("-foffload-abi=ilp32");
/* Update register usage after having seen the compiler flags.  Marks
   registers of ISA extensions that are disabled (REX, MMX, SSE, x87,
   AVX-512, MPX) as fixed and call-used, blanks their names, and
   recomputes CLOBBERED_REGS from the active calling convention.
   NOTE(review): the guard conditions for several of the loops below
   (e.g. !TARGET_64BIT, !TARGET_MMX, !TARGET_SSE, !TARGET_MPX) are
   elided in this chunk.  */
ix86_conditional_register_usage (void)
  /* For 32-bit targets, squash the REX registers (r8-r15 and the upper
     SSE/AVX-512 registers) — they only exist in 64-bit mode.  */
    for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
      fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
    for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
      fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
    for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
      fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";

  /* See the definition of CALL_USED_REGISTERS in i386.h: entries > 1 are
     conditional on the ABI, resolved by this mask.  */
  c_mask = CALL_USED_REGISTERS_MASK (TARGET_64BIT_MS_ABI);

  CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);

  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
      /* Set/reset conditionally defined registers from
	 CALL_USED_REGISTERS initializer.  */
      if (call_used_regs[i] > 1)
	call_used_regs[i] = !!(call_used_regs[i] & c_mask);

      /* Calculate registers of CLOBBERED_REGS register set
	 as call used registers from GENERAL_REGS register set.  */
      if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
	  && call_used_regs[i])
	SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);

  /* If MMX is disabled, squash the registers.  */
    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
      if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";

  /* If SSE is disabled, squash the registers.  */
    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
      if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";

  /* If the FPU is disabled, squash the x87 stack registers.  */
  if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
      if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";

  /* If AVX512F is disabled, squash the upper SSE registers and the
     opmask registers k0-k7.  */
  if (! TARGET_AVX512F)
      for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";

      for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";

  /* If MPX is disabled, squash the bound registers.  */
    for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
      fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
/* Save the current options into PTR so they can later be restored for
   function-specific (attribute/pragma target) compilation.  Copies both
   the derived global state (ix86_arch, ix86_tune, ...) and the raw
   option values out of OPTS.  Counterpart of
   ix86_function_specific_restore.  */
ix86_function_specific_save (struct cl_target_option *ptr,
			     struct gcc_options *opts)
  /* Derived globals computed by ix86_option_override_internal.  */
  ptr->arch = ix86_arch;
  ptr->schedule = ix86_schedule;
  ptr->prefetch_sse = x86_prefetch_sse;
  ptr->tune = ix86_tune;
  ptr->branch_cost = ix86_branch_cost;
  ptr->tune_defaulted = ix86_tune_defaulted;
  ptr->arch_specified = ix86_arch_specified;
  /* Raw option values, copied field by field from OPTS.  */
  ptr->x_ix86_isa_flags_explicit = opts->x_ix86_isa_flags_explicit;
  ptr->x_ix86_target_flags_explicit = opts->x_ix86_target_flags_explicit;
  ptr->x_recip_mask_explicit = opts->x_recip_mask_explicit;
  ptr->x_ix86_arch_string = opts->x_ix86_arch_string;
  ptr->x_ix86_tune_string = opts->x_ix86_tune_string;
  ptr->x_ix86_cmodel = opts->x_ix86_cmodel;
  ptr->x_ix86_abi = opts->x_ix86_abi;
  ptr->x_ix86_asm_dialect = opts->x_ix86_asm_dialect;
  ptr->x_ix86_branch_cost = opts->x_ix86_branch_cost;
  ptr->x_ix86_dump_tunes = opts->x_ix86_dump_tunes;
  ptr->x_ix86_force_align_arg_pointer = opts->x_ix86_force_align_arg_pointer;
  ptr->x_ix86_force_drap = opts->x_ix86_force_drap;
  ptr->x_ix86_incoming_stack_boundary_arg = opts->x_ix86_incoming_stack_boundary_arg;
  ptr->x_ix86_pmode = opts->x_ix86_pmode;
  ptr->x_ix86_preferred_stack_boundary_arg = opts->x_ix86_preferred_stack_boundary_arg;
  ptr->x_ix86_recip_name = opts->x_ix86_recip_name;
  ptr->x_ix86_regparm = opts->x_ix86_regparm;
  ptr->x_ix86_section_threshold = opts->x_ix86_section_threshold;
  ptr->x_ix86_sse2avx = opts->x_ix86_sse2avx;
  ptr->x_ix86_stack_protector_guard = opts->x_ix86_stack_protector_guard;
  ptr->x_ix86_stringop_alg = opts->x_ix86_stringop_alg;
  ptr->x_ix86_tls_dialect = opts->x_ix86_tls_dialect;
  ptr->x_ix86_tune_ctrl_string = opts->x_ix86_tune_ctrl_string;
  ptr->x_ix86_tune_memcpy_strategy = opts->x_ix86_tune_memcpy_strategy;
  ptr->x_ix86_tune_memset_strategy = opts->x_ix86_tune_memset_strategy;
  ptr->x_ix86_tune_no_default = opts->x_ix86_tune_no_default;
  ptr->x_ix86_veclibabi_type = opts->x_ix86_veclibabi_type;

  /* The fields are char but the variables are not; make sure the
     values fit in the fields.  */
  gcc_assert (ptr->arch == ix86_arch);
  gcc_assert (ptr->schedule == ix86_schedule);
  gcc_assert (ptr->tune == ix86_tune);
  gcc_assert (ptr->branch_cost == ix86_branch_cost);
5703 /* Restore the current options */
/* Inverse of ix86_function_specific_save: copy the saved state in PTR
   back into the ix86_* globals and OPTS, then recompute the derived
   cost table, arch-feature bits and tune-feature bits when the arch or
   tune actually changed.
   NOTE(review): return type, braces and the declaration of the loop
   variable `i' are elided from this listing; code kept verbatim.  */
5706 ix86_function_specific_restore (struct gcc_options *opts,
5707 struct cl_target_option *ptr)
/* Remember old arch/tune so we only redo the derived tables on change.  */
5709 enum processor_type old_tune = ix86_tune;
5710 enum processor_type old_arch = ix86_arch;
5711 unsigned int ix86_arch_mask;
5714 /* We don't change -fPIC. */
5715 opts->x_flag_pic = flag_pic;
5717 ix86_arch = (enum processor_type) ptr->arch;
5718 ix86_schedule = (enum attr_cpu) ptr->schedule;
5719 ix86_tune = (enum processor_type) ptr->tune;
5720 x86_prefetch_sse = ptr->prefetch_sse;
5721 opts->x_ix86_branch_cost = ptr->branch_cost;
5722 ix86_tune_defaulted = ptr->tune_defaulted;
5723 ix86_arch_specified = ptr->arch_specified;
/* Field-for-field restore, mirroring the save routine above.  */
5724 opts->x_ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
5725 opts->x_ix86_target_flags_explicit = ptr->x_ix86_target_flags_explicit;
5726 opts->x_recip_mask_explicit = ptr->x_recip_mask_explicit;
5727 opts->x_ix86_arch_string = ptr->x_ix86_arch_string;
5728 opts->x_ix86_tune_string = ptr->x_ix86_tune_string;
5729 opts->x_ix86_cmodel = ptr->x_ix86_cmodel;
5730 opts->x_ix86_abi = ptr->x_ix86_abi;
5731 opts->x_ix86_asm_dialect = ptr->x_ix86_asm_dialect;
5732 opts->x_ix86_branch_cost = ptr->x_ix86_branch_cost;
5733 opts->x_ix86_dump_tunes = ptr->x_ix86_dump_tunes;
5734 opts->x_ix86_force_align_arg_pointer = ptr->x_ix86_force_align_arg_pointer;
5735 opts->x_ix86_force_drap = ptr->x_ix86_force_drap;
5736 opts->x_ix86_incoming_stack_boundary_arg = ptr->x_ix86_incoming_stack_boundary_arg;
5737 opts->x_ix86_pmode = ptr->x_ix86_pmode;
5738 opts->x_ix86_preferred_stack_boundary_arg = ptr->x_ix86_preferred_stack_boundary_arg;
5739 opts->x_ix86_recip_name = ptr->x_ix86_recip_name;
5740 opts->x_ix86_regparm = ptr->x_ix86_regparm;
5741 opts->x_ix86_section_threshold = ptr->x_ix86_section_threshold;
5742 opts->x_ix86_sse2avx = ptr->x_ix86_sse2avx;
5743 opts->x_ix86_stack_protector_guard = ptr->x_ix86_stack_protector_guard;
5744 opts->x_ix86_stringop_alg = ptr->x_ix86_stringop_alg;
5745 opts->x_ix86_tls_dialect = ptr->x_ix86_tls_dialect;
5746 opts->x_ix86_tune_ctrl_string = ptr->x_ix86_tune_ctrl_string;
5747 opts->x_ix86_tune_memcpy_strategy = ptr->x_ix86_tune_memcpy_strategy;
5748 opts->x_ix86_tune_memset_strategy = ptr->x_ix86_tune_memset_strategy;
5749 opts->x_ix86_tune_no_default = ptr->x_ix86_tune_no_default;
5750 opts->x_ix86_veclibabi_type = ptr->x_ix86_veclibabi_type;
5751 ix86_tune_cost = processor_target_table[ix86_tune].cost;
5752 /* TODO: ix86_cost should be chosen at instruction or function granuality
5753 so for cold code we use size_cost even in !optimize_size compilation. */
5754 if (opts->x_optimize_size)
5755 ix86_cost = &ix86_size_cost;
5757 ix86_cost = ix86_tune_cost;
5759 /* Recreate the arch feature tests if the arch changed */
5760 if (old_arch != ix86_arch)
5762 ix86_arch_mask = 1u << ix86_arch;
5763 for (i = 0; i < X86_ARCH_LAST; ++i)
5764 ix86_arch_features[i]
5765 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
5768 /* Recreate the tune optimization tests */
5769 if (old_tune != ix86_tune)
5770 set_ix86_tune_features (ix86_tune, false);
5773 /* Adjust target options after streaming them in. This is mainly about
5774 reconciling them with global options. */
/* Rewrites the streamed-in code model in PTR: when PIC is in effect the
   plain models are promoted to their *_PIC variants (kernel has no PIC
   variant and is diagnosed); otherwise the *_PIC variants are demoted
   back to the plain ones.
   NOTE(review): the `case' labels, `break's, surrounding `if (flag_pic)'
   test and braces are elided from this listing; code kept verbatim.  */
5777 ix86_function_specific_post_stream_in (struct cl_target_option *ptr)
5779 /* flag_pic is a global option, but ix86_cmodel is target saved option
5780 partly computed from flag_pic. If flag_pic is on, adjust x_ix86_cmodel
5781 for PIC, or error out. */
5783 switch (ptr->x_ix86_cmodel)
5786 ptr->x_ix86_cmodel = CM_SMALL_PIC;
5790 ptr->x_ix86_cmodel = CM_MEDIUM_PIC;
5794 ptr->x_ix86_cmodel = CM_LARGE_PIC;
5798 error ("code model %s does not support PIC mode", "kernel");
/* Non-PIC direction: strip the _PIC suffix from the streamed model.  */
5805 switch (ptr->x_ix86_cmodel)
5808 ptr->x_ix86_cmodel = CM_SMALL;
5812 ptr->x_ix86_cmodel = CM_MEDIUM;
5816 ptr->x_ix86_cmodel = CM_LARGE;
5824 /* Print the current options */
/* Dump the saved options PTR to FILE at the given INDENT: arch, tune,
   branch cost, and the human-readable target string produced by
   ix86_target_string (which is heap-allocated and freed here).
   NOTE(review): return type, braces, the `target_string' declaration
   and some fprintf arguments are elided from this listing.  */
5827 ix86_function_specific_print (FILE *file, int indent,
5828 struct cl_target_option *ptr)
5831 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
5832 NULL, NULL, ptr->x_ix86_fpmath, false);
/* Guard against out-of-range enum values before indexing the table.  */
5834 gcc_assert (ptr->arch < PROCESSOR_max);
5835 fprintf (file, "%*sarch = %d (%s)\n",
5837 ptr->arch, processor_target_table[ptr->arch].name);
5839 gcc_assert (ptr->tune < PROCESSOR_max);
5840 fprintf (file, "%*stune = %d (%s)\n",
5842 ptr->tune, processor_target_table[ptr->tune].name);
5844 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
5848 fprintf (file, "%*s%s\n", indent, "", target_string);
5849 free (target_string);
5854 /* Inner function to process the attribute((target(...))), take an argument and
5855 set the current options from the argument. If we have a list, recursively go
/* Parses one attribute((target("..."))) argument (or recurses over a
   TREE_LIST of them), matching each comma-separated token against the
   `attrs' table below and applying it to OPTS / OPTS_SET.  P_STRINGS
   receives ownership of xstrdup'ed values for the string options
   (arch=, tune=).  Returns a truth value consumed by the caller.
   NOTE(review): large parts of the control flow (braces, `ret'
   handling, `opt' lookup) are elided from this listing.  */
5859 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
5860 struct gcc_options *opts,
5861 struct gcc_options *opts_set,
5862 struct gcc_options *enum_opts_set)
/* Table-entry constructors: S is the attribute string, O the option
   enum, M (YES/NO forms) the target_flags mask to set or clear.  */
5867 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
5868 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
5869 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
5870 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
5871 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
5887 enum ix86_opt_type type;
/* ISA attributes: each maps the attribute name to its -m option.  */
5892 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
5893 IX86_ATTR_ISA ("abm", OPT_mabm),
5894 IX86_ATTR_ISA ("bmi", OPT_mbmi),
5895 IX86_ATTR_ISA ("bmi2", OPT_mbmi2),
5896 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
5897 IX86_ATTR_ISA ("tbm", OPT_mtbm),
5898 IX86_ATTR_ISA ("aes", OPT_maes),
5899 IX86_ATTR_ISA ("sha", OPT_msha),
5900 IX86_ATTR_ISA ("avx", OPT_mavx),
5901 IX86_ATTR_ISA ("avx2", OPT_mavx2),
5902 IX86_ATTR_ISA ("avx512f", OPT_mavx512f),
5903 IX86_ATTR_ISA ("avx512pf", OPT_mavx512pf),
5904 IX86_ATTR_ISA ("avx512er", OPT_mavx512er),
5905 IX86_ATTR_ISA ("avx512cd", OPT_mavx512cd),
5906 IX86_ATTR_ISA ("avx512dq", OPT_mavx512dq),
5907 IX86_ATTR_ISA ("avx512bw", OPT_mavx512bw),
5908 IX86_ATTR_ISA ("avx512vl", OPT_mavx512vl),
5909 IX86_ATTR_ISA ("mmx", OPT_mmmx),
5910 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
5911 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
5912 IX86_ATTR_ISA ("sse", OPT_msse),
5913 IX86_ATTR_ISA ("sse2", OPT_msse2),
5914 IX86_ATTR_ISA ("sse3", OPT_msse3),
5915 IX86_ATTR_ISA ("sse4", OPT_msse4),
5916 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
5917 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
5918 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
5919 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
5920 IX86_ATTR_ISA ("fma4", OPT_mfma4),
5921 IX86_ATTR_ISA ("fma", OPT_mfma),
5922 IX86_ATTR_ISA ("xop", OPT_mxop),
5923 IX86_ATTR_ISA ("lwp", OPT_mlwp),
5924 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
5925 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
5926 IX86_ATTR_ISA ("f16c", OPT_mf16c),
5927 IX86_ATTR_ISA ("rtm", OPT_mrtm),
5928 IX86_ATTR_ISA ("hle", OPT_mhle),
5929 IX86_ATTR_ISA ("prfchw", OPT_mprfchw),
5930 IX86_ATTR_ISA ("rdseed", OPT_mrdseed),
5931 IX86_ATTR_ISA ("adx", OPT_madx),
5932 IX86_ATTR_ISA ("fxsr", OPT_mfxsr),
5933 IX86_ATTR_ISA ("xsave", OPT_mxsave),
5934 IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt),
5935 IX86_ATTR_ISA ("prefetchwt1", OPT_mprefetchwt1),
5936 IX86_ATTR_ISA ("clflushopt", OPT_mclflushopt),
5937 IX86_ATTR_ISA ("xsavec", OPT_mxsavec),
5938 IX86_ATTR_ISA ("xsaves", OPT_mxsaves),
5939 IX86_ATTR_ISA ("avx512vbmi", OPT_mavx512vbmi),
5940 IX86_ATTR_ISA ("avx512ifma", OPT_mavx512ifma),
5941 IX86_ATTR_ISA ("clwb", OPT_mclwb),
5942 IX86_ATTR_ISA ("pcommit", OPT_mpcommit),
5943 IX86_ATTR_ISA ("mwaitx", OPT_mmwaitx),
5944 IX86_ATTR_ISA ("clzero", OPT_mclzero),
5945 IX86_ATTR_ISA ("pku", OPT_mpku),
/* Enum option (fpmath=sse|387|both).  */
5948 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
5950 /* string options */
5951 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
5952 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
/* Flag options that set/clear a mask in target_flags.  */
5955 IX86_ATTR_YES ("cld",
5959 IX86_ATTR_NO ("fancy-math-387",
5960 OPT_mfancy_math_387,
5961 MASK_NO_FANCY_MATH_387),
5963 IX86_ATTR_YES ("ieee-fp",
5967 IX86_ATTR_YES ("inline-all-stringops",
5968 OPT_minline_all_stringops,
5969 MASK_INLINE_ALL_STRINGOPS),
5971 IX86_ATTR_YES ("inline-stringops-dynamically",
5972 OPT_minline_stringops_dynamically,
5973 MASK_INLINE_STRINGOPS_DYNAMICALLY),
5975 IX86_ATTR_NO ("align-stringops",
5976 OPT_mno_align_stringops,
5977 MASK_NO_ALIGN_STRINGOPS),
5979 IX86_ATTR_YES ("recip",
5985 /* If this is a list, recurse to get the options. */
5986 if (TREE_CODE (args) == TREE_LIST)
5990 for (; args; args = TREE_CHAIN (args))
5991 if (TREE_VALUE (args)
5992 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
5993 p_strings, opts, opts_set,
6000 else if (TREE_CODE (args) != STRING_CST)
6002 error ("attribute %<target%> argument not a string");
6006 /* Handle multiple arguments separated by commas. */
6007 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
6009 while (next_optstr && *next_optstr != '\0')
6011 char *p = next_optstr;
6013 char *comma = strchr (next_optstr, ',');
6014 const char *opt_string;
6015 size_t len, opt_len;
6020 enum ix86_opt_type type = ix86_opt_unknown;
/* Split off the token before the comma; advance past it.  */
6026 len = comma - next_optstr;
6027 next_optstr = comma + 1;
6035 /* Recognize no-xxx. */
6036 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
6045 /* Find the option. */
6048 for (i = 0; i < ARRAY_SIZE (attrs); i++)
6050 type = attrs[i].type;
6051 opt_len = attrs[i].len;
/* First-character check is a cheap pre-filter before memcmp.  */
6052 if (ch == attrs[i].string[0]
6053 && ((type != ix86_opt_str && type != ix86_opt_enum)
6056 && memcmp (p, attrs[i].string, opt_len) == 0)
6059 mask = attrs[i].mask;
6060 opt_string = attrs[i].string;
6065 /* Process the option. */
6068 error ("attribute(target(\"%s\")) is unknown", orig_p);
6072 else if (type == ix86_opt_isa)
6074 struct cl_decoded_option decoded;
/* Route ISA options through the regular option machinery so all the
   implied-ISA bookkeeping in ix86_handle_option applies.  */
6076 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
6077 ix86_handle_option (opts, opts_set,
6078 &decoded, input_location);
6081 else if (type == ix86_opt_yes || type == ix86_opt_no)
6083 if (type == ix86_opt_no)
6084 opt_set_p = !opt_set_p;
6087 opts->x_target_flags |= mask;
6089 opts->x_target_flags &= ~mask;
6092 else if (type == ix86_opt_str)
6096 error ("option(\"%s\") was already specified", opt_string);
/* Ownership of the xstrdup'ed value passes to the caller via
   p_strings; freed in ix86_valid_target_attribute_tree.  */
6100 p_strings[opt] = xstrdup (p + opt_len);
6103 else if (type == ix86_opt_enum)
6108 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
6110 set_option (opts, enum_opts_set, opt, value,
6111 p + opt_len, DK_UNSPECIFIED, input_location,
6115 error ("attribute(target(\"%s\")) is unknown", orig_p);
6127 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
/* Parses ARGS via ix86_valid_target_attribute_inner_p, reruns the
   option override when the result differs from the defaults, builds a
   target-option node, then restores the caller-visible arch/tune/fpmath
   strings so OPTS is not left pointing at attribute-local data.
   Returns error_mark_node when parsing failed.
   NOTE(review): return type, braces and the declarations of `t' and
   `i' are elided from this listing.  */
6130 ix86_valid_target_attribute_tree (tree args,
6131 struct gcc_options *opts,
6132 struct gcc_options *opts_set)
/* Remember originals so they can be restored before returning.  */
6134 const char *orig_arch_string = opts->x_ix86_arch_string;
6135 const char *orig_tune_string = opts->x_ix86_tune_string;
6136 enum fpmath_unit orig_fpmath_set = opts_set->x_ix86_fpmath;
6137 int orig_tune_defaulted = ix86_tune_defaulted;
6138 int orig_arch_specified = ix86_arch_specified;
6139 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
6142 struct cl_target_option *def
6143 = TREE_TARGET_OPTION (target_option_default_node);
6144 struct gcc_options enum_opts_set;
6146 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
6148 /* Process each of the options on the chain. */
6149 if (! ix86_valid_target_attribute_inner_p (args, option_strings, opts,
6150 opts_set, &enum_opts_set))
6151 return error_mark_node;
6153 /* If the changed options are different from the default, rerun
6154 ix86_option_override_internal, and then save the options away.
6155 The string options are attribute options, and will be undone
6156 when we copy the save structure. */
6157 if (opts->x_ix86_isa_flags != def->x_ix86_isa_flags
6158 || opts->x_target_flags != def->x_target_flags
6159 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
6160 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
6161 || enum_opts_set.x_ix86_fpmath)
6163 /* If we are using the default tune= or arch=, undo the string assigned,
6164 and use the default. */
6165 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
6167 opts->x_ix86_arch_string
6168 = ggc_strdup (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
6170 /* If arch= is set, clear all bits in x_ix86_isa_flags,
6171 except for ISA_64BIT, ABI_64, ABI_X32, and CODE16. */
6172 opts->x_ix86_isa_flags &= (OPTION_MASK_ISA_64BIT
6173 | OPTION_MASK_ABI_64
6174 | OPTION_MASK_ABI_X32
6175 | OPTION_MASK_CODE16);
6178 else if (!orig_arch_specified)
6179 opts->x_ix86_arch_string = NULL;
6181 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
6182 opts->x_ix86_tune_string
6183 = ggc_strdup (option_strings[IX86_FUNCTION_SPECIFIC_TUNE]);
6184 else if (orig_tune_defaulted)
6185 opts->x_ix86_tune_string = NULL;
6187 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
6188 if (enum_opts_set.x_ix86_fpmath)
6189 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
6190 else if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
6191 && TARGET_SSE_P (opts->x_ix86_isa_flags))
6193 if (TARGET_80387_P (opts->x_target_flags))
6194 opts->x_ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE
6197 opts->x_ix86_fpmath = (enum fpmath_unit) FPMATH_SSE;
6198 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
6201 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
6202 ix86_option_override_internal (false, opts, opts_set);
6204 /* Add any builtin functions with the new isa if any. */
6205 ix86_add_new_builtins (opts->x_ix86_isa_flags);
6207 /* Save the current options unless we are validating options for
6209 t = build_target_option_node (opts);
/* Undo the attribute-local string/fpmath changes for the caller.  */
6211 opts->x_ix86_arch_string = orig_arch_string;
6212 opts->x_ix86_tune_string = orig_tune_string;
6213 opts_set->x_ix86_fpmath = orig_fpmath_set;
6215 /* Free up memory allocated to hold the strings */
6216 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
6217 free (option_strings[i]);
6223 /* Hook to validate attribute((target("string"))). */
/* TARGET_OPTION_VALID_ATTRIBUTE_P hook.  Builds a scratch gcc_options
   initialized from the function's optimization node and the default
   target node, parses ARGS into it, and attaches the resulting
   target/optimization nodes to FNDECL.
   NOTE(review): return type, braces and the final return value are
   elided from this listing; code kept verbatim.  */
6226 ix86_valid_target_attribute_p (tree fndecl,
6227 tree ARG_UNUSED (name),
6229 int ARG_UNUSED (flags))
6231 struct gcc_options func_options;
6232 tree new_target, new_optimize;
6235 /* attribute((target("default"))) does nothing, beyond
6236 affecting multi-versioning. */
6237 if (TREE_VALUE (args)
6238 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
6239 && TREE_CHAIN (args) == NULL_TREE
6240 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
6243 tree old_optimize = build_optimization_node (&global_options);
6245 /* Get the optimization options of the current function. */
6246 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
6249 func_optimize = old_optimize;
6251 /* Init func_options. */
6252 memset (&func_options, 0, sizeof (func_options));
6253 init_options_struct (&func_options, NULL);
6254 lang_hooks.init_options_struct (&func_options);
6256 cl_optimization_restore (&func_options,
6257 TREE_OPTIMIZATION (func_optimize));
6259 /* Initialize func_options to the default before its target options can
6261 cl_target_option_restore (&func_options,
6262 TREE_TARGET_OPTION (target_option_default_node));
6264 new_target = ix86_valid_target_attribute_tree (args, &func_options,
6265 &global_options_set);
6267 new_optimize = build_optimization_node (&func_options);
6269 if (new_target == error_mark_node)
6272 else if (fndecl && new_target)
6274 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
/* Only attach a new optimization node if it actually changed.  */
6276 if (old_optimize != new_optimize)
6277 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
6280 finalize_options_struct (&func_options);
6286 /* Hook to determine if one function can safely inline another. */
/* TARGET_CAN_INLINE_P hook.  Inlining is allowed when the callee has no
   target attributes, disallowed when only the caller lacks them, and
   otherwise requires the callee's ISA flags to be a subset of the
   caller's and the non-ISA options (target flags, arch, tune, fpmath,
   branch cost) to match exactly.
   NOTE(review): return type, braces and the boolean result variable
   are elided from this listing; code kept verbatim.  */
6289 ix86_can_inline_p (tree caller, tree callee)
6292 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
6293 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
6295 /* If callee has no option attributes, then it is ok to inline. */
6299 /* If caller has no option attributes, but callee does then it is not ok to
6301 else if (!caller_tree)
6306 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
6307 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
6309 /* Callee's isa options should a subset of the caller's, i.e. a SSE4 function
6310 can inline a SSE2 function but a SSE2 function can't inline a SSE4
6312 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
6313 != callee_opts->x_ix86_isa_flags)
6316 /* See if we have the same non-isa options. */
6317 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
6320 /* See if arch, tune, etc. are the same. */
6321 else if (caller_opts->arch != callee_opts->arch)
6324 else if (caller_opts->tune != callee_opts->tune)
6327 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
6330 else if (caller_opts->branch_cost != callee_opts->branch_cost)
6341 /* Remember the last target of ix86_set_current_function. */
6342 static GTY(()) tree ix86_previous_fndecl;
6344 /* Set targets globals to the default (or current #pragma GCC target
6345 if active). Invalidate ix86_previous_fndecl cache. */
/* Restores global_options and the target-globals tables from
   target_option_current_node, then clears the fndecl cache so the next
   ix86_set_current_function call does a full switch.  The same
   restore sequence appears in ix86_set_current_function below.
   NOTE(review): return type and braces are elided from this listing.  */
6348 ix86_reset_previous_fndecl (void)
6350 tree new_tree = target_option_current_node;
6351 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
/* Reuse cached target globals when available; only build fresh ones
   for a non-default node that has none yet.  */
6352 if (TREE_TARGET_GLOBALS (new_tree))
6353 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
6354 else if (new_tree == target_option_default_node)
6355 restore_target_globals (&default_target_globals);
6357 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
6358 ix86_previous_fndecl = NULL_TREE;
6361 /* Establish appropriate back-end context for processing the function
6362 FNDECL. The argument might be NULL to indicate processing at top
6363 level, outside of any function scope. */
/* TARGET_SET_CURRENT_FUNCTION hook.  Caches the last fndecl so repeated
   calls for the same function are cheap; otherwise restores the
   function's (or default) target options and globals.
   NOTE(review): return type, braces, early `return's and the `old_tree'
   declaration are elided from this listing; code kept verbatim.  */
6365 ix86_set_current_function (tree fndecl)
6367 /* Only change the context if the function changes. This hook is called
6368 several times in the course of compiling a function, and we don't want to
6369 slow things down too much or call target_reinit when it isn't safe. */
6370 if (fndecl == ix86_previous_fndecl)
/* Work out which target node was in effect for the previous fndecl.  */
6374 if (ix86_previous_fndecl == NULL_TREE)
6375 old_tree = target_option_current_node;
6376 else if (DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl))
6377 old_tree = DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl);
6379 old_tree = target_option_default_node;
6381 if (fndecl == NULL_TREE)
6383 if (old_tree != target_option_current_node)
6384 ix86_reset_previous_fndecl ();
6388 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
6389 if (new_tree == NULL_TREE)
6390 new_tree = target_option_default_node;
6392 if (old_tree != new_tree)
/* Same restore sequence as ix86_reset_previous_fndecl.  */
6394 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
6395 if (TREE_TARGET_GLOBALS (new_tree))
6396 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
6397 else if (new_tree == target_option_default_node)
6398 restore_target_globals (&default_target_globals);
6400 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
6402 ix86_previous_fndecl = fndecl;
6404 /* 64-bit MS and SYSV ABI have different set of call used registers.
6405 Avoid expensive re-initialization of init_regs each time we switch
6406 function context. */
/* SI_REG being call-used is the marker distinguishing the two ABIs'
   register sets here.  */
6408 && (call_used_regs[SI_REG]
6409 == (cfun->machine->call_abi == MS_ABI)))
6414 /* Return true if this goes in large data/bss. */
/* Large data exists only for the medium code models; functions and
   automatic variables never qualify; explicit .ldata/.lbss section
   names force it; otherwise the decision is by size against
   ix86_section_threshold, treating unknown/zero sizes as large.
   NOTE(review): return type, braces and the explicit `return's are
   elided from this listing; code kept verbatim.  */
6417 ix86_in_large_data_p (tree exp)
6419 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
6422 /* Functions are never large data. */
6423 if (TREE_CODE (exp) == FUNCTION_DECL)
6426 /* Automatic variables are never large data. */
6427 if (TREE_CODE (exp) == VAR_DECL && !is_global_var (exp))
6430 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
6432 const char *section = DECL_SECTION_NAME (exp);
6433 if (strcmp (section, ".ldata") == 0
6434 || strcmp (section, ".lbss") == 0)
6440 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
6442 /* If this is an incomplete type with size 0, then we can't put it
6443 in data because it might be too big when completed. Also,
6444 int_size_in_bytes returns -1 if size can vary or is larger than
6445 an integer in which case also it is safer to assume that it goes in
6447 if (size <= 0 || size > ix86_section_threshold)
6454 /* Switch to the appropriate section for output of DECL.
6455 DECL is either a `VAR_DECL' node or a constant of some sort.
6456 RELOC indicates whether forming the initial value of DECL requires
6457 link-time relocations. */
/* For large-data decls, map the decl's section category to the
   corresponding ".l*" section name; everything else falls through to
   default_elf_select_section.
   NOTE(review): several `case' labels, `break's and braces are elided
   from this listing; code kept verbatim.  */
6459 ATTRIBUTE_UNUSED static section *
6460 x86_64_elf_select_section (tree decl, int reloc,
6461 unsigned HOST_WIDE_INT align)
6463 if (ix86_in_large_data_p (decl))
6465 const char *sname = NULL;
6466 unsigned int flags = SECTION_WRITE;
6467 switch (categorize_decl_for_section (decl, reloc))
6472 case SECCAT_DATA_REL:
6473 sname = ".ldata.rel";
6475 case SECCAT_DATA_REL_LOCAL:
6476 sname = ".ldata.rel.local";
6478 case SECCAT_DATA_REL_RO:
6479 sname = ".ldata.rel.ro";
6481 case SECCAT_DATA_REL_RO_LOCAL:
6482 sname = ".ldata.rel.ro.local";
6486 flags |= SECTION_BSS;
6489 case SECCAT_RODATA_MERGE_STR:
6490 case SECCAT_RODATA_MERGE_STR_INIT:
6491 case SECCAT_RODATA_MERGE_CONST:
6495 case SECCAT_SRODATA:
6502 /* We don't split these for medium model. Place them into
6503 default sections and hope for best. */
6508 /* We might get called with string constants, but get_named_section
6509 doesn't like them as they are not DECLs. Also, we need to set
6510 flags in that case. */
6512 return get_section (sname, flags, NULL);
6513 return get_named_section (decl, sname, reloc);
6516 return default_elf_select_section (decl, reloc, align);
6519 /* Select a set of attributes for section NAME based on the properties
6520 of DECL and whether or not RELOC indicates that DECL's initializer
6521 might contain runtime relocations. */
6523 static unsigned int ATTRIBUTE_UNUSED
6524 x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
6526 unsigned int flags = default_section_type_flags (decl, name, reloc);
6528 if (decl == NULL_TREE
6529 && (strcmp (name, ".ldata.rel.ro") == 0
6530 || strcmp (name, ".ldata.rel.ro.local") == 0))
6531 flags |= SECTION_RELRO;
6533 if (strcmp (name, ".lbss") == 0
6534 || strncmp (name, ".lbss.", 5) == 0
6535 || strncmp (name, ".gnu.linkonce.lb.", 16) == 0)
6536 flags |= SECTION_BSS;
6541 /* Build up a unique section name, expressed as a
6542 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
6543 RELOC indicates whether the initial value of EXP requires
6544 link-time relocations. */
/* Large-data analogue of default_unique_section: choose a ".l*" prefix
   from the decl's section category, optionally add a .gnu.linkonce
   prefix when COMDAT groups are unavailable, and build
   "<linkonce><prefix>.<decl-name>".
   NOTE(review): several `case' labels, `break's, braces and the
   `string' declaration are elided from this listing.  */
6546 static void ATTRIBUTE_UNUSED
6547 x86_64_elf_unique_section (tree decl, int reloc)
6549 if (ix86_in_large_data_p (decl))
6551 const char *prefix = NULL;
6552 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
6553 bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;
6555 switch (categorize_decl_for_section (decl, reloc))
6558 case SECCAT_DATA_REL:
6559 case SECCAT_DATA_REL_LOCAL:
6560 case SECCAT_DATA_REL_RO:
6561 case SECCAT_DATA_REL_RO_LOCAL:
6562 prefix = one_only ? ".ld" : ".ldata";
6565 prefix = one_only ? ".lb" : ".lbss";
6568 case SECCAT_RODATA_MERGE_STR:
6569 case SECCAT_RODATA_MERGE_STR_INIT:
6570 case SECCAT_RODATA_MERGE_CONST:
6571 prefix = one_only ? ".lr" : ".lrodata";
6573 case SECCAT_SRODATA:
6580 /* We don't split these for medium model. Place them into
6581 default sections and hope for best. */
6586 const char *name, *linkonce;
6589 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
6590 name = targetm.strip_name_encoding (name);
6592 /* If we're using one_only, then there needs to be a .gnu.linkonce
6593 prefix to the section name. */
6594 linkonce = one_only ? ".gnu.linkonce" : "";
6596 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
6598 set_decl_section_name (decl, string);
/* Non-large decls take the generic ELF unique-section path.  */
6602 default_unique_section (decl, reloc);
6605 #ifdef COMMON_ASM_OP
6606 /* This says how to output assembler code to declare an
6607 uninitialized external linkage data object.
6609 For medium model x86-64 we need to use .largecomm opcode for
/* Emits either ".largecomm" (medium model, object above the section
   threshold) or the target's COMMON_ASM_OP, followed by
   "name,size,alignment-in-bytes".
   NOTE(review): return type, braces and the closing #endif are elided
   from this listing; code kept verbatim.  */
6612 x86_elf_aligned_common (FILE *file,
6613 const char *name, unsigned HOST_WIDE_INT size,
6616 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
6617 && size > (unsigned int)ix86_section_threshold)
6618 fputs ("\t.largecomm\t", file);
6620 fputs (COMMON_ASM_OP, file);
6621 assemble_name (file, name);
6622 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
6623 size, align / BITS_PER_UNIT);
6627 /* Utility function for targets to use in implementing
6628 ASM_OUTPUT_ALIGNED_BSS. */
/* Switches to .lbss for medium-model objects over the section
   threshold (ordinary bss otherwise), aligns, declares/labels the
   object, and reserves SIZE bytes (at least one so the label has a
   distinct address).
   NOTE(review): return type and braces are elided from this listing.  */
6631 x86_output_aligned_bss (FILE *file, tree decl, const char *name,
6632 unsigned HOST_WIDE_INT size, int align)
6634 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
6635 && size > (unsigned int)ix86_section_threshold)
6636 switch_to_section (get_named_section (decl, ".lbss", 0))
6638 switch_to_section (bss_section);
6639 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
6640 #ifdef ASM_DECLARE_OBJECT_NAME
6641 last_assemble_variable_decl = decl;
6642 ASM_DECLARE_OBJECT_NAME (file, name, decl);
6644 /* Standard thing is just output label for the object. */
6645 ASM_OUTPUT_LABEL (file, name);
6646 #endif /* ASM_DECLARE_OBJECT_NAME */
6647 ASM_OUTPUT_SKIP (file, size ? size : 1);
6650 /* Decide whether we must probe the stack before any space allocation
6651 on this target. It's essentially TARGET_STACK_PROBE except when
6652 -fstack-check causes the stack to be already probed differently. */
/* NOTE(review): return type, braces and the early `return false;' for
   the static-check case are elided from this listing.  */
6655 ix86_target_stack_probe (void)
6657 /* Do not probe the stack twice if static stack checking is enabled. */
6658 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
6661 return TARGET_STACK_PROBE;
6664 /* Decide whether we can make a sibling call to a function. DECL is the
6665 declaration of the function being targeted by the call and EXP is the
6666 CALL_EXPR representing the call. */
/* TARGET_FUNCTION_OK_FOR_SIBCALL hook.  Rejects sibcalls that would
   break stack alignment, mismatch return-value locations (notably the
   80387 register stack), cross from MS ABI into SYSV ABI, or need a
   call-clobbered address register that argument passing has consumed.
   NOTE(review): return type, braces, the declarations of `a'/`b' and
   several early `return's are elided from this listing.  */
6669 ix86_function_ok_for_sibcall (tree decl, tree exp)
6671 tree type, decl_or_type;
6673 bool bind_global = decl && !targetm.binds_local_p (decl);
6675 /* If we are generating position-independent code, we cannot sibcall
6676 optimize direct calls to global functions, as the PLT requires
6677 %ebx be live. (Darwin does not have a PLT.) */
6685 /* If we need to align the outgoing stack, then sibcalling would
6686 unalign the stack, which may break the called function. */
6687 if (ix86_minimum_incoming_stack_boundary (true)
6688 < PREFERRED_STACK_BOUNDARY)
6693 decl_or_type = decl;
6694 type = TREE_TYPE (decl);
6698 /* We're looking at the CALL_EXPR, we need the type of the function. */
6699 type = CALL_EXPR_FN (exp); /* pointer expression */
6700 type = TREE_TYPE (type); /* pointer type */
6701 type = TREE_TYPE (type); /* function type */
6702 decl_or_type = type;
6705 /* Check that the return value locations are the same. Like
6706 if we are returning floats on the 80387 register stack, we cannot
6707 make a sibcall from a function that doesn't return a float to a
6708 function that does or, conversely, from a function that does return
6709 a float to a function that doesn't; the necessary stack adjustment
6710 would not be executed. This is also the place we notice
6711 differences in the return value ABI. Note that it is ok for one
6712 of the functions to have void return type as long as the return
6713 value of the other is passed in a register. */
6714 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
6715 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
6717 if (STACK_REG_P (a) || STACK_REG_P (b))
6719 if (!rtx_equal_p (a, b))
6722 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
6724 else if (!rtx_equal_p (a, b))
6729 /* The SYSV ABI has more call-clobbered registers;
6730 disallow sibcalls from MS to SYSV. */
6731 if (cfun->machine->call_abi == MS_ABI
6732 && ix86_function_type_abi (type) == SYSV_ABI)
6737 /* If this call is indirect, we'll need to be able to use a
6738 call-clobbered register for the address of the target function.
6739 Make sure that all such registers are not used for passing
6740 parameters. Note that DLLIMPORT functions and call to global
6741 function via GOT slot are indirect. */
6743 || (bind_global && flag_pic && !flag_plt)
6744 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
6746 /* Check if regparm >= 3 since arg_reg_available is set to
6747 false if regparm == 0. If regparm is 1 or 2, there is
6748 always a call-clobbered register available.
6750 ??? The symbol indirect call doesn't need a call-clobbered
6751 register. But we don't know if this is a symbol indirect
6752 call or not here. */
6753 if (ix86_function_regparm (type, NULL) >= 3
6754 && !cfun->machine->arg_reg_available)
6759 /* Otherwise okay. That also includes certain types of indirect calls. */
6763 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
6764 and "sseregparm" calling convention attributes;
6765 arguments as in struct attribute_spec.handler. */
/* NOTE(review): this listing omits physical source lines (the numeric
   prefixes jump); the fragments below are kept byte-identical.
   Handler contract (per struct attribute_spec.handler): NODE is the
   decl/type the attribute applies to, NAME the attribute identifier;
   setting *NO_ADD_ATTRS suppresses attaching the attribute.  */
6768 ix86_handle_cconv_attribute (tree *node, tree name,
/* Reject the attribute on anything that is not a function/method type
   or a declaration that can carry one.  */
6773 if (TREE_CODE (*node) != FUNCTION_TYPE
6774 && TREE_CODE (*node) != METHOD_TYPE
6775 && TREE_CODE (*node) != FIELD_DECL
6776 && TREE_CODE (*node) != TYPE_DECL)
6778 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6780 *no_add_attrs = true;
6784 /* Can combine regparm with all attributes but fastcall, and thiscall. */
6785 if (is_attribute_p ("regparm", name))
6789 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
6791 error ("fastcall and regparm attributes are not compatible");
6794 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
/* NOTE(review): "regparam" below looks like a typo for "regparm" in the
   diagnostic text -- confirm against upstream before changing, since the
   message string is user-visible behavior.  */
6796 error ("regparam and thiscall attributes are not compatible");
/* Validate the regparm(N) argument: must be an integer constant
   no larger than REGPARM_MAX.  */
6799 cst = TREE_VALUE (args);
6800 if (TREE_CODE (cst) != INTEGER_CST)
6802 warning (OPT_Wattributes,
6803 "%qE attribute requires an integer constant argument",
6805 *no_add_attrs = true;
6807 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
6809 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
6811 *no_add_attrs = true;
6819 /* Do not warn when emulating the MS ABI. */
6820 if ((TREE_CODE (*node) != FUNCTION_TYPE
6821 && TREE_CODE (*node) != METHOD_TYPE)
6822 || ix86_function_type_abi (*node) != MS_ABI)
6823 warning (OPT_Wattributes, "%qE attribute ignored",
6825 *no_add_attrs = true;
6829 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
6830 if (is_attribute_p ("fastcall", name))
6832 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
6834 error ("fastcall and cdecl attributes are not compatible");
6836 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
6838 error ("fastcall and stdcall attributes are not compatible");
6840 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
6842 error ("fastcall and regparm attributes are not compatible");
6844 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
6846 error ("fastcall and thiscall attributes are not compatible");
6850 /* Can combine stdcall with fastcall (redundant), regparm and
6852 else if (is_attribute_p ("stdcall", name))
6854 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
6856 error ("stdcall and cdecl attributes are not compatible");
6858 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
6860 error ("stdcall and fastcall attributes are not compatible");
6862 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
6864 error ("stdcall and thiscall attributes are not compatible");
6868 /* Can combine cdecl with regparm and sseregparm. */
6869 else if (is_attribute_p ("cdecl", name))
6871 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
6873 error ("stdcall and cdecl attributes are not compatible");
6875 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
6877 error ("fastcall and cdecl attributes are not compatible");
6879 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
6881 error ("cdecl and thiscall attributes are not compatible");
/* thiscall is meant for C++ non-static member functions; warn (under
   -pedantic) when it is applied to a plain function type.  */
6884 else if (is_attribute_p ("thiscall", name))
6886 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
6887 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
6889 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
6891 error ("stdcall and thiscall attributes are not compatible");
6893 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
6895 error ("fastcall and thiscall attributes are not compatible");
6897 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
6899 error ("cdecl and thiscall attributes are not compatible");
6903 /* Can combine sseregparm with all attributes. */
6908 /* The transactional memory builtins are implicitly regparm or fastcall
6909 depending on the ABI. Override the generic do-nothing attribute that
6910 these builtins were declared with, and replace it with one of the two
6911 attributes that we expect elsewhere. */
/* NOTE(review): physical lines are omitted from this listing (numeric
   prefixes jump); fragments kept byte-identical.  */
6914 ix86_handle_tm_regparm_attribute (tree *node, tree, tree,
6915 int flags, bool *no_add_attrs)
6919 /* In no case do we want to add the placeholder attribute. */
6920 *no_add_attrs = true;
6922 /* The 64-bit ABI is unchanged for transactional memory. */
6926 /* ??? Is there a better way to validate 32-bit windows? We have
6927 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
/* CHECK_STACK_LIMIT > 0 is used here as a proxy for "32-bit Windows";
   on that target the TM builtins become fastcall, otherwise regparm(2).  */
6928 if (CHECK_STACK_LIMIT > 0)
6929 alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
6932 alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
6933 alt = tree_cons (get_identifier ("regparm"), alt, NULL);
/* Attach the substitute attribute list to the declaration.  */
6935 decl_attributes (node, alt, flags);
6940 /* This function determines from TYPE the calling-convention. */
/* Returns a bitmask of IX86_CALLCVT_* flags.  NOTE(review): physical
   lines are omitted from this listing; fragments kept byte-identical.  */
6943 ix86_get_callcvt (const_tree type)
6945 unsigned int ret = 0;
6950 return IX86_CALLCVT_CDECL;
/* Map explicit calling-convention attributes on TYPE to flag bits;
   the base conventions (cdecl/stdcall/fastcall/thiscall) are mutually
   exclusive, hence the else-if chain.  */
6952 attrs = TYPE_ATTRIBUTES (type);
6953 if (attrs != NULL_TREE)
6955 if (lookup_attribute ("cdecl", attrs))
6956 ret |= IX86_CALLCVT_CDECL;
6957 else if (lookup_attribute ("stdcall", attrs))
6958 ret |= IX86_CALLCVT_STDCALL;
6959 else if (lookup_attribute ("fastcall", attrs))
6960 ret |= IX86_CALLCVT_FASTCALL;
6961 else if (lookup_attribute ("thiscall", attrs))
6962 ret |= IX86_CALLCVT_THISCALL;
6964 /* Regparam isn't allowed for thiscall and fastcall. */
6965 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
6967 if (lookup_attribute ("regparm", attrs))
6968 ret |= IX86_CALLCVT_REGPARM;
6969 if (lookup_attribute ("sseregparm", attrs))
6970 ret |= IX86_CALLCVT_SSEREGPARM;
/* If an explicit base convention was found, we are done.  */
6973 if (IX86_BASE_CALLCVT(ret) != 0)
/* No explicit convention: derive a default.  -mrtd makes non-stdarg
   functions stdcall by default.  */
6977 is_stdarg = stdarg_p (type);
6978 if (TARGET_RTD && !is_stdarg)
6979 return IX86_CALLCVT_STDCALL | ret;
6983 || TREE_CODE (type) != METHOD_TYPE
6984 || ix86_function_type_abi (type) != MS_ABI)
6985 return IX86_CALLCVT_CDECL | ret;
/* MS-ABI methods default to thiscall.  */
6987 return IX86_CALLCVT_THISCALL;
6990 /* Return 0 if the attributes for two types are incompatible, 1 if they
6991 are compatible, and 2 if they are nearly compatible (which causes a
6992 warning to be generated). */
/* NOTE(review): physical lines are omitted from this listing; fragments
   kept byte-identical.  Non-function types and matching conventions
   presumably compare equal -- the returns sit in the omitted lines.  */
6995 ix86_comp_type_attributes (const_tree type1, const_tree type2)
6997 unsigned int ccvt1, ccvt2;
6999 if (TREE_CODE (type1) != FUNCTION_TYPE
7000 && TREE_CODE (type1) != METHOD_TYPE)
7003 ccvt1 = ix86_get_callcvt (type1);
7004 ccvt2 = ix86_get_callcvt (type2);
/* Types also disagree if they request different regparm counts.  */
7007 if (ix86_function_regparm (type1, NULL)
7008 != ix86_function_regparm (type2, NULL))
7014 /* Return the regparm value for a function with the indicated TYPE and DECL.
7015 DECL may be NULL when calling function indirectly
7016 or considering a libcall. */
/* NOTE(review): physical lines are omitted from this listing; fragments
   kept byte-identical.  */
7019 ix86_function_regparm (const_tree type, const_tree decl)
/* 64-bit: the register count is fixed by the ABI in use.  */
7026 return (ix86_function_type_abi (type) == SYSV_ABI
7027 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
7028 ccvt = ix86_get_callcvt (type);
7029 regparm = ix86_regparm;
/* An explicit regparm(N) attribute overrides the -mregparm default.  */
7031 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
7033 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
7036 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
7040 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
7042 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
7045 /* Use register calling convention for local functions when possible. */
7047 && TREE_CODE (decl) == FUNCTION_DECL)
7049 cgraph_node *target = cgraph_node::get (decl);
7051 target = target->function_symbol ();
7053 /* Caller and callee must agree on the calling convention, so
7054 checking here just optimize means that with
7055 __attribute__((optimize (...))) caller could use regparm convention
7056 and callee not, or vice versa. Instead look at whether the callee
7057 is optimized or not. */
7058 if (target && opt_for_fn (target->decl, optimize)
7059 && !(profile_flag && !flag_fentry))
/* Only local functions whose signature we may change are eligible
   for the automatic regparm promotion.  */
7061 cgraph_local_info *i = &target->local;
7062 if (i && i->local && i->can_change_signature)
7064 int local_regparm, globals = 0, regno;
7066 /* Make sure no regparm register is taken by a
7067 fixed register variable. */
7068 for (local_regparm = 0; local_regparm < REGPARM_MAX;
7070 if (fixed_regs[local_regparm])
7073 /* We don't want to use regparm(3) for nested functions as
7074 these use a static chain pointer in the third argument. */
7075 if (local_regparm == 3 && DECL_STATIC_CHAIN (target->decl))
7078 /* Save a register for the split stack. */
7079 if (local_regparm == 3 && flag_split_stack)
7082 /* Each fixed register usage increases register pressure,
7083 so less registers should be used for argument passing.
7084 This functionality can be overriden by an explicit
7086 for (regno = AX_REG; regno <= DI_REG; regno++)
7087 if (fixed_regs[regno])
7091 = globals < local_regparm ? local_regparm - globals : 0;
/* Never lower the count below what the attribute/default gave.  */
7093 if (local_regparm > regparm)
7094 regparm = local_regparm;
7102 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
7103 DFmode (2) arguments in SSE registers for a function with the
7104 indicated TYPE and DECL. DECL may be NULL when calling function
7105 indirectly or considering a libcall. Return -1 if any FP parameter
7106 should be rejected by error. This is used in siutation we imply SSE
7107 calling convetion but the function is called from another function with
7108 SSE disabled. Otherwise return 0. */
/* NOTE(review): physical lines are omitted from this listing; fragments
   kept byte-identical.  */
7111 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
/* This path is 32-bit only; 64-bit SSE argument passing is handled by
   the psABI classification code instead.  */
7113 gcc_assert (!TARGET_64BIT);
7115 /* Use SSE registers to pass SFmode and DFmode arguments if requested
7116 by the sseregparm attribute. */
7117 if (TARGET_SSEREGPARM
7118 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
/* sseregparm without SSE support is a hard error (when WARN).  */
7125 error ("calling %qD with attribute sseregparm without "
7126 "SSE/SSE2 enabled", decl);
7128 error ("calling %qT with attribute sseregparm without "
7129 "SSE/SSE2 enabled", type);
7140 cgraph_node *target = cgraph_node::get (decl);
7142 target = target->function_symbol ();
7144 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
7145 (and DFmode for SSE2) arguments in SSE registers. */
7147 /* TARGET_SSE_MATH */
7148 && (target_opts_for_fn (target->decl)->x_ix86_fpmath & FPMATH_SSE)
7149 && opt_for_fn (target->decl, optimize)
7150 && !(profile_flag && !flag_fentry))
7152 cgraph_local_info *i = &target->local;
7153 if (i && i->local && i->can_change_signature)
7155 /* Refuse to produce wrong code when local function with SSE enabled
7156 is called from SSE disabled function.
7157 FIXME: We need a way to detect these cases cross-ltrans partition
7158 and avoid using SSE calling conventions on local functions called
7159 from function with SSE disabled. For now at least delay the
7160 warning until we know we are going to produce wrong code.
7162 if (!TARGET_SSE && warn)
/* 2 if the callee was compiled with SSE2 (DFmode too), else 1.  */
7164 return TARGET_SSE2_P (target_opts_for_fn (target->decl)
7165 ->x_ix86_isa_flags) ? 2 : 1;
7172 /* Return true if EAX is live at the start of the function. Used by
7173 ix86_expand_prologue to determine if we need special help before
7174 calling allocate_stack_worker. */
7177 ix86_eax_live_at_start_p (void)
7179 /* Cheat. Don't bother working forward from ix86_function_regparm
7180 to the function type to whether an actual argument is located in
7181 eax. Instead just look at cfg info, which is still close enough
7182 to correct at this point. This gives false positives for broken
7183 functions that might use uninitialized data that happens to be
7184 allocated in eax, but who cares? */
/* Hard register 0 is AX on x86; query liveness out of the entry block.  */
7185 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
/* Return true if the caller keeps (rather than the callee pops) the
   hidden aggregate-return pointer.  NOTE(review): physical lines are
   omitted from this listing; fragments kept byte-identical.  */
7189 ix86_keep_aggregate_return_pointer (tree fntype)
/* An explicit callee_pop_aggregate_return(N) attribute decides:
   N == 0 means the caller keeps the pointer.  */
7195 attr = lookup_attribute ("callee_pop_aggregate_return",
7196 TYPE_ATTRIBUTES (fntype));
7198 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
7200 /* For 32-bit MS-ABI the default is to keep aggregate
7202 if (ix86_function_type_abi (fntype) == MS_ABI)
/* Otherwise fall back to the target's configured default.  */
7205 return KEEP_AGGREGATE_RETURN_POINTER != 0;
7208 /* Value is the number of bytes of arguments automatically
7209 popped when returning from a subroutine call.
7210 FUNDECL is the declaration node of the function (as a tree),
7211 FUNTYPE is the data type of the function (as a tree),
7212 or for a library call it is an identifier node for the subroutine name.
7213 SIZE is the number of bytes of arguments passed on the stack.
7215 On the 80386, the RTD insn may be used to pop them if the number
7216 of args is fixed, but if the number is variable then the caller
7217 must pop them all. RTD can't be used for library calls now
7218 because the library is compiled with the Unix compiler.
7219 Use of RTD is a selectable option, since it is incompatible with
7220 standard Unix calling sequences. If the option is not selected,
7221 the caller must always pop the args.
7223 The attribute stdcall is equivalent to RTD on a per module basis. */
/* NOTE(review): physical lines are omitted from this listing; fragments
   kept byte-identical.  */
7226 ix86_return_pops_args (tree fundecl, tree funtype, int size)
7230 /* None of the 64-bit ABIs pop arguments. */
7234 ccvt = ix86_get_callcvt (funtype);
/* Callee-pop conventions pop everything -- unless the function is
   stdarg, in which case the caller must pop.  */
7236 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
7237 | IX86_CALLCVT_THISCALL)) != 0
7238 && ! stdarg_p (funtype))
7241 /* Lose any fake structure return argument if it is passed on the stack. */
7242 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
7243 && !ix86_keep_aggregate_return_pointer (funtype))
7245 int nregs = ix86_function_regparm (funtype, fundecl);
/* The hidden pointer occupies one word on the stack.  */
7247 return GET_MODE_SIZE (Pmode);
7253 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
/* NOTE(review): physical lines are omitted from this listing; fragments
   kept byte-identical.  */
7256 ix86_legitimate_combined_insn (rtx_insn *insn)
7258 /* Check operand constraints in case hard registers were propagated
7259 into insn pattern. This check prevents combine pass from
7260 generating insn patterns with invalid hard register operands.
7261 These invalid insns can eventually confuse reload to error out
7262 with a spill failure. See also PRs 46829 and 46843. */
/* NOTE: the assignment inside the condition is intentional -- it caches
   the recog result into INSN_CODE while testing that the pattern was
   recognized (>= 0).  */
7263 if ((INSN_CODE (insn) = recog (PATTERN (insn), insn, 0)) >= 0)
7267 extract_insn (insn);
7268 preprocess_constraints (insn);
7270 int n_operands = recog_data.n_operands;
7271 int n_alternatives = recog_data.n_alternatives;
/* Check each operand against every preferred constraint alternative.  */
7272 for (i = 0; i < n_operands; i++)
7274 rtx op = recog_data.operand[i];
7275 machine_mode mode = GET_MODE (op);
7276 const operand_alternative *op_alt;
7281 /* For pre-AVX disallow unaligned loads/stores where the
7282 instructions don't support it. */
7284 && VECTOR_MODE_P (mode)
7285 && misaligned_operand (op, mode))
7287 unsigned int min_align = get_attr_ssememalign (insn);
7289 || MEM_ALIGN (op) < min_align)
7293 /* A unary operator may be accepted by the predicate, but it
7294 is irrelevant for matching constraints. */
/* Strip a hard-register SUBREG down to its inner REG, remembering the
   register-number offset implied by the subreg.  */
7300 if (REG_P (SUBREG_REG (op))
7301 && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
7302 offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
7303 GET_MODE (SUBREG_REG (op)),
7306 op = SUBREG_REG (op);
/* Only hard-register operands need constraint validation here.  */
7309 if (!(REG_P (op) && HARD_REGISTER_P (op)))
7312 op_alt = recog_op_alt;
7314 /* Operand has no constraints, anything is OK. */
7315 win = !n_alternatives;
7317 alternative_mask preferred = get_preferred_alternatives (insn);
7318 for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
7320 if (!TEST_BIT (preferred, j))
7322 if (op_alt[i].anything_ok
7323 || (op_alt[i].matches != -1
7325 (recog_data.operand[i],
7326 recog_data.operand[op_alt[i].matches]))
7327 || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
7342 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
7344 static unsigned HOST_WIDE_INT
7345 ix86_asan_shadow_offset (void)
7347 return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44)
7348 : HOST_WIDE_INT_C (0x7fff8000))
7349 : (HOST_WIDE_INT_1 << 29);
7352 /* Argument support functions. */
7354 /* Return true when register may be used to pass function parameters. */
/* NOTE(review): physical lines are omitted from this listing; fragments
   kept byte-identical.  */
7356 ix86_function_arg_regno_p (int regno)
7359 enum calling_abi call_abi;
7360 const int *parm_regs;
/* Bound registers can carry pointer-bounds arguments under MPX.  */
7362 if (TARGET_MPX && BND_REGNO_P (regno))
/* 32-bit cases: integer regparm registers, then MMX/SSE argument
   registers when those ISAs are enabled.  */
7368 return (regno < REGPARM_MAX
7369 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
7371 return (regno < REGPARM_MAX
7372 || (TARGET_MMX && MMX_REGNO_P (regno)
7373 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
7374 || (TARGET_SSE && SSE_REGNO_P (regno)
7375 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
7378 if (TARGET_SSE && SSE_REGNO_P (regno)
7379 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
7382 /* TODO: The function should depend on current function ABI but
7383 builtins.c would need updating then. Therefore we use the
7385 call_abi = ix86_cfun_abi ();
7387 /* RAX is used as hidden argument to va_arg functions. */
7388 if (call_abi == SYSV_ABI && regno == AX_REG)
/* Check the integer parameter register table of the active ABI.  */
7391 if (call_abi == MS_ABI)
7392 parm_regs = x86_64_ms_abi_int_parameter_registers;
7394 parm_regs = x86_64_int_parameter_registers;
7396 for (i = 0; i < (call_abi == MS_ABI
7397 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
7398 if (regno == parm_regs[i])
7403 /* Return if we do not know how to pass TYPE solely in registers. */
7406 ix86_must_pass_in_stack (machine_mode mode, const_tree type)
7408 if (must_pass_in_stack_var_size_or_pad (mode, type))
7411 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
7412 The layout_type routine is crafty and tries to trick us into passing
7413 currently unsupported vector types on the stack by using TImode. */
7414 return (!TARGET_64BIT && mode == TImode
7415 && type && TREE_CODE (type) != VECTOR_TYPE);
7418 /* It returns the size, in bytes, of the area reserved for arguments passed
7419 in registers for the function represented by fndecl dependent to the used
/* NOTE(review): physical lines are omitted from this listing; fragments
   kept byte-identical.  */
7422 ix86_reg_parm_stack_space (const_tree fndecl)
7424 enum calling_abi call_abi = SYSV_ABI;
/* FNDECL may be either a FUNCTION_DECL or a function type.  */
7425 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
7426 call_abi = ix86_function_abi (fndecl);
7428 call_abi = ix86_function_type_abi (fndecl);
/* Only the 64-bit MS ABI reserves home space for register arguments.  */
7429 if (TARGET_64BIT && call_abi == MS_ABI)
7434 /* We add this as a workaround in order to use libc_has_function
/* Thin wrapper that simply forwards to the generic target hook.  */
7437 ix86_libc_has_function (enum function_class fn_class)
7439 return targetm.libc_has_function (fn_class);
7442 /* Returns value SYSV_ABI, MS_ABI dependent on fntype,
7443 specifying the call abi used. */
/* NOTE(review): physical lines are omitted from this listing; fragments
   kept byte-identical.  */
7445 ix86_function_type_abi (const_tree fntype)
7447 enum calling_abi abi = ix86_abi;
/* No type or no attributes: use the configured default ABI.  */
7449 if (fntype == NULL_TREE || TYPE_ATTRIBUTES (fntype) == NULL_TREE)
/* Explicit ms_abi / sysv_abi attributes override the default.  */
7453 && lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
7456 error ("X32 does not support ms_abi attribute");
7460 else if (abi == MS_ABI
7461 && lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
7467 static enum calling_abi
7468 ix86_function_abi (const_tree fndecl)
7470 return fndecl ? ix86_function_type_abi (TREE_TYPE (fndecl)) : ix86_abi;
7473 /* Returns value SYSV_ABI, MS_ABI dependent on cfun,
7474 specifying the call abi used. */
7476 ix86_cfun_abi (void)
7478 return cfun ? cfun->machine->call_abi : ix86_abi;
/* Return true if FN carries the ms_hook_prologue attribute (and it is
   valid on FN).  NOTE(review): physical lines are omitted from this
   listing; fragments kept byte-identical.  */
7482 ix86_function_ms_hook_prologue (const_tree fn)
7484 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
/* Hot-patch prologues cannot be combined with nested functions (the
   static chain setup would clobber the patchable bytes).  */
7486 if (decl_function_context (fn) != NULL_TREE)
7487 error_at (DECL_SOURCE_LOCATION (fn),
7488 "ms_hook_prologue is not compatible with nested function");
7495 /* Write the extra assembler code needed to declare a function properly. */
/* NOTE(review): physical lines are omitted from this listing; fragments
   kept byte-identical.  */
7498 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
7501 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
/* Hot-patchable functions are preceded by a pad of 0xCC (int3) filler
   bytes that a patcher can overwrite with a jump.  */
7505 int i, filler_count = (TARGET_64BIT ? 32 : 16);
7506 unsigned int filler_cc = 0xcccccccc;
7508 for (i = 0; i < filler_count; i += 4)
7509 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
7512 #ifdef SUBTARGET_ASM_UNWIND_INIT
7513 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
7516 ASM_OUTPUT_LABEL (asm_out_file, fname);
7518 /* Output magic byte marker, if hot-patch attribute is set. */
7523 /* leaq [%rsp + 0], %rsp */
/* 64-bit: an 8-byte no-op lea that hot-patch tools recognize.  */
7524 asm_fprintf (asm_out_file, ASM_BYTE
7525 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n")
7529 /* movl.s %edi, %edi
7531 movl.s %esp, %ebp */
/* 32-bit: the canonical 2-byte mov + standard prologue bytes used by
   Windows hot-patching.  */
7532 asm_fprintf (asm_out_file, ASM_BYTE
7533 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
7539 extern void init_regs (void);
7541 /* Implementation of call abi switching target hook. Specific to FNDECL
7542 the specific call register sets are set. See also
7543 ix86_conditional_register_usage for more details. */
/* Record FNDECL's calling ABI in the per-function machine state.  */
7545 ix86_call_abi_override (const_tree fndecl)
7547 cfun->machine->call_abi = ix86_function_abi (fndecl);
7550 /* Return 1 if pseudo register should be created and used to hold
7551 GOT address for PIC code. */
/* NOTE(review): this listing ends the condition mid-expression (lines
   omitted); fragments kept byte-identical.  */
7553 ix86_use_pseudo_pic_reg (void)
7556 && (ix86_cmodel == CM_SMALL_PIC
7563 /* Initialize large model PIC register. */
/* TMP_REGNO is a scratch hard register used to materialize the GOT
   offset.  NOTE(review): physical lines are omitted from this listing;
   fragments kept byte-identical.  */
7566 ix86_init_large_pic_reg (unsigned int tmp_regno)
7568 rtx_code_label *label;
/* The large PIC model only exists in 64-bit mode.  */
7571 gcc_assert (Pmode == DImode);
7572 label = gen_label_rtx ();
/* Keep the label from being deleted -- the set_rip pattern refers to it.  */
7574 LABEL_PRESERVE_P (label) = 1;
7575 tmp_reg = gen_rtx_REG (Pmode, tmp_regno);
7576 gcc_assert (REGNO (pic_offset_table_rtx) != tmp_regno);
/* pic_reg = &label (via RIP); tmp = _GLOBAL_OFFSET_TABLE_ - label;
   pic_reg += tmp  =>  pic_reg points at the GOT.  */
7577 emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
7579 emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
7580 emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
7581 pic_offset_table_rtx, tmp_reg));
7584 /* Create and initialize PIC register if required. */
/* NOTE(review): physical lines are omitted from this listing; fragments
   kept byte-identical.  */
7586 ix86_init_pic_reg (void)
/* Nothing to do unless a pseudo PIC register is in use.  */
7591 if (!ix86_use_pseudo_pic_reg ())
7598 if (ix86_cmodel == CM_LARGE_PIC)
7599 ix86_init_large_pic_reg (R11_REG)
7601 emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
7605 /* If there is future mcount call in the function it is more profitable
7606 to emit SET_GOT into ABI defined REAL_PIC_OFFSET_TABLE_REGNUM. */
7607 rtx reg = crtl->profile
7608 ? gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM)
7609 : pic_offset_table_rtx;
7610 rtx_insn *insn = emit_insn (gen_set_got (reg));
7611 RTX_FRAME_RELATED_P (insn) = 1;
7613 emit_move_insn (pic_offset_table_rtx, reg);
7614 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
/* Splice the initialization sequence onto the entry edge so it runs
   before any other code in the function.  */
7620 entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
7621 insert_insn_on_edge (seq, entry_edge);
7622 commit_one_edge_insertion (entry_edge);
7625 /* Initialize a variable CUM of type CUMULATIVE_ARGS
7626 for a call to a function whose data type is FNTYPE.
7627 For a library call, FNTYPE is 0. */
/* NOTE(review): physical lines are omitted from this listing; fragments
   kept byte-identical.  */
7630 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
7631 tree fntype, /* tree ptr for function decl */
7632 rtx libname, /* SYMBOL_REF of library name or 0 */
7636 struct cgraph_local_info *i = NULL;
7637 struct cgraph_node *target = NULL;
7639 memset (cum, 0, sizeof (*cum));
/* For a known callee, resolve aliases and use its cgraph info so we
   can tailor argument passing to the real function.  */
7643 target = cgraph_node::get (fndecl);
7646 target = target->function_symbol ();
7647 i = cgraph_node::local_info (target->decl);
7648 cum->call_abi = ix86_function_abi (target->decl);
7651 cum->call_abi = ix86_function_abi (fndecl);
7654 cum->call_abi = ix86_function_type_abi (fntype);
7656 cum->caller = caller;
7658 /* Set up the number of registers to use for passing arguments. */
7659 cum->nregs = ix86_regparm;
7662 cum->nregs = (cum->call_abi == SYSV_ABI
7663 ? X86_64_REGPARM_MAX
7664 : X86_64_MS_REGPARM_MAX);
7668 cum->sse_nregs = SSE_REGPARM_MAX;
7671 cum->sse_nregs = (cum->call_abi == SYSV_ABI
7672 ? X86_64_SSE_REGPARM_MAX
7673 : X86_64_MS_SSE_REGPARM_MAX);
7677 cum->mmx_nregs = MMX_REGPARM_MAX;
/* Enable psABI-mismatch warnings by default; they are cleared below
   for stdarg functions.  */
7678 cum->warn_avx512f = true;
7679 cum->warn_avx = true;
7680 cum->warn_sse = true;
7681 cum->warn_mmx = true;
7683 /* Because type might mismatch in between caller and callee, we need to
7684 use actual type of function for local calls.
7685 FIXME: cgraph_analyze can be told to actually record if function uses
7686 va_start so for local functions maybe_vaarg can be made aggressive
7688 FIXME: once typesytem is fixed, we won't need this code anymore. */
7689 if (i && i->local && i->can_change_signature)
7690 fntype = TREE_TYPE (target->decl);
7691 cum->stdarg = stdarg_p (fntype);
/* Unprototyped calls may be variadic, so treat them as maybe-vaarg.  */
7692 cum->maybe_vaarg = (fntype
7693 ? (!prototype_p (fntype) || stdarg_p (fntype))
7696 cum->bnd_regno = FIRST_BND_REG;
7697 cum->bnds_in_bt = 0;
7698 cum->force_bnd_pass = 0;
7703 /* If there are variable arguments, then we won't pass anything
7704 in registers in 32-bit mode. */
7705 if (stdarg_p (fntype))
7708 /* Since in 32-bit, variable arguments are always passed on
7709 stack, there is scratch register available for indirect
7711 cfun->machine->arg_reg_available = true;
7714 cum->warn_avx512f = false;
7715 cum->warn_avx = false;
7716 cum->warn_sse = false;
7717 cum->warn_mmx = false;
7721 /* Use ecx and edx registers if function has fastcall attribute,
7722 else look for regparm information. */
7725 unsigned int ccvt = ix86_get_callcvt (fntype);
7726 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
7729 cum->fastcall = 1; /* Same first register as in fastcall. */
7731 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
7737 cum->nregs = ix86_function_regparm (fntype, fndecl);
7740 /* Set up the number of SSE registers used for passing SFmode
7741 and DFmode arguments. Warn for mismatching ABI. */
7742 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
/* Record whether any call-clobbered register remains for indirect
   sibcalls (used by ix86_function_ok_for_sibcall).  */
7745 cfun->machine->arg_reg_available = (cum->nregs > 0);
7748 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
7749 But in the case of vector types, it is some vector mode.
7751 When we have only some of our vector isa extensions enabled, then there
7752 are some modes for which vector_mode_supported_p is false. For these
7753 modes, the generic vector support in gcc will choose some non-vector mode
7754 in order to implement the type. By computing the natural mode, we'll
7755 select the proper ABI location for the operand and not depend on whatever
7756 the middle-end decides to do with these vector types.
7758 The midde-end can't deal with the vector types > 16 bytes. In this
7759 case, we return the original mode and warn ABI change if CUM isn't
7762 If INT_RETURN is true, warn ABI change if the vector mode isn't
7763 available for function return value. */
/* NOTE(review): physical lines are omitted from this listing; fragments
   kept byte-identical.  */
7766 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
7769 machine_mode mode = TYPE_MODE (type);
/* Only vector types whose mode the middle-end demoted need fixing.  */
7771 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
7773 HOST_WIDE_INT size = int_size_in_bytes (type);
7774 if ((size == 8 || size == 16 || size == 32 || size == 64)
7775 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
7776 && TYPE_VECTOR_SUBPARTS (type) > 1)
7778 machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
7780 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
7781 mode = MIN_MODE_VECTOR_FLOAT;
7783 mode = MIN_MODE_VECTOR_INT;
7785 /* Get the mode which has this inner mode and number of units. */
7786 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
7787 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
7788 && GET_MODE_INNER (mode) == innermode)
/* Each ISA tier below warns once per kind (argument vs. return) via
   function-local statics, then falls back to TYPE_MODE.  */
7790 if (size == 64 && !TARGET_AVX512F && !TARGET_IAMCU)
7792 static bool warnedavx512f;
7793 static bool warnedavx512f_ret;
7795 if (cum && cum->warn_avx512f && !warnedavx512f)
7797 if (warning (OPT_Wpsabi, "AVX512F vector argument "
7798 "without AVX512F enabled changes the ABI"))
7799 warnedavx512f = true;
7801 else if (in_return && !warnedavx512f_ret)
7803 if (warning (OPT_Wpsabi, "AVX512F vector return "
7804 "without AVX512F enabled changes the ABI"))
7805 warnedavx512f_ret = true;
7808 return TYPE_MODE (type);
7810 else if (size == 32 && !TARGET_AVX && !TARGET_IAMCU)
7812 static bool warnedavx;
7813 static bool warnedavx_ret;
7815 if (cum && cum->warn_avx && !warnedavx)
7817 if (warning (OPT_Wpsabi, "AVX vector argument "
7818 "without AVX enabled changes the ABI"))
7821 else if (in_return && !warnedavx_ret)
7823 if (warning (OPT_Wpsabi, "AVX vector return "
7824 "without AVX enabled changes the ABI"))
7825 warnedavx_ret = true;
7828 return TYPE_MODE (type);
7830 else if (((size == 8 && TARGET_64BIT) || size == 16)
7834 static bool warnedsse;
7835 static bool warnedsse_ret;
7837 if (cum && cum->warn_sse && !warnedsse)
7839 if (warning (OPT_Wpsabi, "SSE vector argument "
7840 "without SSE enabled changes the ABI"))
7843 else if (!TARGET_64BIT && in_return && !warnedsse_ret)
7845 if (warning (OPT_Wpsabi, "SSE vector return "
7846 "without SSE enabled changes the ABI"))
7847 warnedsse_ret = true;
7850 else if ((size == 8 && !TARGET_64BIT)
7854 static bool warnedmmx;
7855 static bool warnedmmx_ret;
7857 if (cum && cum->warn_mmx && !warnedmmx)
7859 if (warning (OPT_Wpsabi, "MMX vector argument "
7860 "without MMX enabled changes the ABI"))
7863 else if (in_return && !warnedmmx_ret)
7865 if (warning (OPT_Wpsabi, "MMX vector return "
7866 "without MMX enabled changes the ABI"))
7867 warnedmmx_ret = true;
7880 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
7881 this may not agree with the mode that the type system has chosen for the
7882 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
7883 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
/* NOTE(review): physical lines are omitted from this listing; fragments
   kept byte-identical.  */
7886 gen_reg_or_parallel (machine_mode mode, machine_mode orig_mode,
/* Simple case: the type system's mode is usable directly.  */
7891 if (orig_mode != BLKmode)
7892 tmp = gen_rtx_REG (orig_mode, regno);
/* BLKmode: wrap the natural-mode register in a one-element PARALLEL
   located at offset 0.  */
7895 tmp = gen_rtx_REG (mode, regno);
7896 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
7897 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
7903 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
7904 of this code is to classify each 8bytes of incoming argument by the register
7905 class and assign registers accordingly. */
7907 /* Return the union class of CLASS1 and CLASS2.
7908 See the x86-64 PS ABI for details. */
/* NOTE(review): physical lines are omitted from this listing (e.g. the
   returns for rules #1 and #2); fragments kept byte-identical.  */
7910 static enum x86_64_reg_class
7911 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
7913 /* Rule #1: If both classes are equal, this is the resulting class. */
7914 if (class1 == class2)
7917 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
7919 if (class1 == X86_64_NO_CLASS)
7921 if (class2 == X86_64_NO_CLASS)
7924 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
7925 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
7926 return X86_64_MEMORY_CLASS;
7928 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
/* INTEGERSI + SSESF merge to INTEGERSI (both halves fit in 32 bits).  */
7929 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
7930 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
7931 return X86_64_INTEGERSI_CLASS;
7932 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
7933 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
7934 return X86_64_INTEGER_CLASS;
7936 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
7938 if (class1 == X86_64_X87_CLASS
7939 || class1 == X86_64_X87UP_CLASS
7940 || class1 == X86_64_COMPLEX_X87_CLASS
7941 || class2 == X86_64_X87_CLASS
7942 || class2 == X86_64_X87UP_CLASS
7943 || class2 == X86_64_COMPLEX_X87_CLASS)
7944 return X86_64_MEMORY_CLASS;
7946 /* Rule #6: Otherwise class SSE is used. */
7947 return X86_64_SSE_CLASS;
7950 /* Classify the argument of type TYPE and mode MODE.
7951 CLASSES will be filled by the register class used to pass each word
7952 of the operand. The number of words is returned. In case the parameter
7953 should be passed in memory, 0 is returned. As a special case for zero
7954 sized containers, classes[0] will be NO_CLASS and 1 is returned.
7956 BIT_OFFSET is used internally for handling records and specifies offset
7957 of the offset in bits modulo 512 to avoid overflow cases.
7959 See the x86-64 PS ABI for details.
7963 classify_argument (machine_mode mode, const_tree type,
7964 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
7966 HOST_WIDE_INT bytes =
7967 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
7968 int words = CEIL (bytes + (bit_offset % 64) / 8, UNITS_PER_WORD);
7970 /* Variable sized entities are always passed/returned in memory. */
7974 if (mode != VOIDmode
7975 && targetm.calls.must_pass_in_stack (mode, type))
7978 if (type && AGGREGATE_TYPE_P (type))
7982 enum x86_64_reg_class subclasses[MAX_CLASSES];
7984 /* On x86-64 we pass structures larger than 64 bytes on the stack. */
7988 for (i = 0; i < words; i++)
7989 classes[i] = X86_64_NO_CLASS;
7991 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
7992 signalize memory class, so handle it as special case. */
7995 classes[0] = X86_64_NO_CLASS;
7999 /* Classify each field of record and merge classes. */
8000 switch (TREE_CODE (type))
8003 /* And now merge the fields of structure. */
8004 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
8006 if (TREE_CODE (field) == FIELD_DECL)
8010 if (TREE_TYPE (field) == error_mark_node)
8013 /* Bitfields are always classified as integer. Handle them
8014 early, since later code would consider them to be
8015 misaligned integers. */
8016 if (DECL_BIT_FIELD (field))
8018 for (i = (int_bit_position (field)
8019 + (bit_offset % 64)) / 8 / 8;
8020 i < ((int_bit_position (field) + (bit_offset % 64))
8021 + tree_to_shwi (DECL_SIZE (field))
8024 merge_classes (X86_64_INTEGER_CLASS,
8031 type = TREE_TYPE (field);
8033 /* Flexible array member is ignored. */
8034 if (TYPE_MODE (type) == BLKmode
8035 && TREE_CODE (type) == ARRAY_TYPE
8036 && TYPE_SIZE (type) == NULL_TREE
8037 && TYPE_DOMAIN (type) != NULL_TREE
8038 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
8043 if (!warned && warn_psabi)
8046 inform (input_location,
8047 "the ABI of passing struct with"
8048 " a flexible array member has"
8049 " changed in GCC 4.4");
8053 num = classify_argument (TYPE_MODE (type), type,
8055 (int_bit_position (field)
8056 + bit_offset) % 512);
8059 pos = (int_bit_position (field)
8060 + (bit_offset % 64)) / 8 / 8;
8061 for (i = 0; i < num && (i + pos) < words; i++)
8063 merge_classes (subclasses[i], classes[i + pos]);
8070 /* Arrays are handled as small records. */
8073 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
8074 TREE_TYPE (type), subclasses, bit_offset);
8078 /* The partial classes are now full classes. */
8079 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
8080 subclasses[0] = X86_64_SSE_CLASS;
8081 if (subclasses[0] == X86_64_INTEGERSI_CLASS
8082 && !((bit_offset % 64) == 0 && bytes == 4))
8083 subclasses[0] = X86_64_INTEGER_CLASS;
8085 for (i = 0; i < words; i++)
8086 classes[i] = subclasses[i % num];
8091 case QUAL_UNION_TYPE:
8092 /* Unions are similar to RECORD_TYPE but offset is always 0.
8094 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
8096 if (TREE_CODE (field) == FIELD_DECL)
8100 if (TREE_TYPE (field) == error_mark_node)
8103 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
8104 TREE_TYPE (field), subclasses,
8108 for (i = 0; i < num && i < words; i++)
8109 classes[i] = merge_classes (subclasses[i], classes[i]);
8120 /* When size > 16 bytes, if the first one isn't
8121 X86_64_SSE_CLASS or any other ones aren't
8122 X86_64_SSEUP_CLASS, everything should be passed in
8124 if (classes[0] != X86_64_SSE_CLASS)
8127 for (i = 1; i < words; i++)
8128 if (classes[i] != X86_64_SSEUP_CLASS)
8132 /* Final merger cleanup. */
8133 for (i = 0; i < words; i++)
8135 /* If one class is MEMORY, everything should be passed in
8137 if (classes[i] == X86_64_MEMORY_CLASS)
8140 /* The X86_64_SSEUP_CLASS should be always preceded by
8141 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
8142 if (classes[i] == X86_64_SSEUP_CLASS
8143 && classes[i - 1] != X86_64_SSE_CLASS
8144 && classes[i - 1] != X86_64_SSEUP_CLASS)
8146 /* The first one should never be X86_64_SSEUP_CLASS. */
8147 gcc_assert (i != 0);
8148 classes[i] = X86_64_SSE_CLASS;
8151 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
8152 everything should be passed in memory. */
8153 if (classes[i] == X86_64_X87UP_CLASS
8154 && (classes[i - 1] != X86_64_X87_CLASS))
8158 /* The first one should never be X86_64_X87UP_CLASS. */
8159 gcc_assert (i != 0);
8160 if (!warned && warn_psabi)
8163 inform (input_location,
8164 "the ABI of passing union with long double"
8165 " has changed in GCC 4.4");
8173 /* Compute alignment needed. We align all types to natural boundaries with
8174 exception of XFmode that is aligned to 64bits. */
8175 if (mode != VOIDmode && mode != BLKmode)
8177 int mode_alignment = GET_MODE_BITSIZE (mode);
8180 mode_alignment = 128;
8181 else if (mode == XCmode)
8182 mode_alignment = 256;
8183 if (COMPLEX_MODE_P (mode))
8184 mode_alignment /= 2;
8185 /* Misaligned fields are always returned in memory. */
8186 if (bit_offset % mode_alignment)
8190 /* for V1xx modes, just use the base mode */
8191 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
8192 && GET_MODE_UNIT_SIZE (mode) == bytes)
8193 mode = GET_MODE_INNER (mode);
8195 /* Classification of atomic types. */
8200 classes[0] = X86_64_SSE_CLASS;
8203 classes[0] = X86_64_SSE_CLASS;
8204 classes[1] = X86_64_SSEUP_CLASS;
8214 int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
8216 /* Analyze last 128 bits only. */
8217 size = (size - 1) & 0x7f;
8221 classes[0] = X86_64_INTEGERSI_CLASS;
8226 classes[0] = X86_64_INTEGER_CLASS;
8229 else if (size < 64+32)
8231 classes[0] = X86_64_INTEGER_CLASS;
8232 classes[1] = X86_64_INTEGERSI_CLASS;
8235 else if (size < 64+64)
8237 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
8245 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
8249 /* OImode shouldn't be used directly. */
8254 if (!(bit_offset % 64))
8255 classes[0] = X86_64_SSESF_CLASS;
8257 classes[0] = X86_64_SSE_CLASS;
8260 classes[0] = X86_64_SSEDF_CLASS;
8263 classes[0] = X86_64_X87_CLASS;
8264 classes[1] = X86_64_X87UP_CLASS;
8267 classes[0] = X86_64_SSE_CLASS;
8268 classes[1] = X86_64_SSEUP_CLASS;
8271 classes[0] = X86_64_SSE_CLASS;
8272 if (!(bit_offset % 64))
8278 if (!warned && warn_psabi)
8281 inform (input_location,
8282 "the ABI of passing structure with complex float"
8283 " member has changed in GCC 4.4");
8285 classes[1] = X86_64_SSESF_CLASS;
8289 classes[0] = X86_64_SSEDF_CLASS;
8290 classes[1] = X86_64_SSEDF_CLASS;
8293 classes[0] = X86_64_COMPLEX_X87_CLASS;
8296 /* This modes is larger than 16 bytes. */
8304 classes[0] = X86_64_SSE_CLASS;
8305 classes[1] = X86_64_SSEUP_CLASS;
8306 classes[2] = X86_64_SSEUP_CLASS;
8307 classes[3] = X86_64_SSEUP_CLASS;
8315 classes[0] = X86_64_SSE_CLASS;
8316 classes[1] = X86_64_SSEUP_CLASS;
8317 classes[2] = X86_64_SSEUP_CLASS;
8318 classes[3] = X86_64_SSEUP_CLASS;
8319 classes[4] = X86_64_SSEUP_CLASS;
8320 classes[5] = X86_64_SSEUP_CLASS;
8321 classes[6] = X86_64_SSEUP_CLASS;
8322 classes[7] = X86_64_SSEUP_CLASS;
8330 classes[0] = X86_64_SSE_CLASS;
8331 classes[1] = X86_64_SSEUP_CLASS;
8339 classes[0] = X86_64_SSE_CLASS;
8345 gcc_assert (VECTOR_MODE_P (mode));
8350 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
8352 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
8353 classes[0] = X86_64_INTEGERSI_CLASS;
8355 classes[0] = X86_64_INTEGER_CLASS;
8356 classes[1] = X86_64_INTEGER_CLASS;
8357 return 1 + (bytes > 8);
8361 /* Examine the argument and return set number of register required in each
8362 class. Return true iff parameter should be passed in memory. */
8365 examine_argument (machine_mode mode, const_tree type, int in_return,
8366 int *int_nregs, int *sse_nregs)
/* classify_argument splits the value into 8-byte chunks, one ABI class
   per chunk; a result of 0 signals the memory class.  */
8368 enum x86_64_reg_class regclass[MAX_CLASSES];
8369 int n = classify_argument (mode, type, regclass, 0);
/* Walk the chunk classes, tallying into *int_nregs / *sse_nregs how many
   integer and SSE registers the value consumes.  */
8376 for (n--; n >= 0; n--)
8377 switch (regclass[n])
8379 case X86_64_INTEGER_CLASS:
8380 case X86_64_INTEGERSI_CLASS:
8383 case X86_64_SSE_CLASS:
8384 case X86_64_SSESF_CLASS:
8385 case X86_64_SSEDF_CLASS:
8388 case X86_64_NO_CLASS:
8389 case X86_64_SSEUP_CLASS:
/* NOTE(review): the x87 classes are valid for return values only; the
   elided lines presumably test IN_RETURN here -- confirm in full source.  */
8391 case X86_64_X87_CLASS:
8392 case X86_64_X87UP_CLASS:
8393 case X86_64_COMPLEX_X87_CLASS:
8397 case X86_64_MEMORY_CLASS:
8404 /* Construct container for the argument used by GCC interface. See
8405 FUNCTION_ARG for the detailed description. */
8408 construct_container (machine_mode mode, machine_mode orig_mode,
8409 const_tree type, int in_return, int nintregs, int nsseregs,
8410 const int *intreg, int sse_regno)
8412 /* The following variables hold the static issued_error state. */
8413 static bool issued_sse_arg_error;
8414 static bool issued_sse_ret_error;
8415 static bool issued_x87_ret_error;
8417 machine_mode tmpmode;
8419 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
8420 enum x86_64_reg_class regclass[MAX_CLASSES];
8424 int needed_sseregs, needed_intregs;
8425 rtx exp[MAX_CLASSES];
/* Classify the value into per-8-byte ABI classes and check that enough
   integer/SSE registers remain; otherwise the value goes to memory.  */
8428 n = classify_argument (mode, type, regclass, 0);
8431 if (examine_argument (mode, type, in_return, &needed_intregs,
8434 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
8437 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
8438 some less clueful developer tries to use floating-point anyway. */
8439 if (needed_sseregs && !TARGET_SSE)
8443 if (!issued_sse_ret_error)
8445 error ("SSE register return with SSE disabled");
8446 issued_sse_ret_error = true;
8449 else if (!issued_sse_arg_error)
8451 error ("SSE register argument with SSE disabled");
8452 issued_sse_arg_error = true;
8457 /* Likewise, error if the ABI requires us to return values in the
8458 x87 registers and the user specified -mno-80387. */
8459 if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
8460 for (i = 0; i < n; i++)
8461 if (regclass[i] == X86_64_X87_CLASS
8462 || regclass[i] == X86_64_X87UP_CLASS
8463 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
8465 if (!issued_x87_ret_error)
8467 error ("x87 register return with x87 disabled");
8468 issued_x87_ret_error = true;
8473 /* First construct simple cases. Avoid SCmode, since we want to use
8474 single register to pass this type. */
8475 if (n == 1 && mode != SCmode)
8476 switch (regclass[0])
8478 case X86_64_INTEGER_CLASS:
8479 case X86_64_INTEGERSI_CLASS:
8480 return gen_rtx_REG (mode, intreg[0]);
8481 case X86_64_SSE_CLASS:
8482 case X86_64_SSESF_CLASS:
8483 case X86_64_SSEDF_CLASS:
8484 if (mode != BLKmode)
8485 return gen_reg_or_parallel (mode, orig_mode,
8486 SSE_REGNO (sse_regno));
8488 case X86_64_X87_CLASS:
8489 case X86_64_COMPLEX_X87_CLASS:
8490 return gen_rtx_REG (mode, FIRST_STACK_REG);
8491 case X86_64_NO_CLASS:
8492 /* Zero sized array, struct or class. */
/* Multi-chunk simple cases: an all-SSE/SSEUP pattern maps to a single
   wide vector register (2, 4 or 8 chunks for xmm/ymm/zmm).  */
8498 && regclass[0] == X86_64_SSE_CLASS
8499 && regclass[1] == X86_64_SSEUP_CLASS
8501 return gen_reg_or_parallel (mode, orig_mode,
8502 SSE_REGNO (sse_regno));
8504 && regclass[0] == X86_64_SSE_CLASS
8505 && regclass[1] == X86_64_SSEUP_CLASS
8506 && regclass[2] == X86_64_SSEUP_CLASS
8507 && regclass[3] == X86_64_SSEUP_CLASS
8509 return gen_reg_or_parallel (mode, orig_mode,
8510 SSE_REGNO (sse_regno));
8512 && regclass[0] == X86_64_SSE_CLASS
8513 && regclass[1] == X86_64_SSEUP_CLASS
8514 && regclass[2] == X86_64_SSEUP_CLASS
8515 && regclass[3] == X86_64_SSEUP_CLASS
8516 && regclass[4] == X86_64_SSEUP_CLASS
8517 && regclass[5] == X86_64_SSEUP_CLASS
8518 && regclass[6] == X86_64_SSEUP_CLASS
8519 && regclass[7] == X86_64_SSEUP_CLASS
8521 return gen_reg_or_parallel (mode, orig_mode,
8522 SSE_REGNO (sse_regno));
8524 && regclass[0] == X86_64_X87_CLASS
8525 && regclass[1] == X86_64_X87UP_CLASS)
8526 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
/* Two adjacent integer registers can carry a CDImode/TImode value
   directly, without a PARALLEL.  */
8529 && regclass[0] == X86_64_INTEGER_CLASS
8530 && regclass[1] == X86_64_INTEGER_CLASS
8531 && (mode == CDImode || mode == TImode)
8532 && intreg[0] + 1 == intreg[1])
8533 return gen_rtx_REG (mode, intreg[0]);
8535 /* Otherwise figure out the entries of the PARALLEL. */
8536 for (i = 0; i < n; i++)
8540 switch (regclass[i])
8542 case X86_64_NO_CLASS:
8544 case X86_64_INTEGER_CLASS:
8545 case X86_64_INTEGERSI_CLASS:
8546 /* Merge TImodes on aligned occasions here too. */
8547 if (i * 8 + 8 > bytes)
8549 = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
8550 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
8554 /* We've requested 24 bytes we
8555 don't have mode for. Use DImode. */
8556 if (tmpmode == BLKmode)
8559 = gen_rtx_EXPR_LIST (VOIDmode,
8560 gen_rtx_REG (tmpmode, *intreg),
8564 case X86_64_SSESF_CLASS:
8566 = gen_rtx_EXPR_LIST (VOIDmode,
8567 gen_rtx_REG (SFmode,
8568 SSE_REGNO (sse_regno)),
8572 case X86_64_SSEDF_CLASS:
8574 = gen_rtx_EXPR_LIST (VOIDmode,
8575 gen_rtx_REG (DFmode,
8576 SSE_REGNO (sse_regno)),
8580 case X86_64_SSE_CLASS:
/* A leading SSE chunk followed by SSEUP chunks must form a full
   128/256/512-bit vector; the asserts below enforce that invariant.  */
8588 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
8598 && regclass[1] == X86_64_SSEUP_CLASS
8599 && regclass[2] == X86_64_SSEUP_CLASS
8600 && regclass[3] == X86_64_SSEUP_CLASS);
8606 && regclass[1] == X86_64_SSEUP_CLASS
8607 && regclass[2] == X86_64_SSEUP_CLASS
8608 && regclass[3] == X86_64_SSEUP_CLASS
8609 && regclass[4] == X86_64_SSEUP_CLASS
8610 && regclass[5] == X86_64_SSEUP_CLASS
8611 && regclass[6] == X86_64_SSEUP_CLASS
8612 && regclass[7] == X86_64_SSEUP_CLASS);
8620 = gen_rtx_EXPR_LIST (VOIDmode,
8621 gen_rtx_REG (tmpmode,
8622 SSE_REGNO (sse_regno)),
8631 /* Empty aligned struct, union or class. */
/* Wrap the collected EXPR_LIST entries into the final PARALLEL rtx.  */
8635 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
8636 for (i = 0; i < nexps; i++)
8637 XVECEXP (ret, 0, i) = exp [i];
8641 /* Update the data in CUM to advance over an argument of mode MODE
8642 and data type TYPE. (TYPE is null for libcalls where that information
8643 may not be available.)
8645 Return a number of integer registers advanced over. */
/* Advance CUM past a 32-bit-ABI argument of MODE/TYPE occupying BYTES
   bytes (WORDS words).  Returns the number of integer registers
   consumed (elided here); issues a diagnostic when an SSE calling
   convention is required but SSE is disabled.  */
8648 function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
8649 const_tree type, HOST_WIDE_INT bytes,
8650 HOST_WIDE_INT words)
/* Fix: initialize the bool with FALSE, not the null-pointer constant.  */
8653 bool error_p = false;
8657 /* Intel MCU psABI passes scalars and aggregates no larger than 8
8658 bytes in registers. */
8659 if (!VECTOR_MODE_P (mode) && bytes <= 8)
/* Consume integer registers; once they are exhausted, later arguments
   must go on the stack (arg_reg_available cleared).  */
8679 cum->words += words;
8680 cum->nregs -= words;
8681 cum->regno += words;
8682 if (cum->nregs >= 0)
8684 if (cum->nregs <= 0)
8687 cfun->machine->arg_reg_available = false;
8693 /* OImode shouldn't be used directly. */
/* float_in_sse == -1 marks "SSE required but unavailable"; < 1 / < 2
   select the integer path for SFmode / DFmode respectively.  */
8697 if (cum->float_in_sse == -1)
8699 if (cum->float_in_sse < 2)
8702 if (cum->float_in_sse == -1)
8704 if (cum->float_in_sse < 1)
/* SSE vector arguments consume one SSE register unless the value is an
   aggregate.  */
8727 if (!type || !AGGREGATE_TYPE_P (type))
8729 cum->sse_words += words;
8730 cum->sse_nregs -= 1;
8731 cum->sse_regno += 1;
8732 if (cum->sse_nregs <= 0)
/* Likewise for MMX vector arguments.  */
8746 if (!type || !AGGREGATE_TYPE_P (type))
8748 cum->mmx_words += words;
8749 cum->mmx_nregs -= 1;
8750 cum->mmx_regno += 1;
8751 if (cum->mmx_nregs <= 0)
/* Diagnose once, then clear float_in_sse so we do not repeat it.  */
8761 cum->float_in_sse = 0;
8762 error ("calling %qD with SSE calling convention without "
8763 "SSE/SSE2 enabled", cum->decl);
8764 sorry ("this is a GCC bug that can be worked around by adding "
8765 "attribute used to function called");
/* Advance CUM past a 64-bit SysV-ABI argument of MODE/TYPE occupying
   WORDS words.  NAMED is false for arguments matching an ellipsis.  */
8772 function_arg_advance_64 (CUMULATIVE_ARGS *cum, machine_mode mode,
8773 const_tree type, HOST_WIDE_INT words, bool named)
8775 int int_nregs, sse_nregs;
8777 /* Unnamed 512 and 256bit vector mode parameters are passed on stack. */
8778 if (!named && (VALID_AVX512F_REG_MODE (mode)
8779 || VALID_AVX256_REG_MODE (mode)))
/* If the argument fits in the remaining integer + SSE registers,
   consume them; otherwise fall through to the stack accounting below.  */
8782 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
8783 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
8785 cum->nregs -= int_nregs;
8786 cum->sse_nregs -= sse_nregs;
8787 cum->regno += int_nregs;
8788 cum->sse_regno += sse_nregs;
/* Stack case: align the running word count to the argument boundary.  */
8793 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
8794 cum->words = ROUND_UP (cum->words, align);
8795 cum->words += words;
/* Advance CUM past a Windows x64 ABI argument of BYTES bytes (WORDS
   words).  Values other than 1/2/4/8 bytes are passed indirectly, so
   only those sizes reach this point.  */
8801 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
8802 HOST_WIDE_INT words)
8804 /* Otherwise, this should be passed indirect. */
8805 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
8807 cum->words += words;
8817 /* Update the data in CUM to advance over an argument of mode MODE and
8818 data type TYPE. (TYPE is null for libcalls where that information
8819 may not be available.) */
8822 ix86_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
8823 const_tree type, bool named)
8825 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8826 HOST_WIDE_INT bytes, words;
/* Compute the argument size in bytes and whole words.  */
8829 if (mode == BLKmode)
8830 bytes = int_size_in_bytes (type)
8832 bytes = GET_MODE_SIZE (mode);
8833 words = CEIL (bytes, UNITS_PER_WORD);
8836 mode = type_natural_mode (type, NULL, false);
/* MPX pointer-bounds arguments are tracked separately from ordinary
   register/stack accounting.  */
8838 if ((type && POINTER_BOUNDS_TYPE_P (type))
8839 || POINTER_BOUNDS_MODE_P (mode))
8841 /* If we pass bounds in BT then just update remained bounds count. */
8842 if (cum->bnds_in_bt)
8848 /* Update remained number of bounds to force. */
8849 if (cum->force_bnd_pass)
8850 cum->force_bnd_pass--;
8857 /* The first arg not going to Bounds Tables resets this counter. */
8858 cum->bnds_in_bt = 0;
8859 /* For unnamed args we always pass bounds to avoid bounds mess when
8860 passed and received types do not match. If bounds do not follow
8861 unnamed arg, still pretend required number of bounds were passed. */
8862 if (cum->force_bnd_pass)
8864 cum->bnd_regno += cum->force_bnd_pass;
8865 cum->force_bnd_pass = 0;
/* Dispatch to the ABI-specific advance routine.  */
8870 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
8872 if (call_abi == MS_ABI)
8873 nregs = function_arg_advance_ms_64 (cum, bytes, words);
8875 nregs = function_arg_advance_64 (cum, mode, type, words, named);
8878 nregs = function_arg_advance_32 (cum, mode, type, bytes, words);
8880 /* For stdarg we expect bounds to be passed for each value passed
8883 cum->force_bnd_pass = nregs;
8884 /* For pointers passed in memory we expect bounds passed in Bounds
8887 cum->bnds_in_bt = chkp_type_bounds_count (type);
8890 /* Define where to put the arguments to a function.
8891 Value is zero to push the argument on the stack,
8892 or a hard register in which to store the argument.
8894 MODE is the argument's machine mode.
8895 TYPE is the data type of the argument (as a tree).
8896 This is null for libcalls where that information may
8898 CUM is a variable of type CUMULATIVE_ARGS which gives info about
8899 the preceding args and about the function being called.
8900 NAMED is nonzero if this argument is a named parameter
8901 (otherwise it is an extra parameter matching an ellipsis). */
8904 function_arg_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
8905 machine_mode orig_mode, const_tree type,
8906 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
8908 bool error_p = false;
8909 /* Avoid the AL settings for the Unix64 ABI. */
8910 if (mode == VOIDmode)
8915 /* Intel MCU psABI passes scalars and aggregates no larger than 8
8916 bytes in registers. */
8917 if (!VECTOR_MODE_P (mode) && bytes <= 8)
/* Integer-register case: only if the whole value fits in what remains.  */
8936 if (words <= cum->nregs)
8938 int regno = cum->regno;
8940 /* Fastcall allocates the first two DWORD (SImode) or
8941 smaller arguments to ECX and EDX if it isn't an
8947 || (type && AGGREGATE_TYPE_P (type)))
8950 /* ECX not EAX is the first allocated register. */
8951 if (regno == AX_REG)
8954 return gen_rtx_REG (mode, regno);
/* float_in_sse thresholds choose SSE vs x87/integer for SF/DFmode;
   -1 means SSE required but not enabled (diagnosed below).  */
8959 if (cum->float_in_sse == -1)
8961 if (cum->float_in_sse < 2)
8964 if (cum->float_in_sse == -1)
8966 if (cum->float_in_sse < 1)
8970 /* In 32bit, we pass TImode in xmm registers. */
8977 if (!type || !AGGREGATE_TYPE_P (type))
8980 return gen_reg_or_parallel (mode, orig_mode,
8981 cum->sse_regno + FIRST_SSE_REG)
8987 /* OImode and XImode shouldn't be used directly. */
9002 if (!type || !AGGREGATE_TYPE_P (type))
9005 return gen_reg_or_parallel (mode, orig_mode,
9006 cum->sse_regno + FIRST_SSE_REG);
/* MMX-sized vectors go to MMX registers unless the value is an
   aggregate.  */
9016 if (!type || !AGGREGATE_TYPE_P (type))
9019 return gen_reg_or_parallel (mode, orig_mode,
9020 cum->mmx_regno + FIRST_MMX_REG);
9026 cum->float_in_sse = 0;
9027 error ("calling %qD with SSE calling convention without "
9028 "SSE/SSE2 enabled", cum->decl);
9029 sorry ("this is a GCC bug that can be worked around by adding "
9030 "attribute used to function called");
/* Return the register (or PARALLEL) in which a 64-bit SysV-ABI argument
   of MODE/TYPE is passed, or NULL to pass it on the stack.  */
9037 function_arg_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
9038 machine_mode orig_mode, const_tree type, bool named)
9040 /* Handle a hidden AL argument containing number of registers
9041 for varargs x86-64 functions. */
9042 if (mode == VOIDmode)
9043 return GEN_INT (cum->maybe_vaarg
9044 ? (cum->sse_nregs < 0
9045 ? X86_64_SSE_REGPARM_MAX
9066 /* Unnamed 256 and 512bit vector mode parameters are passed on stack. */
/* Delegate the register layout to construct_container, starting from
   the next free integer and SSE registers recorded in CUM.  */
9072 return construct_container (mode, orig_mode, type, 0, cum->nregs,
9074 &x86_64_int_parameter_registers [cum->regno],
/* Return the register in which a Windows x64 ABI argument of MODE is
   passed, or NULL to pass it on the stack.  BYTES is its size.  */
9079 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
9080 machine_mode orig_mode, bool named,
9081 HOST_WIDE_INT bytes)
9085 /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call.
9086 We use value of -2 to specify that current function call is MSABI. */
9087 if (mode == VOIDmode)
9088 return GEN_INT (-2);
9090 /* If we've run out of registers, it goes on the stack. */
9091 if (cum->nregs == 0)
9094 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
9096 /* Only floating point modes are passed in anything but integer regs. */
9097 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
9100 regno = cum->regno + FIRST_SSE_REG;
9105 /* Unnamed floating parameters are passed in both the
9106 SSE and integer registers. */
9107 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
9108 t2 = gen_rtx_REG (mode, regno);
9109 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
9110 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
9111 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
9114 /* Handle aggregated types passed in register. */
/* BLKmode aggregates of 1..8 bytes are retyped to SImode/DImode so they
   can travel in a single integer register.  */
9115 if (orig_mode == BLKmode)
9117 if (bytes > 0 && bytes <= 8)
9118 mode = (bytes > 4 ? DImode : SImode);
9119 if (mode == BLKmode)
9123 return gen_reg_or_parallel (mode, orig_mode, regno);
9126 /* Return where to put the arguments to a function.
9127 Return zero to push the argument on the stack, or a hard register in which to store the argument.
9129 MODE is the argument's machine mode. TYPE is the data type of the
9130 argument. It is null for libcalls where that information may not be
9131 available. CUM gives information about the preceding args and about
9132 the function being called. NAMED is nonzero if this argument is a
9133 named parameter (otherwise it is an extra parameter matching an
9137 ix86_function_arg (cumulative_args_t cum_v, machine_mode omode,
9138 const_tree type, bool named)
9140 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9141 machine_mode mode = omode;
9142 HOST_WIDE_INT bytes, words;
9145 /* All pointer bounds arguments are handled separately here. */
9146 if ((type && POINTER_BOUNDS_TYPE_P (type))
9147 || POINTER_BOUNDS_MODE_P (mode))
9149 /* Return NULL if bounds are forced to go in Bounds Table. */
9150 if (cum->bnds_in_bt)
9152 /* Return the next available bound reg if any. */
9153 else if (cum->bnd_regno <= LAST_BND_REG)
9154 arg = gen_rtx_REG (BNDmode, cum->bnd_regno);
9155 /* Return the next special slot number otherwise. */
9157 arg = GEN_INT (cum->bnd_regno - LAST_BND_REG - 1);
/* Size of the argument in bytes and whole words.  */
9162 if (mode == BLKmode)
9163 bytes = int_size_in_bytes (type);
9165 bytes = GET_MODE_SIZE (mode);
9166 words = CEIL (bytes, UNITS_PER_WORD);
9168 /* To simplify the code below, represent vector types with a vector mode
9169 even if MMX/SSE are not active. */
9170 if (type && TREE_CODE (type) == VECTOR_TYPE)
9171 mode = type_natural_mode (type, cum, false);
/* Dispatch to the routine for the active calling ABI.  */
9175 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
9177 if (call_abi == MS_ABI)
9178 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
9180 arg = function_arg_64 (cum, mode, omode, type, named);
9183 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
9188 /* A C expression that indicates when an argument must be passed by
9189 reference. If nonzero for an argument, a copy of that argument is
9190 made in memory and a pointer to the argument is passed instead of
9191 the argument itself. The pointer is passed in whatever way is
9192 appropriate for passing a pointer to that type. */
9195 ix86_pass_by_reference (cumulative_args_t cum_v, machine_mode mode,
9196 const_tree type, bool)
9198 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9200 /* Bounds are never passed by reference. */
9201 if ((type && POINTER_BOUNDS_TYPE_P (type))
9202 || POINTER_BOUNDS_MODE_P (mode))
9207 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
9209 /* See Windows x64 Software Convention. */
9210 if (call_abi == MS_ABI)
9212 HOST_WIDE_INT msize = GET_MODE_SIZE (mode);
9216 /* Arrays are passed by reference. */
9217 if (TREE_CODE (type) == ARRAY_TYPE)
9220 if (RECORD_OR_UNION_TYPE_P (type))
9222 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
9223 are passed by reference. */
9224 msize = int_size_in_bytes (type);
9228 /* __m128 is passed by reference. */
9229 return msize != 1 && msize != 2 && msize != 4 && msize != 8;
/* A size of -1 means variable-sized: must go by reference.  */
9231 else if (type && int_size_in_bytes (type) == -1)
9238 /* Return true when TYPE should be 128bit aligned for 32bit argument
9239 passing ABI. XXX: This function is obsolete and is only used for
9240 checking psABI compatibility with previous versions of GCC. */
9243 ix86_compat_aligned_value_p (const_tree type)
9245 machine_mode mode = TYPE_MODE (type);
/* SSE-register modes (with default alignment) qualify directly.  */
9246 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
9250 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
9252 if (TYPE_ALIGN (type) < 128)
9255 if (AGGREGATE_TYPE_P (type))
9257 /* Walk the aggregates recursively. */
9258 switch (TREE_CODE (type))
9262 case QUAL_UNION_TYPE:
9266 /* Walk all the structure fields. */
9267 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
9269 if (TREE_CODE (field) == FIELD_DECL
9270 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
9277 /* Just for use if some languages passes arrays by value. */
9278 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
9289 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
9290 XXX: This function is obsolete and is only used for checking psABI
9291 compatibility with previous versions of GCC. */
9294 ix86_compat_function_arg_boundary (machine_mode mode,
9295 const_tree type, unsigned int align)
9297 /* In 32bit, only _Decimal128 and __float128 are aligned to their
9298 natural boundaries. */
9299 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
9301 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
9302 make an exception for SSE modes since these require 128bit
9305 The handling here differs from field_alignment. ICC aligns MMX
9306 arguments to 4 byte boundaries, while structure fields are aligned
9307 to 8 byte boundaries. */
9310 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
9311 align = PARM_BOUNDARY;
9315 if (!ix86_compat_aligned_value_p (type))
9316 align = PARM_BOUNDARY;
/* Never report more than the maximum alignment the target supports.  */
9319 if (align > BIGGEST_ALIGNMENT)
9320 align = BIGGEST_ALIGNMENT;
9324 /* Return true when TYPE should be 128bit aligned for 32bit argument
/* Recursive check: TYPE itself, or any field/element of it, carries
   128-bit alignment (excluding XFmode/XCmode).  */
9328 ix86_contains_aligned_value_p (const_tree type)
9330 machine_mode mode = TYPE_MODE (type);
9332 if (mode == XFmode || mode == XCmode)
9335 if (TYPE_ALIGN (type) < 128)
9338 if (AGGREGATE_TYPE_P (type))
9340 /* Walk the aggregates recursively. */
9341 switch (TREE_CODE (type))
9345 case QUAL_UNION_TYPE:
9349 /* Walk all the structure fields. */
9350 for (field = TYPE_FIELDS (type);
9352 field = DECL_CHAIN (field))
9354 if (TREE_CODE (field) == FIELD_DECL
9355 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
9362 /* Just for use if some languages passes arrays by value. */
9363 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
9372 return TYPE_ALIGN (type) >= 128;
9377 /* Gives the alignment boundary, in bits, of an argument with the
9378 specified mode and type. */
9381 ix86_function_arg_boundary (machine_mode mode, const_tree type)
9386 /* Since the main variant type is used for call, we convert it to
9387 the main variant type. */
9388 type = TYPE_MAIN_VARIANT (type);
9389 align = TYPE_ALIGN (type);
/* No type available: fall back to the mode's natural alignment,
   clamped up to at least PARM_BOUNDARY.  */
9392 align = GET_MODE_ALIGNMENT (mode);
9393 if (align < PARM_BOUNDARY)
9394 align = PARM_BOUNDARY;
/* Remember the pre-adjustment value for the psABI-change warning.  */
9398 unsigned int saved_align = align;
9402 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
9405 if (mode == XFmode || mode == XCmode)
9406 align = PARM_BOUNDARY;
9408 else if (!ix86_contains_aligned_value_p (type))
9409 align = PARM_BOUNDARY;
9412 align = PARM_BOUNDARY;
/* Warn once when the result differs from what older GCCs computed.  */
9417 && align != ix86_compat_function_arg_boundary (mode, type,
9421 inform (input_location,
9422 "The ABI for passing parameters with %d-byte"
9423 " alignment has changed in GCC 4.6",
9424 align / BITS_PER_UNIT);
9431 /* Return true if N is a possible register number of function value. */
9434 ix86_function_value_regno_p (const unsigned int regno)
/* NOTE(review): the switch labels for each register group are elided in
   this listing; each return below belongs to a specific REGNO case.  */
9441 return (!TARGET_64BIT || ix86_cfun_abi () != MS_ABI);
9444 return TARGET_64BIT && ix86_cfun_abi () != MS_ABI;
9448 return chkp_function_instrumented_p (current_function_decl);
9450 /* Complex values are returned in %st(0)/%st(1) pair. */
9453 /* TODO: The function should depend on current function ABI but
9454 builtins.c would need updating then. Therefore we use the
9456 if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
9458 return TARGET_FLOAT_RETURNS_IN_80387;
9460 /* Complex values are returned in %xmm0/%xmm1 pair. */
9466 if (TARGET_MACHO || TARGET_64BIT)
9474 /* Define how to find the value returned by a function.
9475 VALTYPE is the data type of the value (as a tree).
9476 If the precise function being called is known, FUNC is its FUNCTION_DECL;
9477 otherwise, FUNC is 0. */
9480 function_value_32 (machine_mode orig_mode, machine_mode mode,
9481 const_tree fntype, const_tree fn)
9485 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
9486 we normally prevent this case when mmx is not available. However
9487 some ABIs may require the result to be returned like DImode. */
9488 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
9489 regno = FIRST_MMX_REG;
9491 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
9492 we prevent this case when sse is not available. However some ABIs
9493 may require the result to be returned like integer TImode. */
9494 else if (mode == TImode
9495 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
9496 regno = FIRST_SSE_REG;
9498 /* 32-byte vector modes in %ymm0. */
9499 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
9500 regno = FIRST_SSE_REG;
9502 /* 64-byte vector modes in %zmm0. */
9503 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
9504 regno = FIRST_SSE_REG;
9506 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
9507 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
9508 regno = FIRST_FLOAT_REG;
9510 /* Most things go in %eax. */
9513 /* Override FP return register with %xmm0 for local functions when
9514 SSE math is enabled or for functions with sseregparm attribute. */
9515 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
9517 int sse_level = ix86_function_sseregparm (fntype, fn, false);
9518 if (sse_level == -1)
/* Fix: correct the typo "caling" -> "calling" in the diagnostic.  */
9520 error ("calling %qD with SSE calling convention without "
9521 "SSE/SSE2 enabled", fn);
9522 sorry ("this is a GCC bug that can be worked around by adding "
9523 "attribute used to function called");
9525 else if ((sse_level >= 1 && mode == SFmode)
9526 || (sse_level == 2 && mode == DFmode))
9527 regno = FIRST_SSE_REG;
9530 /* OImode shouldn't be used directly. */
9531 gcc_assert (mode != OImode);
9533 return gen_rtx_REG (orig_mode, regno);
/* Return the rtx describing where a 64-bit SysV-ABI function returns a
   value of MODE/VALTYPE.  */
9537 function_value_64 (machine_mode orig_mode, machine_mode mode,
9542 /* Handle libcalls, which don't provide a type node. */
9543 if (valtype == NULL)
9557 regno = FIRST_SSE_REG;
9561 regno = FIRST_FLOAT_REG;
9569 return gen_rtx_REG (mode, regno);
9571 else if (POINTER_TYPE_P (valtype))
9573 /* Pointers are always returned in word_mode. */
/* General case: let construct_container lay the value out over %rax/%rdx
   and %xmm0/%xmm1 as the psABI classification dictates.  */
9577 ret = construct_container (mode, orig_mode, valtype, 1,
9578 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
9579 x86_64_int_return_registers, 0);
9581 /* For zero sized structures, construct_container returns NULL, but we
9582 need to keep rest of compiler happy by returning meaningful value. */
9584 ret = gen_rtx_REG (orig_mode, AX_REG);
/* Return the rtx describing where a Windows x64 ABI function returns a
   value of MODE/VALTYPE.  Default is %rax; 16-byte vectors and SF/DF
   floats come back in %xmm0.  */
9590 function_value_ms_64 (machine_mode orig_mode, machine_mode mode,
9593 unsigned int regno = AX_REG;
9597 switch (GET_MODE_SIZE (mode))
9600 if (valtype != NULL_TREE
/* Fix: drop the duplicated !VECTOR_INTEGER_TYPE_P test that was
   repeated on two consecutive lines; one occurrence suffices.  */
9601 && !VECTOR_INTEGER_TYPE_P (valtype)
9603 && !INTEGRAL_TYPE_P (valtype)
9604 && !VECTOR_FLOAT_TYPE_P (valtype))
9606 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
9607 && !COMPLEX_MODE_P (mode))
9608 regno = FIRST_SSE_REG;
9612 if (mode == SFmode || mode == DFmode)
9613 regno = FIRST_SSE_REG;
9619 return gen_rtx_REG (orig_mode, regno);
/* Common worker for ix86_function_value and ix86_libcall_value:
   dispatch to the ABI-specific return-value routine.  */
9623 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
9624 machine_mode orig_mode, machine_mode mode)
9626 const_tree fn, fntype;
/* FNTYPE_OR_DECL may be a FUNCTION_DECL or a FUNCTION_TYPE (or null
   for libcalls); normalize to separate fn / fntype views.  */
9629 if (fntype_or_decl && DECL_P (fntype_or_decl))
9630 fn = fntype_or_decl;
9631 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
9633 if ((valtype && POINTER_BOUNDS_TYPE_P (valtype))
9634 || POINTER_BOUNDS_MODE_P (mode))
9635 return gen_rtx_REG (BNDmode, FIRST_BND_REG);
9636 else if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
9637 return function_value_ms_64 (orig_mode, mode, valtype);
9638 else if (TARGET_64BIT)
9639 return function_value_64 (orig_mode, mode, valtype);
9641 return function_value_32 (orig_mode, mode, fntype, fn);
/* TARGET_FUNCTION_VALUE hook: rtx for a value of VALTYPE returned by
   (or to) FNTYPE_OR_DECL.  */
9645 ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool)
9647 machine_mode mode, orig_mode;
9649 orig_mode = TYPE_MODE (valtype);
/* type_natural_mode may promote e.g. vector types to their natural
   vector mode before classification.  */
9650 mode = type_natural_mode (valtype, NULL, true);
9651 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
9654 /* Return an RTX representing a place where a function returns
9655 or receives pointer bounds or NULL if no bounds are returned.
9657 VALTYPE is a data type of a value returned by the function.
9659 FN_DECL_OR_TYPE is a tree node representing FUNCTION_DECL
9660 or FUNCTION_TYPE of the function.
9662 If OUTGOING is false, return a place in which the caller will
9663 see the return value. Otherwise, return a place where a
9664 function returns a value. */
9667 ix86_function_value_bounds (const_tree valtype,
9668 const_tree fntype_or_decl ATTRIBUTE_UNUSED,
9669 bool outgoing ATTRIBUTE_UNUSED)
/* A bounded type returns its bounds in the first MPX bound register.  */
9673 if (BOUNDED_TYPE_P (valtype))
9674 res = gen_rtx_REG (BNDmode, FIRST_BND_REG);
9675 else if (chkp_type_has_pointer (valtype))
/* Aggregate with pointer members: collect one bound register per
   pointer slot (at most two) into a PARALLEL.  */
9680 unsigned i, bnd_no = 0;
9682 bitmap_obstack_initialize (NULL);
9683 slots = BITMAP_ALLOC (NULL);
9684 chkp_find_bound_slots (valtype, slots);
9686 EXECUTE_IF_SET_IN_BITMAP (slots, 0, i, bi)
9688 rtx reg = gen_rtx_REG (BNDmode, FIRST_BND_REG + bnd_no);
9689 rtx offs = GEN_INT (i * POINTER_SIZE / BITS_PER_UNIT);
9690 gcc_assert (bnd_no < 2);
9691 bounds[bnd_no++] = gen_rtx_EXPR_LIST (VOIDmode, reg, offs);
9694 res = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (bnd_no, bounds));
/* Release the temporary bitmap used for slot discovery.  */
9696 BITMAP_FREE (slots);
9697 bitmap_obstack_release (NULL);
9705 /* Pointer function arguments and return values are promoted to
9709 ix86_promote_function_mode (const_tree type, machine_mode mode,
9710 int *punsignedp, const_tree fntype,
9713 if (type != NULL_TREE && POINTER_TYPE_P (type))
9715 *punsignedp = POINTERS_EXTEND_UNSIGNED;
9718 return default_promote_function_mode (type, mode, punsignedp, fntype,
9722 /* Return true if a structure, union or array with MODE containing FIELD
9723 should be accessed using BLKmode. */
9726 ix86_member_type_forces_blk (const_tree field, machine_mode mode)
9728 /* Union with XFmode must be in BLKmode. */
9729 return (mode == XFmode
9730 && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
9731 || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
9735 ix86_libcall_value (machine_mode mode)
9737 return ix86_function_value_1 (NULL, NULL, mode, mode);
9740 /* Return true iff type is returned in memory. */
9743 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
9745 #ifdef SUBTARGET_RETURN_IN_MEMORY
9746 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
9748 const machine_mode mode = type_natural_mode (type, NULL, true);
9751 if (POINTER_BOUNDS_TYPE_P (type))
9756 if (ix86_function_type_abi (fntype) == MS_ABI)
9758 size = int_size_in_bytes (type);
9760 /* __m128 is returned in xmm0. */
9761 if ((!type || VECTOR_INTEGER_TYPE_P (type)
9762 || INTEGRAL_TYPE_P (type)
9763 || VECTOR_FLOAT_TYPE_P (type))
9764 && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
9765 && !COMPLEX_MODE_P (mode)
9766 && (GET_MODE_SIZE (mode) == 16 || size == 16))
9769 /* Otherwise, the size must be exactly in [1248]. */
9770 return size != 1 && size != 2 && size != 4 && size != 8;
9774 int needed_intregs, needed_sseregs;
9776 return examine_argument (mode, type, 1,
9777 &needed_intregs, &needed_sseregs);
9782 size = int_size_in_bytes (type);
9784 /* Intel MCU psABI returns scalars and aggregates no larger than 8
9785 bytes in registers. */
9787 return VECTOR_MODE_P (mode) || size < 0 || size > 8;
9789 if (mode == BLKmode)
9792 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
9795 if (VECTOR_MODE_P (mode) || mode == TImode)
9797 /* User-created vectors small enough to fit in EAX. */
9801 /* Unless ABI prescibes otherwise,
9802 MMX/3dNow values are returned in MM0 if available. */
9805 return TARGET_VECT8_RETURNS || !TARGET_MMX;
9807 /* SSE values are returned in XMM0 if available. */
9811 /* AVX values are returned in YMM0 if available. */
9815 /* AVX512F values are returned in ZMM0 if available. */
9817 return !TARGET_AVX512F;
9826 /* OImode shouldn't be used directly. */
9827 gcc_assert (mode != OImode);
9835 /* Create the va_list data type. */
9838 ix86_build_builtin_va_list_64 (void)
9840 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
9842 record = lang_hooks.types.make_type (RECORD_TYPE);
9843 type_decl = build_decl (BUILTINS_LOCATION,
9844 TYPE_DECL, get_identifier ("__va_list_tag"), record);
9846 f_gpr = build_decl (BUILTINS_LOCATION,
9847 FIELD_DECL, get_identifier ("gp_offset"),
9848 unsigned_type_node);
9849 f_fpr = build_decl (BUILTINS_LOCATION,
9850 FIELD_DECL, get_identifier ("fp_offset"),
9851 unsigned_type_node);
9852 f_ovf = build_decl (BUILTINS_LOCATION,
9853 FIELD_DECL, get_identifier ("overflow_arg_area"),
9855 f_sav = build_decl (BUILTINS_LOCATION,
9856 FIELD_DECL, get_identifier ("reg_save_area"),
9859 va_list_gpr_counter_field = f_gpr;
9860 va_list_fpr_counter_field = f_fpr;
9862 DECL_FIELD_CONTEXT (f_gpr) = record;
9863 DECL_FIELD_CONTEXT (f_fpr) = record;
9864 DECL_FIELD_CONTEXT (f_ovf) = record;
9865 DECL_FIELD_CONTEXT (f_sav) = record;
9867 TYPE_STUB_DECL (record) = type_decl;
9868 TYPE_NAME (record) = type_decl;
9869 TYPE_FIELDS (record) = f_gpr;
9870 DECL_CHAIN (f_gpr) = f_fpr;
9871 DECL_CHAIN (f_fpr) = f_ovf;
9872 DECL_CHAIN (f_ovf) = f_sav;
9874 layout_type (record);
9876 /* The correct type is an array type of one element. */
9877 return build_array_type (record, build_index_type (size_zero_node));
9880 /* Setup the builtin va_list data type and for 64-bit the additional
9881 calling convention specific va_list data types. */
9884 ix86_build_builtin_va_list (void)
9888 /* Initialize ABI specific va_list builtin types. */
9889 tree sysv_va_list, ms_va_list;
9891 sysv_va_list = ix86_build_builtin_va_list_64 ();
9892 sysv_va_list_type_node = build_variant_type_copy (sysv_va_list);
9894 /* For MS_ABI we use plain pointer to argument area. */
9895 ms_va_list = build_pointer_type (char_type_node);
9896 ms_va_list_type_node = build_variant_type_copy (ms_va_list);
9898 return (ix86_abi == MS_ABI) ? ms_va_list : sysv_va_list;
9902 /* For i386 we use plain pointer to argument area. */
9903 return build_pointer_type (char_type_node);
9907 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
9910 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
9916 /* GPR size of varargs save area. */
9917 if (cfun->va_list_gpr_size)
9918 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
9920 ix86_varargs_gpr_size = 0;
9922 /* FPR size of varargs save area. We don't need it if we don't pass
9923 anything in SSE registers. */
9924 if (TARGET_SSE && cfun->va_list_fpr_size)
9925 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
9927 ix86_varargs_fpr_size = 0;
9929 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
9932 save_area = frame_pointer_rtx;
9933 set = get_varargs_alias_set ();
9935 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
9936 if (max > X86_64_REGPARM_MAX)
9937 max = X86_64_REGPARM_MAX;
9939 for (i = cum->regno; i < max; i++)
9941 mem = gen_rtx_MEM (word_mode,
9942 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
9943 MEM_NOTRAP_P (mem) = 1;
9944 set_mem_alias_set (mem, set);
9945 emit_move_insn (mem,
9946 gen_rtx_REG (word_mode,
9947 x86_64_int_parameter_registers[i]));
9950 if (ix86_varargs_fpr_size)
9953 rtx_code_label *label;
9956 /* Now emit code to save SSE registers. The AX parameter contains number
9957 of SSE parameter registers used to call this function, though all we
9958 actually check here is the zero/non-zero status. */
9960 label = gen_label_rtx ();
9961 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
9962 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
9965 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
9966 we used movdqa (i.e. TImode) instead? Perhaps even better would
9967 be if we could determine the real mode of the data, via a hook
9968 into pass_stdarg. Ignore all that for now. */
9970 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
9971 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
9973 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
9974 if (max > X86_64_SSE_REGPARM_MAX)
9975 max = X86_64_SSE_REGPARM_MAX;
9977 for (i = cum->sse_regno; i < max; ++i)
9979 mem = plus_constant (Pmode, save_area,
9980 i * 16 + ix86_varargs_gpr_size);
9981 mem = gen_rtx_MEM (smode, mem);
9982 MEM_NOTRAP_P (mem) = 1;
9983 set_mem_alias_set (mem, set);
9984 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
9986 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
9994 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
9996 alias_set_type set = get_varargs_alias_set ();
9999 /* Reset to zero, as there might be a sysv vaarg used
10001 ix86_varargs_gpr_size = 0;
10002 ix86_varargs_fpr_size = 0;
10004 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
10008 mem = gen_rtx_MEM (Pmode,
10009 plus_constant (Pmode, virtual_incoming_args_rtx,
10010 i * UNITS_PER_WORD));
10011 MEM_NOTRAP_P (mem) = 1;
10012 set_mem_alias_set (mem, set);
10014 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
10015 emit_move_insn (mem, reg);
10020 ix86_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
10021 tree type, int *, int no_rtl)
10023 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
10024 CUMULATIVE_ARGS next_cum;
10027 /* This argument doesn't appear to be used anymore. Which is good,
10028 because the old code here didn't suppress rtl generation. */
10029 gcc_assert (!no_rtl);
10034 fntype = TREE_TYPE (current_function_decl);
10036 /* For varargs, we do not want to skip the dummy va_dcl argument.
10037 For stdargs, we do want to skip the last named argument. */
10039 if (stdarg_p (fntype))
10040 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
10043 if (cum->call_abi == MS_ABI)
10044 setup_incoming_varargs_ms_64 (&next_cum);
10046 setup_incoming_varargs_64 (&next_cum);
10050 ix86_setup_incoming_vararg_bounds (cumulative_args_t cum_v,
10051 enum machine_mode mode,
10053 int *pretend_size ATTRIBUTE_UNUSED,
10056 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
10057 CUMULATIVE_ARGS next_cum;
10060 int bnd_reg, i, max;
10062 gcc_assert (!no_rtl);
10064 /* Do nothing if we use plain pointer to argument area. */
10065 if (!TARGET_64BIT || cum->call_abi == MS_ABI)
10068 fntype = TREE_TYPE (current_function_decl);
10070 /* For varargs, we do not want to skip the dummy va_dcl argument.
10071 For stdargs, we do want to skip the last named argument. */
10073 if (stdarg_p (fntype))
10074 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
10076 save_area = frame_pointer_rtx;
10078 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
10079 if (max > X86_64_REGPARM_MAX)
10080 max = X86_64_REGPARM_MAX;
10082 bnd_reg = cum->bnd_regno + cum->force_bnd_pass;
10083 if (chkp_function_instrumented_p (current_function_decl))
10084 for (i = cum->regno; i < max; i++)
10086 rtx addr = plus_constant (Pmode, save_area, i * UNITS_PER_WORD);
10087 rtx ptr = gen_rtx_REG (Pmode,
10088 x86_64_int_parameter_registers[i]);
10091 if (bnd_reg <= LAST_BND_REG)
10092 bounds = gen_rtx_REG (BNDmode, bnd_reg);
10096 plus_constant (Pmode, arg_pointer_rtx,
10097 (LAST_BND_REG - bnd_reg) * GET_MODE_SIZE (Pmode));
10098 bounds = gen_reg_rtx (BNDmode);
10099 emit_insn (BNDmode == BND64mode
10100 ? gen_bnd64_ldx (bounds, ldx_addr, ptr)
10101 : gen_bnd32_ldx (bounds, ldx_addr, ptr));
10104 emit_insn (BNDmode == BND64mode
10105 ? gen_bnd64_stx (addr, ptr, bounds)
10106 : gen_bnd32_stx (addr, ptr, bounds));
10113 /* Checks if TYPE is of kind va_list char *. */
10116 is_va_list_char_pointer (tree type)
10120 /* For 32-bit it is always true. */
10123 canonic = ix86_canonical_va_list_type (type);
10124 return (canonic == ms_va_list_type_node
10125 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
10128 /* Implement va_start. */
10131 ix86_va_start (tree valist, rtx nextarg)
10133 HOST_WIDE_INT words, n_gpr, n_fpr;
10134 tree f_gpr, f_fpr, f_ovf, f_sav;
10135 tree gpr, fpr, ovf, sav, t;
10139 if (flag_split_stack
10140 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
10142 unsigned int scratch_regno;
10144 /* When we are splitting the stack, we can't refer to the stack
10145 arguments using internal_arg_pointer, because they may be on
10146 the old stack. The split stack prologue will arrange to
10147 leave a pointer to the old stack arguments in a scratch
10148 register, which we here copy to a pseudo-register. The split
10149 stack prologue can't set the pseudo-register directly because
10150 it (the prologue) runs before any registers have been saved. */
10152 scratch_regno = split_stack_prologue_scratch_regno ();
10153 if (scratch_regno != INVALID_REGNUM)
10158 reg = gen_reg_rtx (Pmode);
10159 cfun->machine->split_stack_varargs_pointer = reg;
10162 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
10163 seq = get_insns ();
10166 push_topmost_sequence ();
10167 emit_insn_after (seq, entry_of_function ());
10168 pop_topmost_sequence ();
10172 /* Only 64bit target needs something special. */
10173 if (is_va_list_char_pointer (TREE_TYPE (valist)))
10175 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
10176 std_expand_builtin_va_start (valist, nextarg);
10181 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
10182 next = expand_binop (ptr_mode, add_optab,
10183 cfun->machine->split_stack_varargs_pointer,
10184 crtl->args.arg_offset_rtx,
10185 NULL_RTX, 0, OPTAB_LIB_WIDEN);
10186 convert_move (va_r, next, 0);
10188 /* Store zero bounds for va_list. */
10189 if (chkp_function_instrumented_p (current_function_decl))
10190 chkp_expand_bounds_reset_for_mem (valist,
10191 make_tree (TREE_TYPE (valist),
10198 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
10199 f_fpr = DECL_CHAIN (f_gpr);
10200 f_ovf = DECL_CHAIN (f_fpr);
10201 f_sav = DECL_CHAIN (f_ovf);
10203 valist = build_simple_mem_ref (valist);
10204 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
10205 /* The following should be folded into the MEM_REF offset. */
10206 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
10208 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
10210 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
10212 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
10215 /* Count number of gp and fp argument registers used. */
10216 words = crtl->args.info.words;
10217 n_gpr = crtl->args.info.regno;
10218 n_fpr = crtl->args.info.sse_regno;
10220 if (cfun->va_list_gpr_size)
10222 type = TREE_TYPE (gpr);
10223 t = build2 (MODIFY_EXPR, type,
10224 gpr, build_int_cst (type, n_gpr * 8));
10225 TREE_SIDE_EFFECTS (t) = 1;
10226 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
10229 if (TARGET_SSE && cfun->va_list_fpr_size)
10231 type = TREE_TYPE (fpr);
10232 t = build2 (MODIFY_EXPR, type, fpr,
10233 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
10234 TREE_SIDE_EFFECTS (t) = 1;
10235 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
10238 /* Find the overflow area. */
10239 type = TREE_TYPE (ovf);
10240 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
10241 ovf_rtx = crtl->args.internal_arg_pointer;
10243 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
10244 t = make_tree (type, ovf_rtx);
10246 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
10248 /* Store zero bounds for overflow area pointer. */
10249 if (chkp_function_instrumented_p (current_function_decl))
10250 chkp_expand_bounds_reset_for_mem (ovf, t);
10252 t = build2 (MODIFY_EXPR, type, ovf, t);
10253 TREE_SIDE_EFFECTS (t) = 1;
10254 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
10256 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
10258 /* Find the register save area.
10259 Prologue of the function save it right above stack frame. */
10260 type = TREE_TYPE (sav);
10261 t = make_tree (type, frame_pointer_rtx);
10262 if (!ix86_varargs_gpr_size)
10263 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
10265 /* Store zero bounds for save area pointer. */
10266 if (chkp_function_instrumented_p (current_function_decl))
10267 chkp_expand_bounds_reset_for_mem (sav, t);
10269 t = build2 (MODIFY_EXPR, type, sav, t);
10270 TREE_SIDE_EFFECTS (t) = 1;
10271 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
10275 /* Implement va_arg. */
10278 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
10279 gimple_seq *post_p)
10281 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
10282 tree f_gpr, f_fpr, f_ovf, f_sav;
10283 tree gpr, fpr, ovf, sav, t;
10285 tree lab_false, lab_over = NULL_TREE;
10288 int indirect_p = 0;
10290 machine_mode nat_mode;
10291 unsigned int arg_boundary;
10293 /* Only 64bit target needs something special. */
10294 if (is_va_list_char_pointer (TREE_TYPE (valist)))
10295 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
10297 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
10298 f_fpr = DECL_CHAIN (f_gpr);
10299 f_ovf = DECL_CHAIN (f_fpr);
10300 f_sav = DECL_CHAIN (f_ovf);
10302 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
10303 valist, f_gpr, NULL_TREE);
10305 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
10306 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
10307 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
10309 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
10311 type = build_pointer_type (type);
10312 size = int_size_in_bytes (type);
10313 rsize = CEIL (size, UNITS_PER_WORD);
10315 nat_mode = type_natural_mode (type, NULL, false);
10330 /* Unnamed 256 and 512bit vector mode parameters are passed on stack. */
10331 if (!TARGET_64BIT_MS_ABI)
10338 container = construct_container (nat_mode, TYPE_MODE (type),
10339 type, 0, X86_64_REGPARM_MAX,
10340 X86_64_SSE_REGPARM_MAX, intreg,
10345 /* Pull the value out of the saved registers. */
10347 addr = create_tmp_var (ptr_type_node, "addr");
10351 int needed_intregs, needed_sseregs;
10353 tree int_addr, sse_addr;
10355 lab_false = create_artificial_label (UNKNOWN_LOCATION);
10356 lab_over = create_artificial_label (UNKNOWN_LOCATION);
10358 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
10360 need_temp = (!REG_P (container)
10361 && ((needed_intregs && TYPE_ALIGN (type) > 64)
10362 || TYPE_ALIGN (type) > 128));
10364 /* In case we are passing structure, verify that it is consecutive block
10365 on the register save area. If not we need to do moves. */
10366 if (!need_temp && !REG_P (container))
10368 /* Verify that all registers are strictly consecutive */
10369 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
10373 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
10375 rtx slot = XVECEXP (container, 0, i);
10376 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
10377 || INTVAL (XEXP (slot, 1)) != i * 16)
10385 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
10387 rtx slot = XVECEXP (container, 0, i);
10388 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
10389 || INTVAL (XEXP (slot, 1)) != i * 8)
10401 int_addr = create_tmp_var (ptr_type_node, "int_addr");
10402 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
10405 /* First ensure that we fit completely in registers. */
10406 if (needed_intregs)
10408 t = build_int_cst (TREE_TYPE (gpr),
10409 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
10410 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
10411 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
10412 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
10413 gimplify_and_add (t, pre_p);
10415 if (needed_sseregs)
10417 t = build_int_cst (TREE_TYPE (fpr),
10418 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
10419 + X86_64_REGPARM_MAX * 8);
10420 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
10421 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
10422 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
10423 gimplify_and_add (t, pre_p);
10426 /* Compute index to start of area used for integer regs. */
10427 if (needed_intregs)
10429 /* int_addr = gpr + sav; */
10430 t = fold_build_pointer_plus (sav, gpr);
10431 gimplify_assign (int_addr, t, pre_p);
10433 if (needed_sseregs)
10435 /* sse_addr = fpr + sav; */
10436 t = fold_build_pointer_plus (sav, fpr);
10437 gimplify_assign (sse_addr, t, pre_p);
10441 int i, prev_size = 0;
10442 tree temp = create_tmp_var (type, "va_arg_tmp");
10444 /* addr = &temp; */
10445 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
10446 gimplify_assign (addr, t, pre_p);
10448 for (i = 0; i < XVECLEN (container, 0); i++)
10450 rtx slot = XVECEXP (container, 0, i);
10451 rtx reg = XEXP (slot, 0);
10452 machine_mode mode = GET_MODE (reg);
10456 tree src_addr, src;
10458 tree dest_addr, dest;
10459 int cur_size = GET_MODE_SIZE (mode);
10461 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
10462 prev_size = INTVAL (XEXP (slot, 1));
10463 if (prev_size + cur_size > size)
10465 cur_size = size - prev_size;
10466 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
10467 if (mode == BLKmode)
10470 piece_type = lang_hooks.types.type_for_mode (mode, 1);
10471 if (mode == GET_MODE (reg))
10472 addr_type = build_pointer_type (piece_type);
10474 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
10476 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
10479 if (SSE_REGNO_P (REGNO (reg)))
10481 src_addr = sse_addr;
10482 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
10486 src_addr = int_addr;
10487 src_offset = REGNO (reg) * 8;
10489 src_addr = fold_convert (addr_type, src_addr);
10490 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
10492 dest_addr = fold_convert (daddr_type, addr);
10493 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
10494 if (cur_size == GET_MODE_SIZE (mode))
10496 src = build_va_arg_indirect_ref (src_addr);
10497 dest = build_va_arg_indirect_ref (dest_addr);
10499 gimplify_assign (dest, src, pre_p);
10504 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
10505 3, dest_addr, src_addr,
10506 size_int (cur_size));
10507 gimplify_and_add (copy, pre_p);
10509 prev_size += cur_size;
10513 if (needed_intregs)
10515 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
10516 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
10517 gimplify_assign (gpr, t, pre_p);
10520 if (needed_sseregs)
10522 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
10523 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
10524 gimplify_assign (unshare_expr (fpr), t, pre_p);
10527 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
10529 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
10532 /* ... otherwise out of the overflow area. */
10534 /* When we align parameter on stack for caller, if the parameter
10535 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
10536 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. We will match callee
10537 here with caller. */
10538 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
10539 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
10540 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
10542 /* Care for on-stack alignment if needed. */
10543 if (arg_boundary <= 64 || size == 0)
10547 HOST_WIDE_INT align = arg_boundary / 8;
10548 t = fold_build_pointer_plus_hwi (ovf, align - 1);
10549 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
10550 build_int_cst (TREE_TYPE (t), -align));
10553 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
10554 gimplify_assign (addr, t, pre_p);
10556 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
10557 gimplify_assign (unshare_expr (ovf), t, pre_p);
10560 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
10562 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
10563 addr = fold_convert (ptrtype, addr);
10566 addr = build_va_arg_indirect_ref (addr);
10567 return build_va_arg_indirect_ref (addr);
10570 /* Return true if OPNUM's MEM should be matched
10571 in movabs* patterns. */
10574 ix86_check_movabs (rtx insn, int opnum)
10578 set = PATTERN (insn);
10579 if (GET_CODE (set) == PARALLEL)
10580 set = XVECEXP (set, 0, 0);
10581 gcc_assert (GET_CODE (set) == SET);
10582 mem = XEXP (set, opnum);
10583 while (SUBREG_P (mem))
10584 mem = SUBREG_REG (mem);
10585 gcc_assert (MEM_P (mem));
10586 return volatile_ok || !MEM_VOLATILE_P (mem);
10589 /* Return false if INSN contains a MEM with a non-default address space. */
10591 ix86_check_no_addr_space (rtx insn)
10593 subrtx_var_iterator::array_type array;
10594 FOR_EACH_SUBRTX_VAR (iter, array, PATTERN (insn), ALL)
10597 if (MEM_P (x) && !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (x)))
10603 /* Initialize the table of extra 80387 mathematical constants. */
10606 init_ext_80387_constants (void)
10608 static const char * cst[5] =
10610 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
10611 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
10612 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
10613 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
10614 "3.1415926535897932385128089594061862044", /* 4: fldpi */
10618 for (i = 0; i < 5; i++)
10620 real_from_string (&ext_80387_constants_table[i], cst[i]);
10621 /* Ensure each constant is rounded to XFmode precision. */
10622 real_convert (&ext_80387_constants_table[i],
10623 XFmode, &ext_80387_constants_table[i]);
10626 ext_80387_constants_init = 1;
10629 /* Return non-zero if the constant is something that
10630 can be loaded with a special instruction. */
10633 standard_80387_constant_p (rtx x)
10635 machine_mode mode = GET_MODE (x);
10637 const REAL_VALUE_TYPE *r;
10639 if (!(CONST_DOUBLE_P (x) && X87_FLOAT_MODE_P (mode)))
10642 if (x == CONST0_RTX (mode))
10644 if (x == CONST1_RTX (mode))
10647 r = CONST_DOUBLE_REAL_VALUE (x);
10649 /* For XFmode constants, try to find a special 80387 instruction when
10650 optimizing for size or on those CPUs that benefit from them. */
10652 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
10656 if (! ext_80387_constants_init)
10657 init_ext_80387_constants ();
10659 for (i = 0; i < 5; i++)
10660 if (real_identical (r, &ext_80387_constants_table[i]))
10664 /* Load of the constant -0.0 or -1.0 will be split as
10665 fldz;fchs or fld1;fchs sequence. */
10666 if (real_isnegzero (r))
10668 if (real_identical (r, &dconstm1))
10674 /* Return the opcode of the special instruction to be used to load
10678 standard_80387_constant_opcode (rtx x)
10680 switch (standard_80387_constant_p (x))
10700 gcc_unreachable ();
10704 /* Return the CONST_DOUBLE representing the 80387 constant that is
10705 loaded by the specified special instruction. The argument IDX
10706 matches the return value from standard_80387_constant_p. */
10709 standard_80387_constant_rtx (int idx)
10713 if (! ext_80387_constants_init)
10714 init_ext_80387_constants ();
10727 gcc_unreachable ();
10730 return const_double_from_real_value (ext_80387_constants_table[i],
10734 /* Return 1 if X is all 0s and 2 if x is all 1s
10735 in supported SSE/AVX vector mode. */
10738 standard_sse_constant_p (rtx x)
10745 mode = GET_MODE (x);
10747 if (x == const0_rtx || x == CONST0_RTX (mode))
10749 if (vector_all_ones_operand (x, mode))
10768 if (TARGET_AVX512F)
10777 /* Return the opcode of the special instruction to be used to load
10781 standard_sse_constant_opcode (rtx_insn *insn, rtx x)
10783 switch (standard_sse_constant_p (x))
10786 switch (get_attr_mode (insn))
10789 return "vpxord\t%g0, %g0, %g0";
10791 return TARGET_AVX512DQ ? "vxorps\t%g0, %g0, %g0"
10792 : "vpxord\t%g0, %g0, %g0";
10794 return TARGET_AVX512DQ ? "vxorpd\t%g0, %g0, %g0"
10795 : "vpxorq\t%g0, %g0, %g0";
10797 return TARGET_AVX512VL ? "vpxord\t%t0, %t0, %t0"
10798 : "%vpxor\t%0, %d0";
10800 return "%vxorpd\t%0, %d0";
10802 return "%vxorps\t%0, %d0";
10805 return TARGET_AVX512VL ? "vpxord\t%x0, %x0, %x0"
10806 : "vpxor\t%x0, %x0, %x0";
10808 return "vxorpd\t%x0, %x0, %x0";
10810 return "vxorps\t%x0, %x0, %x0";
10817 if (TARGET_AVX512VL
10818 || get_attr_mode (insn) == MODE_XI
10819 || get_attr_mode (insn) == MODE_V8DF
10820 || get_attr_mode (insn) == MODE_V16SF)
10821 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
10823 return "vpcmpeqd\t%0, %0, %0";
10825 return "pcmpeqd\t%0, %0";
10830 gcc_unreachable ();
10833 /* Returns true if OP contains a symbol reference */
10836 symbolic_reference_mentioned_p (rtx op)
10841 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
10844 fmt = GET_RTX_FORMAT (GET_CODE (op));
10845 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
10851 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
10852 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
10856 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
10863 /* Return true if it is appropriate to emit `ret' instructions in the
10864 body of a function. Do this only if the epilogue is simple, needing a
10865 couple of insns. Prior to reloading, we can't tell how many registers
10866 must be saved, so return false then. Return false if there is no frame
10867 marker to de-allocate. */
10870 ix86_can_use_return_insn_p (void)
10872 struct ix86_frame frame;
10874 if (! reload_completed || frame_pointer_needed)
10877 /* Don't allow more than 32k pop, since that's all we can do
10878 with one instruction. */
10879 if (crtl->args.pops_args && crtl->args.size >= 32768)
10882 ix86_compute_frame_layout (&frame);
10883 return (frame.stack_pointer_offset == UNITS_PER_WORD
10884 && (frame.nregs + frame.nsseregs) == 0);
10887 /* Value should be nonzero if functions must have frame pointers.
10888 Zero means the frame pointer need not be set up (and parms may
10889 be accessed via the stack pointer) in functions that seem suitable. */
10892 ix86_frame_pointer_required (void)
10894 /* If we accessed previous frames, then the generated code expects
10895 to be able to access the saved ebp value in our frame. */
10896 if (cfun->machine->accesses_prev_frame)
10899 /* Several x86 os'es need a frame pointer for other reasons,
10900 usually pertaining to setjmp. */
10901 if (SUBTARGET_FRAME_POINTER_REQUIRED)
10904 /* For older 32-bit runtimes setjmp requires valid frame-pointer. */
10905 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
10908 /* Win64 SEH, very large frames need a frame-pointer as maximum stack
10909 allocation is 4GB. */
10910 if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
10913 /* SSE saves require frame-pointer when stack is misaligned. */
10914 if (TARGET_64BIT_MS_ABI && ix86_incoming_stack_boundary < 128)
10917 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
10918 turns off the frame pointer by default. Turn it back on now if
10919 we've not got a leaf function. */
10920 if (TARGET_OMIT_LEAF_FRAME_POINTER
10922 || ix86_current_function_calls_tls_descriptor))
10925 if (crtl->profile && !flag_fentry)
10931 /* Record that the current function accesses previous call frames. */
10934 ix86_setup_frame_addresses (void)
10936 cfun->machine->accesses_prev_frame = 1;
10939 #ifndef USE_HIDDEN_LINKONCE
10940 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
10941 # define USE_HIDDEN_LINKONCE 1
10943 # define USE_HIDDEN_LINKONCE 0
10947 static int pic_labels_used;
10949 /* Fills in the label name that should be used for a pc thunk for
10950 the given register. */
10953 get_pc_thunk_name (char name[32], unsigned int regno)
10955 gcc_assert (!TARGET_64BIT);
10957 if (USE_HIDDEN_LINKONCE)
10958 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
10960 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
10964 /* This function generates code for -fpic that loads %ebx with
10965 the return address of the caller and then returns.  Runs at the
   end of compilation and emits one thunk per register recorded in
   pic_labels_used.  NOTE(review): several lines (braces, local
   declarations, else branches) are elided in this listing.  */
10968 ix86_code_end (void)
10973 for (regno = AX_REG; regno <= SP_REG; regno++)
/* Skip registers no SET_GOT ever targeted.  */
10978 if (!(pic_labels_used & (1 << regno)))
10981 get_pc_thunk_name (name, regno);
/* Build a minimal FUNCTION_DECL for the thunk so the normal function
   output machinery (sections, unwind info) can be reused.  */
10983 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
10984 get_identifier (name),
10985 build_function_type_list (void_type_node, NULL_TREE));
10986 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
10987 NULL_TREE, void_type_node);
10988 TREE_PUBLIC (decl) = 1;
10989 TREE_STATIC (decl) = 1;
10990 DECL_IGNORED_P (decl) = 1;
/* Mach-O path: weak definition in the coalesced text section.  */
10995 switch_to_section (darwin_sections[text_coal_section]);
10996 fputs ("\t.weak_definition\t", asm_out_file);
10997 assemble_name (asm_out_file, name);
10998 fputs ("\n\t.private_extern\t", asm_out_file);
10999 assemble_name (asm_out_file, name);
11000 putc ('\n', asm_out_file);
11001 ASM_OUTPUT_LABEL (asm_out_file, name);
11002 DECL_WEAK (decl) = 1;
/* ELF path: a hidden COMDAT definition shared across TUs.  */
11006 if (USE_HIDDEN_LINKONCE)
11008 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
11010 targetm.asm_out.unique_section (decl, 0);
11011 switch_to_section (get_named_section (decl, NULL, 0));
11013 targetm.asm_out.globalize_label (asm_out_file, name);
11014 fputs ("\t.hidden\t", asm_out_file);
11015 assemble_name (asm_out_file, name);
11016 putc ('\n', asm_out_file);
11017 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
/* Fallback: a plain local label in the regular text section.  */
11021 switch_to_section (text_section);
11022 ASM_OUTPUT_LABEL (asm_out_file, name);
11025 DECL_INITIAL (decl) = make_node (BLOCK);
11026 current_function_decl = decl;
11027 allocate_struct_function (decl, false);
11028 init_function_start (decl);
11029 first_function_block_is_cold = false;
11030 /* Make sure unwind info is emitted for the thunk if needed. */
11031 final_start_function (emit_barrier (), asm_out_file, 1);
11033 /* Pad stack IP move with 4 instructions (two NOPs count
11034 as one instruction). */
11035 if (TARGET_PAD_SHORT_FUNCTION)
11040 fputs ("\tnop\n", asm_out_file);
/* The thunk body: load the return address (at the top of the stack)
   into the target register, then return.  */
11043 xops[0] = gen_rtx_REG (Pmode, regno);
11044 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
11045 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
11046 output_asm_insn ("%!ret", NULL);
11047 final_end_function ();
11048 init_insn_lengths ();
11049 free_after_compilation (cfun);
11051 current_function_decl = NULL;
11054 if (flag_split_stack)
11055 file_end_indicate_split_stack ();
11058 /* Emit code for the SET_GOT patterns: load the GOT base address into
   DEST.  LABEL, when non-null, is the label at which the PIC base is
   (re)established.  NOTE(review): local declarations, braces and some
   branches are elided in this listing.  */
11061 output_set_got (rtx dest, rtx label)
/* VxWorks RTP: the GOT base comes from a runtime table rather than a
   link-time symbol.  */
11067 if (TARGET_VXWORKS_RTP && flag_pic)
11069 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
11070 xops[2] = gen_rtx_MEM (Pmode,
11071 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
11072 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
11074 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
11075 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
11076 an unadorned address. */
11077 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
11078 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
11079 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
11083 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
11088 /* We don't need a pic base, we're not producing pic. */
11089 gcc_unreachable ();
/* Deep-branch path: materialize the PC via a mov from a local label
   rather than a call to a thunk.  */
11091 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
11092 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
11093 targetm.asm_out.internal_label (asm_out_file, "L",
11094 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
/* Thunk path: call the per-register get-pc thunk and record that the
   thunk must be emitted by ix86_code_end.  */
11099 get_pc_thunk_name (name, REGNO (dest));
11100 pic_labels_used |= 1 << REGNO (dest);
11102 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
11103 xops[2] = gen_rtx_MEM (QImode, xops[2]);
11104 output_asm_insn ("%!call\t%X2", xops);
11107 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
11108 This is what will be referenced by the Mach-O PIC subsystem. */
11109 if (machopic_should_output_picbase_label () || !label)
11110 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
11112 /* When we are restoring the pic base at the site of a nonlocal label,
11113 and we decided to emit the pic base above, we will still output a
11114 local label used for calculating the correction offset (even though
11115 the offset will be 0 in that case). */
11117 targetm.asm_out.internal_label (asm_out_file, "L",
11118 CODE_LABEL_NUMBER (label));
/* Finally add the GOT symbol's offset to the PC value obtained above.  */
11123 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
11128 /* Generate a "push" pattern for input ARG, updating the frame-state
   bookkeeping in cfun->machine->fs to match.  */
11133 struct machine_function *m = cfun->machine;
/* A push moves the stack pointer by one word; keep the tracked CFA and
   SP offsets in sync.  */
11135 if (m->fs.cfa_reg == stack_pointer_rtx)
11136 m->fs.cfa_offset += UNITS_PER_WORD;
11137 m->fs.sp_offset += UNITS_PER_WORD;
/* Pushes always operate on full words; widen a narrower hard register.  */
11139 if (REG_P (arg) && GET_MODE (arg) != word_mode)
11140 arg = gen_rtx_REG (word_mode, REGNO (arg));
11142 return gen_rtx_SET (gen_rtx_MEM (word_mode,
11143 gen_rtx_PRE_DEC (Pmode,
11144 stack_pointer_rtx)),
11148 /* Generate a "pop" pattern for input ARG: a SET of ARG from a
   post-increment stack memory reference.  */
/* Pops always operate on full words; widen a narrower hard register.  */
11153 if (REG_P (arg) && GET_MODE (arg) != word_mode)
11154 arg = gen_rtx_REG (word_mode, REGNO (arg));
11156 return gen_rtx_SET (arg,
11157 gen_rtx_MEM (word_mode,
11158 gen_rtx_POST_INC (Pmode,
11159 stack_pointer_rtx)));
11162 /* Return >= 0 if there is an unused call-clobbered register available
11163 for the entire function; INVALID_REGNUM otherwise.  */
11165 static unsigned int
11166 ix86_select_alt_pic_regnum (void)
/* With a pseudo PIC register there is nothing to reclaim here.  */
11168 if (ix86_use_pseudo_pic_reg ())
11169 return INVALID_REGNUM;
11173 && !ix86_current_function_calls_tls_descriptor)
11176 /* Can't use the same register for both PIC and DRAP. */
11177 if (crtl->drap_reg)
11178 drap = REGNO (crtl->drap_reg);
/* Scan hard registers 2..0 -- NOTE(review): on i386 these are the
   call-clobbered ax/dx/cx; confirm against the register ordering in
   i386.h.  */
11181 for (i = 2; i >= 0; --i)
11182 if (i != drap && !df_regs_ever_live_p (i))
11186 return INVALID_REGNUM;
11189 /* Return TRUE if we need to save REGNO in the prologue.
   MAYBE_EH_RETURN additionally considers the EH return data registers.  */
11192 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
/* Special-case the hard PIC register when a PIC pointer exists.  */
11194 if (regno == REAL_PIC_OFFSET_TABLE_REGNUM
11195 && pic_offset_table_rtx)
11197 if (ix86_use_pseudo_pic_reg ())
11199 /* REAL_PIC_OFFSET_TABLE_REGNUM used by call to
11200 _mcount in prologue. */
11201 if (!TARGET_64BIT && flag_pic && crtl->profile)
/* Otherwise save it only if it is genuinely needed and no alternate
   register can carry the PIC base instead.  */
11204 else if (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
11206 || crtl->calls_eh_return
11207 || crtl->uses_const_pool
11208 || cfun->has_nonlocal_label)
11209 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
/* EH return data registers must survive into the landing pad.  */
11212 if (crtl->calls_eh_return && maybe_eh_return)
11217 unsigned test = EH_RETURN_DATA_REGNO (i);
11218 if (test == INVALID_REGNUM)
/* The DRAP register is saved unless explicitly suppressed.  */
11226 && regno == REGNO (crtl->drap_reg)
11227 && !cfun->machine->no_drap_save_restore)
/* Generic rule: live, callee-saved, not fixed, and not the frame
   pointer when a frame pointer is already established.  */
11230 return (df_regs_ever_live_p (regno)
11231 && !call_used_regs[regno]
11232 && !fixed_regs[regno]
11233 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
11236 /* Return number of saved general purpose registers. */
11239 ix86_nsaved_regs (void)
11244 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11245 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true))
11250 /* Return number of saved SSE registers.  Only the 64-bit MS ABI has
   callee-saved SSE registers; NOTE(review): the early-return for the
   non-MS-ABI case is elided in this listing -- presumably returns 0.  */
11253 ix86_nsaved_sseregs (void)
11258 if (!TARGET_64BIT_MS_ABI)
11260 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11261 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
11266 /* Given FROM and TO register numbers, say whether this elimination is
11267 allowed. If stack alignment is needed, we can only replace argument
11268 pointer with hard frame pointer, or replace frame pointer with stack
11269 pointer. Otherwise, frame pointer elimination is automatically
11270 handled and all other eliminations are valid. */
11273 ix86_can_eliminate (const int from, const int to)
/* When realigning off the frame pointer, only the two eliminations
   listed in the comment above are meaningful.  */
11275 if (stack_realign_fp)
11276 return ((from == ARG_POINTER_REGNUM
11277 && to == HARD_FRAME_POINTER_REGNUM)
11278 || (from == FRAME_POINTER_REGNUM
11279 && to == STACK_POINTER_REGNUM))
11284 /* Return the offset between two registers, one to be eliminated, and the other
11285 its replacement, at the start of a routine.  Computed from the full
   frame layout, so it is consistent with prologue generation.  */
11288 ix86_initial_elimination_offset (int from, int to)
11290 struct ix86_frame frame;
11291 ix86_compute_frame_layout (&frame);
11293 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
11294 return frame.hard_frame_pointer_offset;
11295 else if (from == FRAME_POINTER_REGNUM
11296 && to == HARD_FRAME_POINTER_REGNUM)
11297 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
/* Only the stack pointer remains as a valid elimination target.  */
11300 gcc_assert (to == STACK_POINTER_REGNUM);
11302 if (from == ARG_POINTER_REGNUM)
11303 return frame.stack_pointer_offset;
11305 gcc_assert (from == FRAME_POINTER_REGNUM);
11306 return frame.stack_pointer_offset - frame.frame_pointer_offset;
11310 /* In a dynamically-aligned function, we can't know the offset from
11311 stack pointer to frame pointer, so we must ensure that setjmp
11312 eliminates fp against the hard fp (%ebp) rather than trying to
11313 index from %esp up to the top of the frame across a gap that is
11314 of unknown (at compile-time) size. */
11316 ix86_builtin_setjmp_frame_value (void)
11318 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
11321 /* When using -fsplit-stack, the allocation routines set a field in
11322 the TCB to the bottom of the stack plus this much space, measured
/* NOTE(review): the tail of this comment is elided in this listing;
   the value is presumably in bytes -- confirm against libgcc's
   split-stack support.  */
11325 #define SPLIT_STACK_AVAILABLE 256
11327 /* Fill structure ix86_frame about frame of currently computed function.
   Computes register-save counts, alignment requirements, and the offsets
   of every frame region (register save area, SSE save area, locals,
   outgoing args), top-down from the return address.  NOTE(review):
   several lines (braces, SEH conditions, else branches) are elided in
   this listing.  */
11330 ix86_compute_frame_layout (struct ix86_frame *frame)
11332 unsigned HOST_WIDE_INT stack_alignment_needed;
11333 HOST_WIDE_INT offset;
11334 unsigned HOST_WIDE_INT preferred_alignment;
11335 HOST_WIDE_INT size = get_frame_size ();
11336 HOST_WIDE_INT to_allocate;
11338 frame->nregs = ix86_nsaved_regs ();
11339 frame->nsseregs = ix86_nsaved_sseregs ();
11341 /* 64-bit MS ABI seems to require stack alignment to be always 16,
11342 except for function prologues, leaf functions and when the default
11343 incoming stack boundary is overridden at command line or via
11344 force_align_arg_pointer attribute. */
11345 if ((TARGET_64BIT_MS_ABI && crtl->preferred_stack_boundary < 128)
11346 && (!crtl->is_leaf || cfun->calls_alloca != 0
11347 || ix86_current_function_calls_tls_descriptor
11348 || ix86_incoming_stack_boundary < 128))
11350 crtl->preferred_stack_boundary = 128;
11351 crtl->stack_alignment_needed = 128;
11353 /* preferred_stack_boundary is never updated for call
11354 expanded from tls descriptor. Update it here. We don't update it in
11355 expand stage because according to the comments before
11356 ix86_current_function_calls_tls_descriptor, tls calls may be optimized
11358 else if (ix86_current_function_calls_tls_descriptor
11359 && crtl->preferred_stack_boundary < PREFERRED_STACK_BOUNDARY)
11361 crtl->preferred_stack_boundary = PREFERRED_STACK_BOUNDARY;
11362 if (crtl->stack_alignment_needed < PREFERRED_STACK_BOUNDARY)
11363 crtl->stack_alignment_needed = PREFERRED_STACK_BOUNDARY;
/* Convert the boundaries from bits to bytes for offset arithmetic.  */
11366 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
11367 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
11369 gcc_assert (!size || stack_alignment_needed);
11370 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
11371 gcc_assert (preferred_alignment <= stack_alignment_needed);
11373 /* For SEH we have to limit the amount of code movement into the prologue.
11374 At present we do this via a BLOCKAGE, at which point there's very little
11375 scheduling that can be done, which means that there's very little point
11376 in doing anything except PUSHs. */
11378 cfun->machine->use_fast_prologue_epilogue = false;
11380 /* During reload iteration the amount of registers saved can change.
11381 Recompute the value as needed. Do not recompute when amount of registers
11382 didn't change as reload does multiple calls to the function and does not
11383 expect the decision to change within single iteration. */
11384 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun))
11385 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
11387 int count = frame->nregs;
11388 struct cgraph_node *node = cgraph_node::get (current_function_decl);
11390 cfun->machine->use_fast_prologue_epilogue_nregs = count;
11392 /* The fast prologue uses move instead of push to save registers. This
11393 is significantly longer, but also executes faster as modern hardware
11394 can execute the moves in parallel, but can't do that for push/pop.
11396 Be careful about choosing what prologue to emit: When function takes
11397 many instructions to execute we may use slow version as well as in
11398 case function is known to be outside hot spot (this is known with
11399 feedback only). Weight the size of function by number of registers
11400 to save as it is cheap to use one or two push instructions but very
11401 slow to use many of them. */
11403 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
11404 if (node->frequency < NODE_FREQUENCY_NORMAL
11405 || (flag_branch_probabilities
11406 && node->frequency < NODE_FREQUENCY_HOT))
11407 cfun->machine->use_fast_prologue_epilogue = false;
11409 cfun->machine->use_fast_prologue_epilogue
11410 = !expensive_function_p (count);
11413 frame->save_regs_using_mov
11414 = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
11415 /* If static stack checking is enabled and done with probes,
11416 the registers need to be saved before allocating the frame. */
11417 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
11419 /* Skip return address. */
11420 offset = UNITS_PER_WORD;
11422 /* Skip pushed static chain. */
11423 if (ix86_static_chain_on_stack)
11424 offset += UNITS_PER_WORD;
11426 /* Skip saved base pointer. */
11427 if (frame_pointer_needed)
11428 offset += UNITS_PER_WORD;
11429 frame->hfp_save_offset = offset;
11431 /* The traditional frame pointer location is at the top of the frame. */
11432 frame->hard_frame_pointer_offset = offset;
11434 /* Register save area */
11435 offset += frame->nregs * UNITS_PER_WORD;
11436 frame->reg_save_offset = offset;
11438 /* On SEH target, registers are pushed just before the frame pointer
   location.  NOTE(review): the guarding condition for this line is
   elided in this listing.  */
11441 frame->hard_frame_pointer_offset = offset;
11443 /* Align and set SSE register save area. */
11444 if (frame->nsseregs)
11446 /* The only ABI that has saved SSE registers (Win64) also has a
11447 16-byte aligned default stack, and thus we don't need to be
11448 within the re-aligned local stack frame to save them. In case
11449 incoming stack boundary is aligned to less than 16 bytes,
11450 unaligned move of SSE register will be emitted, so there is
11451 no point to round up the SSE register save area outside the
11452 re-aligned local stack frame to 16 bytes. */
11453 if (ix86_incoming_stack_boundary >= 128)
11454 offset = ROUND_UP (offset, 16);
11455 offset += frame->nsseregs * 16;
11457 frame->sse_reg_save_offset = offset;
11459 /* The re-aligned stack starts here. Values before this point are not
11460 directly comparable with values below this point. In order to make
11461 sure that no value happens to be the same before and after, force
11462 the alignment computation below to add a non-zero value. */
11463 if (stack_realign_fp)
11464 offset = ROUND_UP (offset, stack_alignment_needed);
/* Variadic-argument register save area.  */
11467 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
11468 offset += frame->va_arg_size;
11470 /* Align start of frame for local function. */
11471 if (stack_realign_fp
11472 || offset != frame->sse_reg_save_offset
11475 || cfun->calls_alloca
11476 || ix86_current_function_calls_tls_descriptor)
11477 offset = ROUND_UP (offset, stack_alignment_needed)
11479 /* Frame pointer points here. */
11480 frame->frame_pointer_offset = offset;
11484 /* Add outgoing arguments area. Can be skipped if we eliminated
11485 all the function calls as dead code.
11486 Skipping is however impossible when function calls alloca. Alloca
11487 expander assumes that last crtl->outgoing_args_size
11488 of stack frame are unused. */
11489 if (ACCUMULATE_OUTGOING_ARGS
11490 && (!crtl->is_leaf || cfun->calls_alloca
11491 || ix86_current_function_calls_tls_descriptor))
11493 offset += crtl->outgoing_args_size;
11494 frame->outgoing_arguments_size = crtl->outgoing_args_size;
11497 frame->outgoing_arguments_size = 0;
11499 /* Align stack boundary. Only needed if we're calling another function
11500 or using alloca. */
11501 if (!crtl->is_leaf || cfun->calls_alloca
11502 || ix86_current_function_calls_tls_descriptor)
11503 offset = ROUND_UP (offset, preferred_alignment);
11505 /* We've reached end of stack frame. */
11506 frame->stack_pointer_offset = offset;
11508 /* Size prologue needs to allocate. */
11509 to_allocate = offset - frame->sse_reg_save_offset;
/* Small frames are cheaper to handle with pushes; huge 64-bit frames
   cannot use mov saves with 32-bit displacements.  */
11511 if ((!to_allocate && frame->nregs <= 1)
11512 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
11513 frame->save_regs_using_mov = false;
/* The red zone lets leaf-ish functions use stack below SP without
   adjusting it.  */
11515 if (ix86_using_red_zone ()
11516 && crtl->sp_is_unchanging
11518 && !ix86_current_function_calls_tls_descriptor)
11520 frame->red_zone_size = to_allocate;
11521 if (frame->save_regs_using_mov)
11522 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
11523 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
11524 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
11527 frame->red_zone_size = 0;
11528 frame->stack_pointer_offset -= frame->red_zone_size;
11530 /* The SEH frame pointer location is near the bottom of the frame.
11531 This is enforced by the fact that the difference between the
11532 stack pointer and the frame pointer is limited to 240 bytes in
11533 the unwind data structure. */
11536 HOST_WIDE_INT diff;
11538 /* If we can leave the frame pointer where it is, do so. Also, returns
11539 the establisher frame for __builtin_frame_address (0). */
11540 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
11541 if (diff <= SEH_MAX_FRAME_SIZE
11542 && (diff > 240 || (diff & 15) != 0)
11543 && !crtl->accesses_prior_frames)
11545 /* Ideally we'd determine what portion of the local stack frame
11546 (within the constraint of the lowest 240) is most heavily used.
11547 But without that complication, simply bias the frame pointer
11548 by 128 bytes so as to maximize the amount of the local stack
11549 frame that is addressable with 8-bit offsets. */
11550 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
11555 /* This is semi-inlined memory_address_length, but simplified
11556 since we know that we're always dealing with reg+offset, and
11557 to avoid having to create and discard all that rtl.  Returns the
   encoded length in bytes of a REGNO+OFFSET address.  NOTE(review):
   the zero-offset branch structure and some length assignments are
   elided in this listing.  */
11560 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
11566 /* EBP and R13 cannot be encoded without an offset. */
11567 len = (regno == BP_REG || regno == R13_REG);
/* Offsets in [-128, 127] fit in a disp8; others need a disp32.  */
11569 else if (IN_RANGE (offset, -128, 127))
11572 /* ESP and R12 must be encoded with a SIB byte. */
11573 if (regno == SP_REG || regno == R12_REG)
11579 /* Return an RTX that points to CFA_OFFSET within the stack frame.
11580 The valid base registers are taken from CFUN->MACHINE->FS. */
11583 choose_baseaddr (HOST_WIDE_INT cfa_offset)
11585 const struct machine_function *m = cfun->machine;
11586 rtx base_reg = NULL;
11587 HOST_WIDE_INT base_offset = 0;
/* Fast prologue/epilogue: pick the base by preference order rather
   than by encoding size.  */
11589 if (m->fs.use_fast_prologue_epilogue)
11591 /* Choose the base register most likely to allow the most scheduling
11592 opportunities. Generally FP is valid throughout the function,
11593 while DRAP must be reloaded within the epilogue. But choose either
11594 over the SP due to increased encoding size. */
11596 if (m->fs.fp_valid)
11598 base_reg = hard_frame_pointer_rtx;
11599 base_offset = m->fs.fp_offset - cfa_offset;
11601 else if (m->fs.drap_valid)
11603 base_reg = crtl->drap_reg;
11604 base_offset = 0 - cfa_offset;
11606 else if (m->fs.sp_valid)
11608 base_reg = stack_pointer_rtx;
11609 base_offset = m->fs.sp_offset - cfa_offset;
/* Otherwise: minimize the address-encoding length, preferring
   FP > DRAP > SP on ties (later candidates win ties below).  */
11614 HOST_WIDE_INT toffset;
11615 int len = 16, tlen;
11617 /* Choose the base register with the smallest address encoding.
11618 With a tie, choose FP > DRAP > SP. */
11619 if (m->fs.sp_valid)
11621 base_reg = stack_pointer_rtx;
11622 base_offset = m->fs.sp_offset - cfa_offset;
11623 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
11625 if (m->fs.drap_valid)
11627 toffset = 0 - cfa_offset;
11628 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
11631 base_reg = crtl->drap_reg;
11632 base_offset = toffset;
11636 if (m->fs.fp_valid)
11638 toffset = m->fs.fp_offset - cfa_offset;
11639 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
11642 base_reg = hard_frame_pointer_rtx;
11643 base_offset = toffset;
/* At least one base register must be valid at this point.  */
11648 gcc_assert (base_reg != NULL);
11650 return plus_constant (Pmode, base_reg, base_offset);
11653 /* Emit code to save registers in the prologue, using push insns.  */
11656 ix86_emit_save_regs (void)
11658 unsigned int regno;
/* Iterate regnos FIRST_PSEUDO_REGISTER-2 down to 0 (the condition
   decrements after testing), so registers are pushed in descending
   register-number order.  */
11661 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
11662 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true))
11664 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
11665 RTX_FRAME_RELATED_P (insn) = 1;
11669 /* Emit a single register save at CFA - CFA_OFFSET, using a mov (or
   unaligned SSE store) rather than a push, and attach the unwind note
   that describes the save.  NOTE(review): the line computing BASE from
   ADDR is elided in this listing.  */
11672 ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno,
11673 HOST_WIDE_INT cfa_offset)
11675 struct machine_function *m = cfun->machine;
11676 rtx reg = gen_rtx_REG (mode, regno);
11677 rtx unspec = NULL_RTX;
11678 rtx mem, addr, base, insn;
11679 unsigned int align;
11681 addr = choose_baseaddr (cfa_offset);
11682 mem = gen_frame_mem (mode, addr);
11684 /* The location is aligned up to INCOMING_STACK_BOUNDARY. */
11685 align = MIN (GET_MODE_ALIGNMENT (mode), INCOMING_STACK_BOUNDARY);
11686 set_mem_align (mem, align);
11688 /* SSE saves are not within re-aligned local stack frame.
11689 In case INCOMING_STACK_BOUNDARY is misaligned, we have
11690 to emit unaligned store. */
11691 if (mode == V4SFmode && align < 128)
11692 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (1, reg), UNSPEC_STOREU);
11694 insn = emit_insn (gen_rtx_SET (mem, unspec ? unspec : reg));
11695 RTX_FRAME_RELATED_P (insn) = 1;
/* Strip any offset to recover the bare base register.  */
11698 if (GET_CODE (base) == PLUS)
11699 base = XEXP (base, 0);
11700 gcc_checking_assert (REG_P (base));
11702 /* When saving registers into a re-aligned local stack frame, avoid
11703 any tricky guessing by dwarf2out. */
11704 if (m->fs.realigned)
11706 gcc_checking_assert (stack_realign_drap);
11708 if (regno == REGNO (crtl->drap_reg))
11710 /* A bit of a hack. We force the DRAP register to be saved in
11711 the re-aligned stack frame, which provides us with a copy
11712 of the CFA that will last past the prologue. Install it. */
11713 gcc_checking_assert (cfun->machine->fs.fp_valid);
11714 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
11715 cfun->machine->fs.fp_offset - cfa_offset);
11716 mem = gen_rtx_MEM (mode, addr);
11717 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
11721 /* The frame pointer is a stable reference within the
11722 aligned frame. Use it. */
11723 gcc_checking_assert (cfun->machine->fs.fp_valid);
11724 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
11725 cfun->machine->fs.fp_offset - cfa_offset);
11726 mem = gen_rtx_MEM (mode, addr);
11727 add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
11731 /* The memory may not be relative to the current CFA register,
11732 which means that we may need to generate a new pattern for
11733 use by the unwind info. */
11734 else if (base != m->fs.cfa_reg)
11736 addr = plus_constant (Pmode, m->fs.cfa_reg,
11737 m->fs.cfa_offset - cfa_offset);
11738 mem = gen_rtx_MEM (mode, addr);
11739 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, reg));
/* Unaligned SSE store: describe the save explicitly for unwind.  */
11742 add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
11745 /* Emit code to save registers using MOV insns.
11746 First register is stored at CFA - CFA_OFFSET; subsequent registers
   at successively lower word offsets.  */
11748 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
11750 unsigned int regno;
11752 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11753 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true))
11755 ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
11756 cfa_offset -= UNITS_PER_WORD;
11760 /* Emit code to save SSE registers using MOV insns.
11761 First register is stored at CFA - CFA_OFFSET; subsequent registers
   at successively lower 16-byte offsets.  */
11763 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
11765 unsigned int regno;
11767 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11768 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
11770 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
11771 cfa_offset -= GET_MODE_SIZE (V4SFmode);
/* GC-rooted list of REG_CFA_RESTORE notes waiting to be attached to the
   next stack-manipulation insn (see ix86_add_queued_cfa_restore_notes).  */
11775 static GTY(()) rtx queued_cfa_restores;
11777 /* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
11778 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
11779 Don't add the note if the previously saved value will be left untouched
11780 within stack red-zone till return, as unwinders can find the same value
11781 in the register and on the stack. */
11784 ix86_add_cfa_restore_note (rtx_insn *insn, rtx reg, HOST_WIDE_INT cfa_offset)
/* Red-zone case described above: the slot stays valid, so no note.  */
11786 if (!crtl->shrink_wrapped
11787 && cfa_offset <= cfun->machine->fs.red_zone_offset)
11792 add_reg_note (insn, REG_CFA_RESTORE, reg);
11793 RTX_FRAME_RELATED_P (insn) = 1;
/* No insn to attach to yet: queue the note.  */
11796 queued_cfa_restores
11797 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
11800 /* Add queued REG_CFA_RESTORE notes if any to INSN, by splicing the
   queued list onto the front of INSN's existing notes.  */
11803 ix86_add_queued_cfa_restore_notes (rtx insn)
11806 if (!queued_cfa_restores)
/* Find the tail of the queued list, then splice.  */
11808 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
11810 XEXP (last, 1) = REG_NOTES (insn);
11811 REG_NOTES (insn) = queued_cfa_restores;
11812 queued_cfa_restores = NULL_RTX;
11813 RTX_FRAME_RELATED_P (insn) = 1;
11816 /* Expand prologue or epilogue stack adjustment.
11817 The pattern exist to put a dependency on all ebp-based memory accesses.
11818 STYLE should be negative if instructions should be marked as frame related,
11819 zero if %r11 register is live and cannot be freely used and positive
   otherwise.  SET_CFA requests CFA bookkeeping updates.  NOTE(review):
   several branch/brace lines are elided in this listing.  */
11823 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
11824 int style, bool set_cfa)
11826 struct machine_function *m = cfun->machine;
11828 bool add_frame_related_expr = false;
/* Pick the add pattern directly when the offset is encodable.  */
11830 if (Pmode == SImode)
11831 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
11832 else if (x86_64_immediate_operand (offset, DImode))
11833 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
/* 64-bit offset that doesn't fit an immediate: load it into a scratch
   register first.  */
11837 /* r11 is used by indirect sibcall return as well, set before the
11838 epilogue and used after the epilogue. */
11840 tmp = gen_rtx_REG (DImode, R11_REG);
11843 gcc_assert (src != hard_frame_pointer_rtx
11844 && dest != hard_frame_pointer_rtx);
11845 tmp = hard_frame_pointer_rtx;
11847 insn = emit_insn (gen_rtx_SET (tmp, offset));
11849 add_frame_related_expr = true;
11851 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
11854 insn = emit_insn (insn);
11856 ix86_add_queued_cfa_restore_notes (insn);
/* Keep the tracked CFA in sync and describe the adjustment for the
   unwinder.  */
11862 gcc_assert (m->fs.cfa_reg == src);
11863 m->fs.cfa_offset += INTVAL (offset);
11864 m->fs.cfa_reg = dest;
11866 r = gen_rtx_PLUS (Pmode, src, offset);
11867 r = gen_rtx_SET (dest, r);
11868 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
11869 RTX_FRAME_RELATED_P (insn) = 1;
11871 else if (style < 0)
11873 RTX_FRAME_RELATED_P (insn) = 1;
11874 if (add_frame_related_expr)
11876 rtx r = gen_rtx_PLUS (Pmode, src, offset);
11877 r = gen_rtx_SET (dest, r);
11878 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
/* Track the stack pointer's offset and validity after the move.  */
11882 if (dest == stack_pointer_rtx)
11884 HOST_WIDE_INT ooffset = m->fs.sp_offset;
11885 bool valid = m->fs.sp_valid;
11887 if (src == hard_frame_pointer_rtx)
11889 valid = m->fs.fp_valid;
11890 ooffset = m->fs.fp_offset;
11892 else if (src == crtl->drap_reg)
11894 valid = m->fs.drap_valid;
11899 /* Else there are two possibilities: SP itself, which we set
11900 up as the default above. Or EH_RETURN_STACKADJ_RTX, which is
11901 taken care of this by hand along the eh_return path. */
11902 gcc_checking_assert (src == stack_pointer_rtx
11903 || offset == const0_rtx);
11906 m->fs.sp_offset = ooffset - INTVAL (offset);
11907 m->fs.sp_valid = valid;
11911 /* Find an available register to be used as dynamic realign argument
11912 pointer register. Such a register will be written in prologue and
11913 used in begin of body, so it must not be
11914 1. parameter passing register.
11916 We reuse static-chain register if it is available. Otherwise, we
11917 use DI for i386 and R13 for x86-64. We chose R13 since it has
   no special meaning in 64-bit calling conventions.  NOTE(review):
   the return statements and the 64-bit/32-bit branch structure are
   elided in this listing.
11920 Return: the regno of chosen register. */
11922 static unsigned int
11923 find_drap_reg (void)
11925 tree decl = cfun->decl;
11929 /* Use R13 for nested function or functions that need a static chain.
11930 Since function with tail call may use any caller-saved
11931 registers in epilogue, DRAP must not use caller-saved
11932 register in such case. */
11933 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
11940 /* Use DI for nested function or functions that need a static chain.
11941 Since function with tail call may use any caller-saved
11942 registers in epilogue, DRAP must not use caller-saved
11943 register in such case. */
11944 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
11947 /* Reuse static chain register if it isn't used for parameter
   passing. */
11949 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
11951 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
11952 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
11959 /* Handle a "force_align_arg_pointer" attribute: warn and drop the
   attribute when applied to anything but a function type/declaration.  */
11962 ix86_handle_force_align_arg_pointer_attribute (tree *node, tree name,
11963 tree, int, bool *no_add_attrs)
11965 if (TREE_CODE (*node) != FUNCTION_TYPE
11966 && TREE_CODE (*node) != METHOD_TYPE
11967 && TREE_CODE (*node) != FIELD_DECL
11968 && TREE_CODE (*node) != TYPE_DECL)
11970 warning (OPT_Wattributes, "%qE attribute only applies to functions",
11972 *no_add_attrs = true;
11978 /* Return minimum incoming stack alignment in bits.  SIBCALL selects
   the (stricter) variant used for sibling-call checks.  */
11980 static unsigned int
11981 ix86_minimum_incoming_stack_boundary (bool sibcall)
11983 unsigned int incoming_stack_boundary;
11985 /* Prefer the one specified at command line. */
11986 if (ix86_user_incoming_stack_boundary)
11987 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
11988 /* In 32bit, use MIN_STACK_BOUNDARY for incoming stack boundary
11989 if -mstackrealign is used, it isn't used for sibcall check and
11990 estimated stack alignment is 128bit. */
11992 && ix86_force_align_arg_pointer
11993 && crtl->stack_alignment_estimated == 128)
11994 incoming_stack_boundary = MIN_STACK_BOUNDARY;
11996 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
11998 /* Incoming stack alignment can be changed on individual functions
11999 via force_align_arg_pointer attribute. We use the smallest
12000 incoming stack boundary. */
12001 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
12002 && lookup_attribute (ix86_force_align_arg_pointer_string,
12003 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
12004 incoming_stack_boundary = MIN_STACK_BOUNDARY;
12006 /* The incoming stack frame has to be aligned at least at
12007 parm_stack_boundary. */
12008 if (incoming_stack_boundary < crtl->parm_stack_boundary)
12009 incoming_stack_boundary = crtl->parm_stack_boundary;
12011 /* Stack at entrance of main is aligned by runtime. We use the
12012 smallest incoming stack boundary. */
12013 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
12014 && DECL_NAME (current_function_decl)
12015 && MAIN_NAME_P (DECL_NAME (current_function_decl))
12016 && DECL_FILE_SCOPE_P (current_function_decl))
12017 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
12019 return incoming_stack_boundary;
12022 /* Update incoming stack boundary and estimated stack alignment. */
12025 ix86_update_stack_boundary (void)
12027 ix86_incoming_stack_boundary
12028 = ix86_minimum_incoming_stack_boundary (false);
12030 /* x86_64 vararg needs 16byte stack alignment for register save
   area.  NOTE(review): the guarding condition lines are elided in
   this listing.  */
12034 && crtl->stack_alignment_estimated < 128)
12035 crtl->stack_alignment_estimated = 128;
12038 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
12039 needed or an rtx for DRAP otherwise. */
12042 ix86_get_drap_rtx (void)
/* Without accumulated outgoing args, DRAP must be kept for the whole
   function body.  */
12044 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
12045 crtl->need_drap = true;
12047 if (stack_realign_drap)
12049 /* Assign DRAP to vDRAP and returns vDRAP */
12050 unsigned int regno = find_drap_reg ();
12053 rtx_insn *seq, *insn;
12055 arg_ptr = gen_rtx_REG (Pmode, regno);
12056 crtl->drap_reg = arg_ptr;
/* Copy the hard DRAP into a pseudo right after function entry so the
   body can use the pseudo freely.  */
12059 drap_vreg = copy_to_reg (arg_ptr);
12060 seq = get_insns ();
12063 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
12066 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
12067 RTX_FRAME_RELATED_P (insn) = 1;
12075 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
12078 ix86_internal_arg_pointer (void)
12080 return virtual_incoming_args_rtx;
/* A scratch register acquired by get_scratch_register_on_entry.
   NOTE(review): the struct members are elided in this listing; the
   functions below use sr->reg and (per the push/pop pairing) presumably
   a flag recording whether the register had to be spilled -- confirm.  */
12083 struct scratch_reg {
12088 /* Return a short-lived scratch register for use on function entry.
12089 In 32-bit mode, it is valid only after the registers are saved
12090 in the prologue. This register must be released by means of
12091 release_scratch_register_on_entry once it is dead.  NOTE(review):
   the regno assignments for each branch are elided in this listing.  */
12094 get_scratch_register_on_entry (struct scratch_reg *sr)
12102 /* We always use R11 in 64-bit mode. */
/* 32-bit mode: pick a register not used for argument passing, the
   static chain, or DRAP by this function's calling convention.  */
12107 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
12109 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
12111 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
12112 bool static_chain_p = DECL_STATIC_CHAIN (decl);
12113 int regparm = ix86_function_regparm (fntype, decl);
12115 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
12117 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
12118 for the static chain register. */
12119 if ((regparm < 1 || (fastcall_p && !static_chain_p))
12120 && drap_regno != AX_REG)
12122 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
12123 for the static chain register. */
12124 else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
12126 else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
12128 /* ecx is the static chain register. */
12129 else if (regparm < 3 && !fastcall_p && !thiscall_p
12131 && drap_regno != CX_REG)
/* Fall back to callee-saved registers that are saved anyway.  */
12133 else if (ix86_save_reg (BX_REG, true))
12135 /* esi is the static chain register. */
12136 else if (!(regparm == 3 && static_chain_p)
12137 && ix86_save_reg (SI_REG, true))
12139 else if (ix86_save_reg (DI_REG, true))
12143 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
12148 sr->reg = gen_rtx_REG (Pmode, regno);
/* If the register was not free, spill it with a push.  */
12151 rtx_insn *insn = emit_insn (gen_push (sr->reg));
12152 RTX_FRAME_RELATED_P (insn) = 1;
12156 /* Release a scratch register obtained from the preceding function. */
12159 release_scratch_register_on_entry (struct scratch_reg *sr)
12163 struct machine_function *m = cfun->machine;
12164 rtx x, insn = emit_insn (gen_pop (sr->reg));
12166 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
/* So attach an explicit SP = SP + UNITS_PER_WORD expression for the
   CFI machinery, and update our tracked sp_offset to match.  */
12167 RTX_FRAME_RELATED_P (insn) = 1;
12168 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
12169 x = gen_rtx_SET (stack_pointer_rtx, x);
12170 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
12171 m->fs.sp_offset -= UNITS_PER_WORD;
12175 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
12177 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
/* NOTE(review): this excerpt is line-sampled; gaps in the embedded GCC
   line numbers mark elided source lines.  Code text is kept verbatim.  */
12180 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
12182 /* We skip the probe for the first interval + a small dope of 4 words and
12183 probe that many bytes past the specified size to maintain a protection
12184 area at the bottom of the stack. */
12185 const int dope = 4 * UNITS_PER_WORD;
12186 rtx size_rtx = GEN_INT (size), last;
12188 /* See if we have a constant small number of probes to generate. If so,
12189 that's the easy case. The run-time loop is made up of 9 insns in the
12190 generic case while the compile-time loop is made up of 3+2*(n-1) insns
12191 for n # of intervals. */
12192 if (size <= 4 * PROBE_INTERVAL)
12194 HOST_WIDE_INT i, adjust;
12195 bool first_probe = true;
12197 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
12198 values of N from 1 until it exceeds SIZE. If only one probe is
12199 needed, this will not generate any code. Then adjust and probe
12200 to PROBE_INTERVAL + SIZE. */
12201 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
/* The very first adjustment skips the initial interval plus dope
   (see the protection-area comment above).  */
12205 adjust = 2 * PROBE_INTERVAL + dope;
12206 first_probe = false;
12209 adjust = PROBE_INTERVAL;
12211 emit_insn (gen_rtx_SET (stack_pointer_rtx,
12212 plus_constant (Pmode, stack_pointer_rtx,
12214 emit_stack_probe (stack_pointer_rtx);
12218 adjust = size + PROBE_INTERVAL + dope;
12220 adjust = size + PROBE_INTERVAL - i;
12222 emit_insn (gen_rtx_SET (stack_pointer_rtx,
12223 plus_constant (Pmode, stack_pointer_rtx,
12225 emit_stack_probe (stack_pointer_rtx);
12227 /* Adjust back to account for the additional first interval. */
12228 last = emit_insn (gen_rtx_SET (stack_pointer_rtx,
12229 plus_constant (Pmode, stack_pointer_rtx,
12230 PROBE_INTERVAL + dope)));
12233 /* Otherwise, do the same as above, but in a loop. Note that we must be
12234 extra careful with variables wrapping around because we might be at
12235 the very top (or the very bottom) of the address space and we have
12236 to be able to handle this case properly; in particular, we use an
12237 equality test for the loop condition. */
12240 HOST_WIDE_INT rounded_size;
12241 struct scratch_reg sr;
12243 get_scratch_register_on_entry (&sr);
12246 /* Step 1: round SIZE to the previous multiple of the interval. */
12248 rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
12251 /* Step 2: compute initial and final value of the loop counter. */
12253 /* SP = SP_0 + PROBE_INTERVAL. */
12254 emit_insn (gen_rtx_SET (stack_pointer_rtx,
12255 plus_constant (Pmode, stack_pointer_rtx,
12256 - (PROBE_INTERVAL + dope))));
12258 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
/* If ROUNDED_SIZE fits in 31 bits it is a valid displacement;
   otherwise materialize -ROUNDED_SIZE in the scratch reg first.  */
12259 if (rounded_size <= (HOST_WIDE_INT_1 << 31))
12260 emit_insn (gen_rtx_SET (sr.reg,
12261 plus_constant (Pmode, stack_pointer_rtx,
12265 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
12266 emit_insn (gen_rtx_SET (sr.reg,
12267 gen_rtx_PLUS (Pmode, sr.reg,
12268 stack_pointer_rtx)));
12272 /* Step 3: the loop
12276 SP = SP + PROBE_INTERVAL
12279 while (SP != LAST_ADDR)
12281 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
12282 values of N from 1 until it is equal to ROUNDED_SIZE. */
12284 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
12287 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
12288 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
12290 if (size != rounded_size)
12292 emit_insn (gen_rtx_SET (stack_pointer_rtx,
12293 plus_constant (Pmode, stack_pointer_rtx,
12294 rounded_size - size)));
12295 emit_stack_probe (stack_pointer_rtx);
12298 /* Adjust back to account for the additional first interval. */
12299 last = emit_insn (gen_rtx_SET (stack_pointer_rtx,
12300 plus_constant (Pmode, stack_pointer_rtx,
12301 PROBE_INTERVAL + dope)));
12303 release_scratch_register_on_entry (&sr);
12306 /* Even if the stack pointer isn't the CFA register, we need to correctly
12307 describe the adjustments made to it, in particular differentiate the
12308 frame-related ones from the frame-unrelated ones. */
/* The two-element SEQUENCE describes the net frame-related effect
   (-size) separately from the probing traffic, for dwarf2 CFI.  */
12311 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
12312 XVECEXP (expr, 0, 0)
12313 = gen_rtx_SET (stack_pointer_rtx,
12314 plus_constant (Pmode, stack_pointer_rtx, -size));
12315 XVECEXP (expr, 0, 1)
12316 = gen_rtx_SET (stack_pointer_rtx,
12317 plus_constant (Pmode, stack_pointer_rtx,
12318 PROBE_INTERVAL + dope + size));
12319 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
12320 RTX_FRAME_RELATED_P (last) = 1;
12322 cfun->machine->fs.sp_offset += size;
12325 /* Make sure nothing is scheduled before we are done. */
12326 emit_insn (gen_blockage ())&#59;
12329 /* Adjust the stack pointer up to REG while probing it. */
12332 output_adjust_stack_and_probe (rtx reg)
12334 static int labelno = 0;
12338 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
/* Loop start: each iteration moves SP one probe interval and touches
   the new top-of-stack word.  */
12341 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
12343 /* SP = SP + PROBE_INTERVAL. */
/* Emitted as a SUB: the stack grows downward, so advancing the probe
   area means decreasing the numeric SP value.  */
12344 xops[0] = stack_pointer_rtx;
12345 xops[1] = GEN_INT (PROBE_INTERVAL);
12346 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
/* Probe: OR $0 into the word at (SP) — a write that faults if the
   page is not mapped, without changing the value stored there.  */
12349 xops[1] = const0_rtx;
12350 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
12352 /* Test if SP == LAST_ADDR. */
12353 xops[0] = stack_pointer_rtx;
12355 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
12358 fputs ("\tjne\t", asm_out_file);
12359 assemble_name_raw (asm_out_file, loop_lab);
12360 fputc ('\n', asm_out_file);
12365 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
12366 inclusive. These are offsets from the current stack pointer. */
12369 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
12371 /* See if we have a constant small number of probes to generate. If so,
12372 that's the easy case. The run-time loop is made up of 6 insns in the
12373 generic case while the compile-time loop is made up of n insns for n #
12375 if (size <= 6 * PROBE_INTERVAL)
12379 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
12380 it exceeds SIZE. If only one probe is needed, this will not
12381 generate any code. Then probe at FIRST + SIZE. */
12382 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
12383 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
12386 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
12390 /* Otherwise, do the same as above, but in a loop. Note that we must be
12391 extra careful with variables wrapping around because we might be at
12392 the very top (or the very bottom) of the address space and we have
12393 to be able to handle this case properly; in particular, we use an
12394 equality test for the loop condition. */
12397 HOST_WIDE_INT rounded_size, last;
12398 struct scratch_reg sr;
12400 get_scratch_register_on_entry (&sr);
12403 /* Step 1: round SIZE to the previous multiple of the interval. */
12405 rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
12408 /* Step 2: compute initial and final value of the loop counter. */
12410 /* TEST_OFFSET = FIRST. */
/* Offsets are negated because the stack grows downward.  */
12411 emit_move_insn (sr.reg, GEN_INT (-first));
12413 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
12414 last = first + rounded_size;
12417 /* Step 3: the loop
12421 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
12424 while (TEST_ADDR != LAST_ADDR)
12426 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
12427 until it is equal to ROUNDED_SIZE. */
12429 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
12432 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
12433 that SIZE is equal to ROUNDED_SIZE. */
12435 if (size != rounded_size)
12436 emit_stack_probe (plus_constant (Pmode,
12437 gen_rtx_PLUS (Pmode,
12440 rounded_size - size));
12442 release_scratch_register_on_entry (&sr);
12445 /* Make sure nothing is scheduled before we are done. */
12446 emit_insn (gen_blockage ());
12449 /* Probe a range of stack addresses from REG to END, inclusive. These are
12450 offsets from the current stack pointer. */
12453 output_probe_stack_range (rtx reg, rtx end)
12455 static int labelno = 0;
12459 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
/* Loop start: advance the test offset and probe, until END.  */
12462 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
12464 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
/* SUB because offsets are kept negated (stack grows downward).  */
12466 xops[1] = GEN_INT (PROBE_INTERVAL);
12467 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
12469 /* Probe at TEST_ADDR. */
/* OR $0 into (SP + offset): faults on an unmapped page, value intact.  */
12470 xops[0] = stack_pointer_rtx;
12472 xops[2] = const0_rtx;
12473 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
12475 /* Test if TEST_ADDR == LAST_ADDR. */
12478 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
12481 fputs ("\tjne\t", asm_out_file);
12482 assemble_name_raw (asm_out_file, loop_lab);
12483 fputc ('\n', asm_out_file);
12488 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
12489 to be generated in correct form. */
/* NOTE(review): this excerpt is line-sampled; gaps in the embedded GCC
   line numbers mark elided source lines.  Code text is kept verbatim.  */
12491 ix86_finalize_stack_realign_flags (void)
12493 /* Check if stack realign is really needed after reload, and
12494 stores result in cfun */
12495 unsigned int incoming_stack_boundary
12496 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
12497 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
12498 unsigned int stack_realign = (incoming_stack_boundary
12500 ? crtl->max_used_stack_slot_alignment
12501 : crtl->stack_alignment_needed));
12503 if (crtl->stack_realign_finalized)
12505 /* After stack_realign_needed is finalized, we can no longer
12507 gcc_assert (crtl->stack_realign_needed == stack_realign);
12511 /* If the only reason for frame_pointer_needed is that we conservatively
12512 assumed stack realignment might be needed, but in the end nothing that
12513 needed the stack alignment had been spilled, clear frame_pointer_needed
12514 and say we don't need stack realignment. */
12516 && frame_pointer_needed
12518 && flag_omit_frame_pointer
12519 && crtl->sp_is_unchanging
12520 && !ix86_current_function_calls_tls_descriptor
12521 && !crtl->accesses_prior_frames
12522 && !cfun->calls_alloca
12523 && !crtl->calls_eh_return
12524 /* See ira_setup_eliminable_regset for the rationale. */
12525 && !(STACK_CHECK_MOVING_SP
12526 && flag_stack_check
12528 && cfun->can_throw_non_call_exceptions)
12529 && !ix86_frame_pointer_required ()
12530 && get_frame_size () == 0
12531 && ix86_nsaved_sseregs () == 0
12532 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
/* Scan every insn: if anything still requires a stack frame beyond
   what the prologue sets up, keep the conservative decision.  */
12534 HARD_REG_SET set_up_by_prologue, prologue_used;
12537 CLEAR_HARD_REG_SET (prologue_used);
12538 CLEAR_HARD_REG_SET (set_up_by_prologue);
12539 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
12540 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
12541 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
12542 HARD_FRAME_POINTER_REGNUM);
12543 FOR_EACH_BB_FN (bb, cfun)
12546 FOR_BB_INSNS (bb, insn)
12547 if (NONDEBUG_INSN_P (insn)
12548 && requires_stack_frame_p (insn, prologue_used,
12549 set_up_by_prologue)&#41;
12551 crtl->stack_realign_needed = stack_realign;
12552 crtl->stack_realign_finalized = true;
12557 /* If drap has been set, but it actually isn't live at the start
12558 of the function, there is no reason to set it up. */
12559 if (crtl->drap_reg)
12561 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
12562 if (! REGNO_REG_SET_P (DF_LR_IN (bb), REGNO (crtl->drap_reg)))
12564 crtl->drap_reg = NULL_RTX;
12565 crtl->need_drap = false;
12569 cfun->machine->no_drap_save_restore = true;
/* No frame/realignment needed: relax all alignment bookkeeping to the
   incoming boundary and rerun dataflow so liveness reflects this.  */
12571 frame_pointer_needed = false;
12572 stack_realign = false;
12573 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
12574 crtl->stack_alignment_needed = incoming_stack_boundary;
12575 crtl->stack_alignment_estimated = incoming_stack_boundary;
12576 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
12577 crtl->preferred_stack_boundary = incoming_stack_boundary;
12578 df_finish_pass (true);
12579 df_scan_alloc (NULL);
12581 df_compute_regs_ever_live (true);
12585 crtl->stack_realign_needed = stack_realign;
12586 crtl->stack_realign_finalized = true;
12589 /* Delete SET_GOT right after entry block if it is allocated to reg. */
12592 ix86_elim_entry_set_got (rtx reg)
12594 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
12595 rtx_insn *c_insn = BB_HEAD (bb);
/* Skip past notes/debug insns to the first real insn of the block.  */
12596 if (!NONDEBUG_INSN_P (c_insn))
12597 c_insn = next_nonnote_nondebug_insn (c_insn);
12598 if (c_insn && NONJUMP_INSN_P (c_insn))
12600 rtx pat = PATTERN (c_insn);
12601 if (GET_CODE (pat) == PARALLEL)
/* A SET_GOT pattern is a PARALLEL whose first element sets REG from
   an UNSPEC_SET_GOT; delete it when it targets the same hard reg.  */
12603 rtx vec = XVECEXP (pat, 0, 0);
12604 if (GET_CODE (vec) == SET
12605 && XINT (XEXP (vec, 1), 1) == UNSPEC_SET_GOT
12606 && REGNO (XEXP (vec, 0)) == REGNO (reg))
12607 delete_insn (c_insn);
12612 /* Expand the prologue into a bunch of separate insns. */
/* NOTE(review): this excerpt is line-sampled; gaps in the embedded GCC
   line numbers mark elided source lines (braces, conditions, decls).
   Code text below is kept verbatim; only comments were touched.  */
12615 ix86_expand_prologue (void)
12617 struct machine_function *m = cfun->machine;
12619 struct ix86_frame frame;
12620 HOST_WIDE_INT allocate;
12621 bool int_registers_saved;
12622 bool sse_registers_saved;
12623 rtx static_chain = NULL_RTX;
12625 ix86_finalize_stack_realign_flags ();
12627 /* DRAP should not coexist with stack_realign_fp */
12628 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
/* Reset the frame-state tracking (m->fs) to the function-entry view:
   CFA is SP at INCOMING_FRAME_SP_OFFSET.  */
12630 memset (&m->fs, 0, sizeof (m->fs));
12632 /* Initialize CFA state for before the prologue. */
12633 m->fs.cfa_reg = stack_pointer_rtx;
12634 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
12636 /* Track SP offset to the CFA. We continue tracking this after we've
12637 swapped the CFA register away from SP. In the case of re-alignment
12638 this is fudged; we're interested to offsets within the local frame. */
12639 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
12640 m->fs.sp_valid = true;
12642 ix86_compute_frame_layout (&frame);
12644 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
12646 /* We should have already generated an error for any use of
12647 ms_hook on a nested function. */
12648 gcc_checking_assert (!ix86_static_chain_on_stack);
12650 /* Check if profiling is active and we shall use profiling before
12651 prologue variant. If so sorry. */
12652 if (crtl->profile && flag_fentry != 0)
12653 sorry ("ms_hook_prologue attribute isn%'t compatible "
12654 "with -mfentry for 32-bit");
12656 /* In ix86_asm_output_function_label we emitted:
12657 8b ff movl.s %edi,%edi
12659 8b ec movl.s %esp,%ebp
12661 This matches the hookable function prologue in Win32 API
12662 functions in Microsoft Windows XP Service Pack 2 and newer.
12663 Wine uses this to enable Windows apps to hook the Win32 API
12664 functions provided by Wine.
12666 What that means is that we've already set up the frame pointer. */
12668 if (frame_pointer_needed
12669 && !(crtl->drap_reg && crtl->stack_realign_needed))
12673 /* We've decided to use the frame pointer already set up.
12674 Describe this to the unwinder by pretending that both
12675 push and mov insns happen right here.
12677 Putting the unwind info here at the end of the ms_hook
12678 is done so that we can make absolutely certain we get
12679 the required byte sequence at the start of the function,
12680 rather than relying on an assembler that can produce
12681 the exact encoding required.
12683 However it does mean (in the unpatched case) that we have
12684 a 1 insn window where the asynchronous unwind info is
12685 incorrect. However, if we placed the unwind info at
12686 its correct location we would have incorrect unwind info
12687 in the patched case. Which is probably all moot since
12688 I don't expect Wine generates dwarf2 unwind info for the
12689 system libraries that use this feature. */
12691 insn = emit_insn (gen_blockage ());
12693 push = gen_push (hard_frame_pointer_rtx);
12694 mov = gen_rtx_SET (hard_frame_pointer_rtx,
12695 stack_pointer_rtx);
12696 RTX_FRAME_RELATED_P (push) = 1;
12697 RTX_FRAME_RELATED_P (mov) = 1;
12699 RTX_FRAME_RELATED_P (insn) = 1;
12700 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
12701 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
12703 /* Note that gen_push incremented m->fs.cfa_offset, even
12704 though we didn't emit the push insn here. */
12705 m->fs.cfa_reg = hard_frame_pointer_rtx;
12706 m->fs.fp_offset = m->fs.cfa_offset;
12707 m->fs.fp_valid = true;
12711 /* The frame pointer is not needed so pop %ebp again.
12712 This leaves us with a pristine state. */
12713 emit_insn (gen_pop (hard_frame_pointer_rtx));
12717 /* The first insn of a function that accepts its static chain on the
12718 stack is to push the register that would be filled in by a direct
12719 call. This insn will be skipped by the trampoline. */
12720 else if (ix86_static_chain_on_stack)
12722 static_chain = ix86_static_chain (cfun->decl, false);
12723 insn = emit_insn (gen_push (static_chain));
12724 emit_insn (gen_blockage ());
12726 /* We don't want to interpret this push insn as a register save,
12727 only as a stack adjustment. The real copy of the register as
12728 a save will be done later, if needed. */
12729 t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
12730 t = gen_rtx_SET (stack_pointer_rtx, t);
12731 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
12732 RTX_FRAME_RELATED_P (insn) = 1;
12735 /* Emit prologue code to adjust stack alignment and setup DRAP, in case
12736 DRAP is needed and stack realignment is really needed after reload */
12737 if (stack_realign_drap)
12739 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
12741 /* Only need to push parameter pointer reg if it is caller saved. */
12742 if (!call_used_regs[REGNO (crtl->drap_reg)])
12744 /* Push arg pointer reg */
12745 insn = emit_insn (gen_push (crtl->drap_reg));
12746 RTX_FRAME_RELATED_P (insn) = 1;
12749 /* Grab the argument pointer. */
12750 t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
12751 insn = emit_insn (gen_rtx_SET (crtl->drap_reg, t));
12752 RTX_FRAME_RELATED_P (insn) = 1;
12753 m->fs.cfa_reg = crtl->drap_reg;
12754 m->fs.cfa_offset = 0;
12756 /* Align the stack. */
12757 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
12759 GEN_INT (-align_bytes)));
12760 RTX_FRAME_RELATED_P (insn) = 1;
12762 /* Replicate the return address on the stack so that return
12763 address can be reached via (argp - 1) slot. This is needed
12764 to implement macro RETURN_ADDR_RTX and intrinsic function
12765 expand_builtin_return_addr etc. */
12766 t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
12767 t = gen_frame_mem (word_mode, t);
12768 insn = emit_insn (gen_push (t));
12769 RTX_FRAME_RELATED_P (insn) = 1;
12771 /* For the purposes of frame and register save area addressing,
12772 we've started over with a new frame. */
12773 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
12774 m->fs.realigned = true;
12778 /* Replicate static chain on the stack so that static chain
12779 can be reached via (argp - 2) slot. This is needed for
12780 nested function with stack realignment. */
12781 insn = emit_insn (gen_push (static_chain));
12782 RTX_FRAME_RELATED_P (insn) = 1;
12786 int_registers_saved = (frame.nregs == 0);
12787 sse_registers_saved = (frame.nsseregs == 0);
12789 if (frame_pointer_needed && !m->fs.fp_valid)
12791 /* Note: AT&T enter does NOT have reversed args. Enter is probably
12792 slower on all targets. Also sdb doesn't like it. */
12793 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
12794 RTX_FRAME_RELATED_P (insn) = 1;
12796 /* Push registers now, before setting the frame pointer
12798 if (!int_registers_saved
12800 && !frame.save_regs_using_mov)
12802 ix86_emit_save_regs ();
12803 int_registers_saved = true;
12804 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
12807 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
12809 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
12810 RTX_FRAME_RELATED_P (insn) = 1;
12812 if (m->fs.cfa_reg == stack_pointer_rtx)
12813 m->fs.cfa_reg = hard_frame_pointer_rtx;
12814 m->fs.fp_offset = m->fs.sp_offset;
12815 m->fs.fp_valid = true;
12819 if (!int_registers_saved)
12821 /* If saving registers via PUSH, do so now. */
12822 if (!frame.save_regs_using_mov)
12824 ix86_emit_save_regs ();
12825 int_registers_saved = true;
12826 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
12829 /* When using red zone we may start register saving before allocating
12830 the stack frame saving one cycle of the prologue. However, avoid
12831 doing this if we have to probe the stack; at least on x86_64 the
12832 stack probe can turn into a call that clobbers a red zone location. */
12833 else if (ix86_using_red_zone ()
12834 && (! TARGET_STACK_PROBE
12835 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
12837 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
12838 int_registers_saved = true;
12842 if (stack_realign_fp)
12844 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
12845 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
12847 /* The computation of the size of the re-aligned stack frame means
12848 that we must allocate the size of the register save area before
12849 performing the actual alignment. Otherwise we cannot guarantee
12850 that there's enough storage above the realignment point. */
12851 if (m->fs.sp_offset != frame.sse_reg_save_offset)
12852 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12853 GEN_INT (m->fs.sp_offset
12854 - frame.sse_reg_save_offset),
12857 /* Align the stack. */
12858 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
12860 GEN_INT (-align_bytes)));
12862 /* For the purposes of register save area addressing, the stack
12863 pointer is no longer valid. As for the value of sp_offset,
12864 see ix86_compute_frame_layout, which we need to match in order
12865 to pass verification of stack_pointer_offset at the end. */
12866 m->fs.sp_offset = ROUND_UP (m->fs.sp_offset, align_bytes);
12867 m->fs.sp_valid = false;
12870 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
12872 if (flag_stack_usage_info)
12874 /* We start to count from ARG_POINTER. */
12875 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
12877 /* If it was realigned, take into account the fake frame. */
12878 if (stack_realign_drap)
12880 if (ix86_static_chain_on_stack)
12881 stack_size += UNITS_PER_WORD;
12883 if (!call_used_regs[REGNO (crtl->drap_reg)])
12884 stack_size += UNITS_PER_WORD;
12886 /* This over-estimates by 1 minimal-stack-alignment-unit but
12887 mitigates that by counting in the new return address slot. */
12888 current_function_dynamic_stack_size
12889 += crtl->stack_alignment_needed / BITS_PER_UNIT;
12892 current_function_static_stack_size = stack_size;
12895 /* On SEH target with very large frame size, allocate an area to save
12896 SSE registers (as the very large allocation won't be described). */
12898 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
12899 && !sse_registers_saved)
12901 HOST_WIDE_INT sse_size =
12902 frame.sse_reg_save_offset - frame.reg_save_offset;
12904 gcc_assert (int_registers_saved);
12906 /* No need to do stack checking as the area will be immediately
12908 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12909 GEN_INT (-sse_size), -1,
12910 m->fs.cfa_reg == stack_pointer_rtx);
12911 allocate -= sse_size;
12912 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
12913 sse_registers_saved = true;
12916 /* The stack has already been decremented by the instruction calling us
12917 so probe if the size is non-negative to preserve the protection area. */
12918 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
12920 /* We expect the registers to be saved when probes are used. */
12921 gcc_assert (int_registers_saved);
12923 if (STACK_CHECK_MOVING_SP)
12925 if (!(crtl->is_leaf && !cfun->calls_alloca
12926 && allocate <= PROBE_INTERVAL))
12928 ix86_adjust_stack_and_probe (allocate);
12934 HOST_WIDE_INT size = allocate;
/* Clamp probing to just under 2GB: larger displacements do not fit
   the probe insn's addressing.  */
12936 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
12937 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
12939 if (TARGET_STACK_PROBE)
12941 if (crtl->is_leaf && !cfun->calls_alloca)
12943 if (size > PROBE_INTERVAL)
12944 ix86_emit_probe_stack_range (0, size);
12947 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
12951 if (crtl->is_leaf && !cfun->calls_alloca)
12953 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
12954 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT,
12955 size - STACK_CHECK_PROTECT);
12958 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
12965 else if (!ix86_target_stack_probe ()
12966 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
12968 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12969 GEN_INT (-allocate), -1,
12970 m->fs.cfa_reg == stack_pointer_rtx);
/* Large allocation via the stack-allocation worker: EAX carries the
   size; EAX/R10 are saved first if live at function start.  */
12974 rtx eax = gen_rtx_REG (Pmode, AX_REG);
12976 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
12977 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
12978 bool eax_live = ix86_eax_live_at_start_p ();
12979 bool r10_live = false;
12982 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
12986 insn = emit_insn (gen_push (eax));
12987 allocate -= UNITS_PER_WORD;
12988 /* Note that SEH directives need to continue tracking the stack
12989 pointer even after the frame pointer has been set up. */
12990 if (sp_is_cfa_reg || TARGET_SEH)
12993 m->fs.cfa_offset += UNITS_PER_WORD;
12994 RTX_FRAME_RELATED_P (insn) = 1;
12995 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
12996 gen_rtx_SET (stack_pointer_rtx,
12997 plus_constant (Pmode, stack_pointer_rtx,
12998 -UNITS_PER_WORD)));
13004 r10 = gen_rtx_REG (Pmode, R10_REG);
13005 insn = emit_insn (gen_push (r10));
13006 allocate -= UNITS_PER_WORD;
13007 if (sp_is_cfa_reg || TARGET_SEH)
13010 m->fs.cfa_offset += UNITS_PER_WORD;
13011 RTX_FRAME_RELATED_P (insn) = 1;
13012 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
13013 gen_rtx_SET (stack_pointer_rtx,
13014 plus_constant (Pmode, stack_pointer_rtx,
13015 -UNITS_PER_WORD)));
13019 emit_move_insn (eax, GEN_INT (allocate));
13020 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
13022 /* Use the fact that AX still contains ALLOCATE. */
13023 adjust_stack_insn = (Pmode == DImode
13024 ? gen_pro_epilogue_adjust_stack_di_sub
13025 : gen_pro_epilogue_adjust_stack_si_sub);
13027 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
13028 stack_pointer_rtx, eax));
13030 if (sp_is_cfa_reg || TARGET_SEH)
13033 m->fs.cfa_offset += allocate;
13034 RTX_FRAME_RELATED_P (insn) = 1;
13035 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
13036 gen_rtx_SET (stack_pointer_rtx,
13037 plus_constant (Pmode, stack_pointer_rtx,
13040 m->fs.sp_offset += allocate;
13042 /* Use stack_pointer_rtx for relative addressing so that code
13043 works for realigned stack, too. */
13044 if (r10_live && eax_live)
13046 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
13047 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
13048 gen_frame_mem (word_mode, t));
13049 t = plus_constant (Pmode, t, UNITS_PER_WORD);
13050 emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
13051 gen_frame_mem (word_mode, t));
13053 else if (eax_live || r10_live)
13055 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
13056 emit_move_insn (gen_rtx_REG (word_mode,
13057 (eax_live ? AX_REG : R10_REG)),
13058 gen_frame_mem (word_mode, t));
13061 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
13063 /* If we haven't already set up the frame pointer, do so now. */
13064 if (frame_pointer_needed && !m->fs.fp_valid)
13066 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
13067 GEN_INT (frame.stack_pointer_offset
13068 - frame.hard_frame_pointer_offset));
13069 insn = emit_insn (insn);
13070 RTX_FRAME_RELATED_P (insn) = 1;
13071 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
13073 if (m->fs.cfa_reg == stack_pointer_rtx)
13074 m->fs.cfa_reg = hard_frame_pointer_rtx;
13075 m->fs.fp_offset = frame.hard_frame_pointer_offset;
13076 m->fs.fp_valid = true;
13079 if (!int_registers_saved)
13080 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
13081 if (!sse_registers_saved)
13082 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
13084 /* For the mcount profiling on 32 bit PIC mode we need to emit SET_GOT
13086 if (!TARGET_64BIT && pic_offset_table_rtx && crtl->profile && !flag_fentry)
13088 rtx pic = gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM);
13089 insn = emit_insn (gen_set_got (pic));
13090 RTX_FRAME_RELATED_P (insn) = 1;
13091 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
13092 emit_insn (gen_prologue_use (pic));
13093 /* Delete an already-emitted SET_GOT, if one exists and was allocated
13094 to REAL_PIC_OFFSET_TABLE_REGNUM. */
13095 ix86_elim_entry_set_got (pic);
13098 if (crtl->drap_reg && !crtl->stack_realign_needed)
13100 /* vDRAP is setup but after reload it turns out stack realign
13101 isn't necessary, here we will emit prologue to setup DRAP
13102 without stack realign adjustment */
13103 t = choose_baseaddr (0);
13104 emit_insn (gen_rtx_SET (crtl->drap_reg, t));
13107 /* Prevent instructions from being scheduled into register save push
13108 sequence when access to the redzone area is done through frame pointer.
13109 The offset between the frame pointer and the stack pointer is calculated
13110 relative to the value of the stack pointer at the end of the function
13111 prologue, and moving instructions that access redzone area via frame
13112 pointer inside push sequence violates this assumption. */
13113 if (frame_pointer_needed && frame.red_zone_size)
13114 emit_insn (gen_memory_blockage ());
13116 /* Emit cld instruction if stringops are used in the function. */
13117 if (TARGET_CLD && ix86_current_function_needs_cld)
13118 emit_insn (gen_cld ());
13120 /* SEH requires that the prologue end within 256 bytes of the start of
13121 the function. Prevent instruction schedules that would extend that.
13122 Further, prevent alloca modifications to the stack pointer from being
13123 combined with prologue modifications. */
13125 emit_insn (gen_prologue_use (stack_pointer_rtx));
13128 /* Emit code to restore REG using a POP insn. */
13131 ix86_emit_restore_reg_using_pop (rtx reg)
13133 struct machine_function *m = cfun->machine;
13134 rtx_insn *insn = emit_insn (gen_pop (reg));
13136 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
13137 m->fs.sp_offset -= UNITS_PER_WORD;
13139 if (m->fs.cfa_reg == crtl->drap_reg
13140 && REGNO (reg) == REGNO (crtl->drap_reg))
13142 /* Previously we'd represented the CFA as an expression
13143 like *(%ebp - 8). We've just popped that value from
13144 the stack, which means we need to reset the CFA to
13145 the drap register. This will remain until we restore
13146 the stack pointer. */
13147 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
13148 RTX_FRAME_RELATED_P (insn) = 1;
13150 /* This means that the DRAP register is valid for addressing too. */
13151 m->fs.drap_valid = true;
/* CFA is SP: the pop moves SP up one word, so emit an explicit
   REG_CFA_ADJUST_CFA note and shrink the tracked CFA offset.  */
13155 if (m->fs.cfa_reg == stack_pointer_rtx)
13157 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
13158 x = gen_rtx_SET (stack_pointer_rtx, x);
13159 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
13160 RTX_FRAME_RELATED_P (insn) = 1;
13162 m->fs.cfa_offset -= UNITS_PER_WORD;
13165 /* When the frame pointer is the CFA, and we pop it, we are
13166 swapping back to the stack pointer as the CFA. This happens
13167 for stack frames that don't allocate other data, so we assume
13168 the stack pointer is now pointing at the return address, i.e.
13169 the function entry state, which makes the offset be 1 word. */
13170 if (reg == hard_frame_pointer_rtx)
13172 m->fs.fp_valid = false;
13173 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
13175 m->fs.cfa_reg = stack_pointer_rtx;
13176 m->fs.cfa_offset -= UNITS_PER_WORD;
13178 add_reg_note (insn, REG_CFA_DEF_CFA,
13179 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
13180 GEN_INT (m->fs.cfa_offset)));
13181 RTX_FRAME_RELATED_P (insn) = 1;
13186 /* Emit code to restore saved registers using POP insns. */
/* Scans every hard register and pops each general register that
   ix86_save_reg says was saved (maybe_eh_return == false here, so
   EH-only saves like eax/edx for eh_return are excluded).
   NOTE(review): gapped dump — brace/signature lines elided. */
13189 ix86_emit_restore_regs_using_pop (void)
13191 unsigned int regno;
13193 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
13194 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, false))
13195 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
13198 /* Emit code and notes for the LEAVE instruction. */
/* LEAVE restores SP from the frame pointer and pops the saved FP, so
   after it SP is valid (one word above the old FP slot) and FP is not.
   NOTE(review): gapped dump — brace/signature lines elided. */
13201 ix86_emit_leave (void)
13203 struct machine_function *m = cfun->machine;
13204 rtx_insn *insn = emit_insn (ix86_gen_leave ());
13206 ix86_add_queued_cfa_restore_notes (insn);
/* LEAVE only makes sense while the frame pointer is live. */
13208 gcc_assert (m->fs.fp_valid);
13209 m->fs.sp_valid = true;
13210 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
13211 m->fs.fp_valid = false;
/* If the CFA was FP-based, move it back to an SP-based expression. */
13213 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
13215 m->fs.cfa_reg = stack_pointer_rtx;
13216 m->fs.cfa_offset = m->fs.sp_offset;
13218 add_reg_note (insn, REG_CFA_DEF_CFA,
13219 plus_constant (Pmode, stack_pointer_rtx,
13221 RTX_FRAME_RELATED_P (insn) = 1;
13223 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
13227 /* Emit code to restore saved registers using MOV insns.
13228 First register is restored from CFA - CFA_OFFSET. */
/* MAYBE_EH_RETURN selects whether EH-return-only saves (eax/edx) are
   included.  CFA_OFFSET walks down by one word per restored register.
   NOTE(review): gapped dump — brace/signature lines elided. */
13230 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
13231 bool maybe_eh_return)
13233 struct machine_function *m = cfun->machine;
13234 unsigned int regno;
13236 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
13237 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
13239 rtx reg = gen_rtx_REG (word_mode, regno);
/* choose_baseaddr picks the cheapest valid base (SP/FP/DRAP) for
   the save slot at CFA - cfa_offset. */
13243 mem = choose_baseaddr (cfa_offset);
13244 mem = gen_frame_mem (word_mode, mem);
13245 insn = emit_move_insn (reg, mem);
13247 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
13249 /* Previously we'd represented the CFA as an expression
13250 like *(%ebp - 8). We've just popped that value from
13251 the stack, which means we need to reset the CFA to
13252 the drap register. This will remain until we restore
13253 the stack pointer. */
13254 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
13255 RTX_FRAME_RELATED_P (insn) = 1;
13257 /* This means that the DRAP register is valid for addressing. */
13258 m->fs.drap_valid = true;
/* Queue the restore note; it is attached to a later insn. */
13261 ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
13263 cfa_offset -= UNITS_PER_WORD;
13267 /* Emit code to restore saved registers using MOV insns.
13268 First register is restored from CFA - CFA_OFFSET. */
/* SSE counterpart of the above: restores each saved SSE register as a
   V4SF load, using an unaligned load when the slot's provable alignment
   is below what an aligned V4SF access requires.
   NOTE(review): gapped dump — brace/signature lines elided. */
13270 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
13271 bool maybe_eh_return)
13273 unsigned int regno;
13275 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
13276 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
13278 rtx reg = gen_rtx_REG (V4SFmode, regno);
13280 unsigned int align;
13282 mem = choose_baseaddr (cfa_offset);
13283 mem = gen_rtx_MEM (V4SFmode, mem);
13285 /* The location is aligned up to INCOMING_STACK_BOUNDARY. */
13286 align = MIN (GET_MODE_ALIGNMENT (V4SFmode), INCOMING_STACK_BOUNDARY);
13287 set_mem_align (mem, align);
13289 /* SSE saves are not within re-aligned local stack frame.
13290 In case INCOMING_STACK_BOUNDARY is misaligned, we have
13291 to emit unaligned load. */
/* The UNSPEC wraps the MEM so the pattern selects an unaligned
   vector load instead of the aligned form. */
13294 rtx unspec = gen_rtx_UNSPEC (V4SFmode, gen_rtvec (1, mem),
13296 emit_insn (gen_rtx_SET (reg, unspec));
13299 emit_insn (gen_rtx_SET (reg, mem));
13301 ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
/* SSE slots are 16 bytes wide, unlike the word-sized GPR slots. */
13303 cfa_offset -= GET_MODE_SIZE (V4SFmode);
13307 /* Restore function stack, frame, and registers. */
/* STYLE: 1 = normal return, 2 = eh_return path, 0 (or sibcall use) =
   sibcall epilogue (no return insn emitted) — presumably; confirm
   against callers, which are outside this chunk.
   NOTE(review): this region is a line-numbered dump with elided lines;
   code kept byte-identical, comments only. */
13310 ix86_expand_epilogue (int style)
13312 struct machine_function *m = cfun->machine;
/* Saved so the frame state can be reset for a later epilogue
   expansion of the same function (see the restores at the bottom). */
13313 struct machine_frame_state frame_state_save = m->fs;
13314 struct ix86_frame frame;
13315 bool restore_regs_via_mov;
13318 ix86_finalize_stack_realign_flags ();
13319 ix86_compute_frame_layout (&frame);
/* Establish which of SP/FP currently address the frame, and sanity
   check the tracked offsets against the computed layout. */
13321 m->fs.sp_valid = (!frame_pointer_needed
13322 || (crtl->sp_is_unchanging
13323 && !stack_realign_fp));
13324 gcc_assert (!m->fs.sp_valid
13325 || m->fs.sp_offset == frame.stack_pointer_offset);
13327 /* The FP must be valid if the frame pointer is present. */
13328 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
13329 gcc_assert (!m->fs.fp_valid
13330 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
13332 /* We must have *some* valid pointer to the stack frame. */
13333 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
13335 /* The DRAP is never valid at this point. */
13336 gcc_assert (!m->fs.drap_valid);
13338 /* See the comment about red zone and frame
13339 pointer usage in ix86_expand_prologue. */
13340 if (frame_pointer_needed && frame.red_zone_size)
13341 emit_insn (gen_memory_blockage ());
13343 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
13344 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
13346 /* Determine the CFA offset of the end of the red-zone. */
13347 m->fs.red_zone_offset = 0;
13348 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
13350 /* The red-zone begins below the return address. */
13351 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
13353 /* When the register save area is in the aligned portion of
13354 the stack, determine the maximum runtime displacement that
13355 matches up with the aligned frame. */
13356 if (stack_realign_drap)
13357 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
13361 /* Special care must be taken for the normal return case of a function
13362 using eh_return: the eax and edx registers are marked as saved, but
13363 not restored along this path. Adjust the save location to match. */
13364 if (crtl->calls_eh_return && style != 2)
13365 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
/* Decide between restoring GPRs with MOV loads or with POPs. */
13367 /* EH_RETURN requires the use of moves to function properly. */
13368 if (crtl->calls_eh_return)
13369 restore_regs_via_mov = true;
13370 /* SEH requires the use of pops to identify the epilogue. */
13371 else if (TARGET_SEH)
13372 restore_regs_via_mov = false;
13373 /* If we're only restoring one register and sp is not valid then
13374 using a move instruction to restore the register since it's
13375 less work than reloading sp and popping the register. */
13376 else if (!m->fs.sp_valid && frame.nregs <= 1)
13377 restore_regs_via_mov = true;
13378 else if (TARGET_EPILOGUE_USING_MOVE
13379 && cfun->machine->use_fast_prologue_epilogue
13380 && (frame.nregs > 1
13381 || m->fs.sp_offset != frame.reg_save_offset))
13382 restore_regs_via_mov = true;
13383 else if (frame_pointer_needed
13385 && m->fs.sp_offset != frame.reg_save_offset)
13386 restore_regs_via_mov = true;
13387 else if (frame_pointer_needed
13388 && TARGET_USE_LEAVE
13389 && cfun->machine->use_fast_prologue_epilogue
13390 && frame.nregs == 1)
13391 restore_regs_via_mov = true;
13393 restore_regs_via_mov = false;
13395 if (restore_regs_via_mov || frame.nsseregs)
13397 /* Ensure that the entire register save area is addressable via
13398 the stack pointer, if we will restore via sp. */
/* An offset over 0x7fffffff would not fit a 32-bit displacement. */
13400 && m->fs.sp_offset > 0x7fffffff
13401 && !(m->fs.fp_valid || m->fs.drap_valid)
13402 && (frame.nsseregs + frame.nregs) != 0)
13404 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
13405 GEN_INT (m->fs.sp_offset
13406 - frame.sse_reg_save_offset),
13408 m->fs.cfa_reg == stack_pointer_rtx);
13412 /* If there are any SSE registers to restore, then we have to do it
13413 via moves, since there's obviously no pop for SSE regs. */
13414 if (frame.nsseregs)
13415 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
13418 if (restore_regs_via_mov)
13423 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
13425 /* eh_return epilogues need %ecx added to the stack pointer. */
13428 rtx sa = EH_RETURN_STACKADJ_RTX;
13431 /* Stack align doesn't work with eh_return. */
13432 gcc_assert (!stack_realign_drap);
13433 /* Neither does regparm nested functions. */
13434 gcc_assert (!ix86_static_chain_on_stack);
13436 if (frame_pointer_needed)
/* SA = FP + SA + (fp_offset - word): compute the adjusted SP
   value, then reload FP from its save slot. */
13438 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
13439 t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
13440 emit_insn (gen_rtx_SET (sa, t));
13442 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
13443 insn = emit_move_insn (hard_frame_pointer_rtx, t);
13445 /* Note that we use SA as a temporary CFA, as the return
13446 address is at the proper place relative to it. We
13447 pretend this happens at the FP restore insn because
13448 prior to this insn the FP would be stored at the wrong
13449 offset relative to SA, and after this insn we have no
13450 other reasonable register to use for the CFA. We don't
13451 bother resetting the CFA to the SP for the duration of
13452 the return insn. */
13453 add_reg_note (insn, REG_CFA_DEF_CFA,
13454 plus_constant (Pmode, sa, UNITS_PER_WORD));
13455 ix86_add_queued_cfa_restore_notes (insn);
13456 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
13457 RTX_FRAME_RELATED_P (insn) = 1;
13459 m->fs.cfa_reg = sa;
13460 m->fs.cfa_offset = UNITS_PER_WORD;
13461 m->fs.fp_valid = false;
13463 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
13464 const0_rtx, style, false);
/* No frame pointer: fold SA directly into SP. */
13468 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
13469 t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
13470 insn = emit_insn (gen_rtx_SET (stack_pointer_rtx, t));
13471 ix86_add_queued_cfa_restore_notes (insn);
13473 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
13474 if (m->fs.cfa_offset != UNITS_PER_WORD)
13476 m->fs.cfa_offset = UNITS_PER_WORD;
13477 add_reg_note (insn, REG_CFA_DEF_CFA,
13478 plus_constant (Pmode, stack_pointer_rtx,
13480 RTX_FRAME_RELATED_P (insn) = 1;
13483 m->fs.sp_offset = UNITS_PER_WORD;
13484 m->fs.sp_valid = true;
13489 /* SEH requires that the function end with (1) a stack adjustment
13490 if necessary, (2) a sequence of pops, and (3) a return or
13491 jump instruction. Prevent insns from the function body from
13492 being scheduled into this sequence. */
13495 /* Prevent a catch region from being adjacent to the standard
13496 epilogue sequence. Unfortunately crtl->uses_eh_lsda nor
13497 several other flags that would be interesting to test are
13499 if (flag_non_call_exceptions)
13500 emit_insn (gen_nops (const1_rtx));
13502 emit_insn (gen_blockage ());
/* POP-based restore path. */
13505 /* First step is to deallocate the stack frame so that we can
13506 pop the registers. Also do it on SEH target for very large
13507 frame as the emitted instructions aren't allowed by the ABI in
13509 if (!m->fs.sp_valid
13511 && (m->fs.sp_offset - frame.reg_save_offset
13512 >= SEH_MAX_FRAME_SIZE)))
13514 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
13515 GEN_INT (m->fs.fp_offset
13516 - frame.reg_save_offset),
13519 else if (m->fs.sp_offset != frame.reg_save_offset)
13521 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
13522 GEN_INT (m->fs.sp_offset
13523 - frame.reg_save_offset),
13525 m->fs.cfa_reg == stack_pointer_rtx);
13528 ix86_emit_restore_regs_using_pop ();
/* Tear down the frame pointer, preferring POP or LEAVE. */
13531 /* If we used a stack pointer and haven't already got rid of it,
13533 if (m->fs.fp_valid)
13535 /* If the stack pointer is valid and pointing at the frame
13536 pointer store address, then we only need a pop. */
13537 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
13538 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
13539 /* Leave results in shorter dependency chains on CPUs that are
13540 able to grok it fast. */
13541 else if (TARGET_USE_LEAVE
13542 || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
13543 || !cfun->machine->use_fast_prologue_epilogue)
13544 ix86_emit_leave ();
13547 pro_epilogue_adjust_stack (stack_pointer_rtx,
13548 hard_frame_pointer_rtx,
13549 const0_rtx, style, !using_drap);
13550 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
/* DRAP teardown: recover SP from the DRAP register, accounting for
   the return address, optional static chain, and saved DRAP slot. */
13556 int param_ptr_offset = UNITS_PER_WORD;
13559 gcc_assert (stack_realign_drap);
13561 if (ix86_static_chain_on_stack)
13562 param_ptr_offset += UNITS_PER_WORD;
13563 if (!call_used_regs[REGNO (crtl->drap_reg)])
13564 param_ptr_offset += UNITS_PER_WORD;
13566 insn = emit_insn (gen_rtx_SET
13567 (stack_pointer_rtx,
13568 gen_rtx_PLUS (Pmode,
13570 GEN_INT (-param_ptr_offset))));
13571 m->fs.cfa_reg = stack_pointer_rtx;
13572 m->fs.cfa_offset = param_ptr_offset;
13573 m->fs.sp_offset = param_ptr_offset;
13574 m->fs.realigned = false;
13576 add_reg_note (insn, REG_CFA_DEF_CFA,
13577 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
13578 GEN_INT (param_ptr_offset)));
13579 RTX_FRAME_RELATED_P (insn) = 1;
13581 if (!call_used_regs[REGNO (crtl->drap_reg)])
13582 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
13585 /* At this point the stack pointer must be valid, and we must have
13586 restored all of the registers. We may not have deallocated the
13587 entire stack frame. We've delayed this until now because it may
13588 be possible to merge the local stack deallocation with the
13589 deallocation forced by ix86_static_chain_on_stack. */
13590 gcc_assert (m->fs.sp_valid);
13591 gcc_assert (!m->fs.fp_valid);
13592 gcc_assert (!m->fs.realigned);
13593 if (m->fs.sp_offset != UNITS_PER_WORD)
13595 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
13596 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
13600 ix86_add_queued_cfa_restore_notes (get_last_insn ());
13602 /* Sibcall epilogues don't want a return instruction. */
13605 m->fs = frame_state_save;
/* Stdcall/pascal-style callee-pops return. */
13609 if (crtl->args.pops_args && crtl->args.size)
13611 rtx popc = GEN_INT (crtl->args.pops_args);
13613 /* i386 can only pop 64K bytes. If asked to pop more, pop return
13614 address, do explicit add, and jump indirectly to the caller. */
13616 if (crtl->args.pops_args >= 65536)
13618 rtx ecx = gen_rtx_REG (SImode, CX_REG);
13621 /* There is no "pascal" calling convention in any 64bit ABI. */
13622 gcc_assert (!TARGET_64BIT);
13624 insn = emit_insn (gen_pop (ecx));
13625 m->fs.cfa_offset -= UNITS_PER_WORD;
13626 m->fs.sp_offset -= UNITS_PER_WORD;
13628 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
13629 x = gen_rtx_SET (stack_pointer_rtx, x);
13630 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
/* Record that %ecx now holds the return address for unwinding. */
13631 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx));
13632 RTX_FRAME_RELATED_P (insn) = 1;
13634 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
13636 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
13639 emit_jump_insn (gen_simple_return_pop_internal (popc));
13642 emit_jump_insn (gen_simple_return_internal ());
13644 /* Restore the state back to the state from the prologue,
13645 so that it's correct for the next epilogue. */
13646 m->fs = frame_state_save;
13649 /* Reset from the function's potential modifications. */
/* TARGET_ASM_FUNCTION_EPILOGUE hook: restores the PIC register number
   and, on Mach-O (presumably — the guarding #if is elided from this
   dump), emits a trailing nop when the function ends in a label.
   NOTE(review): gapped dump — brace/signature/#if lines elided. */
13652 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED, HOST_WIDE_INT)
13654 if (pic_offset_table_rtx
13655 && !ix86_use_pseudo_pic_reg ())
13656 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
13658 /* Mach-O doesn't support labels at the end of objects, so if
13659 it looks like we might want one, insert a NOP. */
13661 rtx_insn *insn = get_last_insn ();
13662 rtx_insn *deleted_debug_label = NULL;
/* Walk backwards over trailing notes looking for a deleted label. */
13665 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
13667 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
13668 notes only, instead set their CODE_LABEL_NUMBER to -1,
13669 otherwise there would be code generation differences
13670 in between -g and -g0. */
13671 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
13672 deleted_debug_label = insn;
13673 insn = PREV_INSN (insn);
13678 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
13679 fputs ("\tnop\n", file);
13680 else if (deleted_debug_label)
13681 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
13682 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
13683 CODE_LABEL_NUMBER (insn) = -1;
13689 /* Return a scratch register to use in the split stack prologue. The
13690 split stack prologue is used for -fsplit-stack. It is the first
13691 instructions in the function, even before the regular prologue.
13692 The scratch register can be any caller-saved register which is not
13693 used for parameters or for the static chain. */
/* Returns INVALID_REGNUM (with a sorry() diagnostic) when every
   candidate register is taken by the calling convention.
   NOTE(review): gapped dump — the 64-bit early-return, brace lines and
   several return statements are elided from this chunk. */
13695 static unsigned int
13696 split_stack_prologue_scratch_regno (void)
13702 bool is_fastcall, is_thiscall;
/* Inspect the function's own calling convention attributes. */
13705 is_fastcall = (lookup_attribute ("fastcall",
13706 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
13708 is_thiscall = (lookup_attribute ("thiscall",
13709 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
13711 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
/* fastcall: both %ecx and %edx carry arguments; a nested function
   additionally needs the static chain, so nothing is free. */
13715 if (DECL_STATIC_CHAIN (cfun->decl))
13717 sorry ("-fsplit-stack does not support fastcall with "
13718 "nested function");
13719 return INVALID_REGNUM;
13723 else if (is_thiscall)
13725 if (!DECL_STATIC_CHAIN (cfun->decl))
13729 else if (regparm < 3)
13731 if (!DECL_STATIC_CHAIN (cfun->decl))
13737 sorry ("-fsplit-stack does not support 2 register "
13738 "parameters for a nested function");
13739 return INVALID_REGNUM;
13746 /* FIXME: We could make this work by pushing a register
13747 around the addition and comparison. */
13748 sorry ("-fsplit-stack does not support 3 register parameters");
13749 return INVALID_REGNUM;
13754 /* A SYMBOL_REF for the function which allocates new stackspace for
/* Lazily created in ix86_expand_split_stack_prologue; GTY(()) roots
   them for precompiled-header / garbage-collection purposes. */
13757 static GTY(()) rtx split_stack_fn;
13759 /* A SYMBOL_REF for the more stack function when using the large
13762 static GTY(()) rtx split_stack_fn_large;
13764 /* Handle -fsplit-stack. These are the first instructions in the
13765 function, even before the regular prologue. */
/* Emits the stack-limit check against the TCB and the conditional call
   to __morestack (or __morestack_large_model for the 64-bit large code
   models), plus the varargs pointer setup when needed.
   NOTE(review): this region is a line-numbered dump with elided lines;
   code kept byte-identical, comments only. */
13768 ix86_expand_split_stack_prologue (void)
13770 struct ix86_frame frame;
13771 HOST_WIDE_INT allocate;
13772 unsigned HOST_WIDE_INT args_size;
13773 rtx_code_label *label;
13774 rtx limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
13775 rtx scratch_reg = NULL_RTX;
13776 rtx_code_label *varargs_label = NULL;
13779 gcc_assert (flag_split_stack && reload_completed);
13781 ix86_finalize_stack_realign_flags ();
13782 ix86_compute_frame_layout (&frame);
13783 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
13785 /* This is the label we will branch to if we have enough stack
13786 space. We expect the basic block reordering pass to reverse this
13787 branch if optimizing, so that we branch in the unlikely case. */
13788 label = gen_label_rtx ();
13790 /* We need to compare the stack pointer minus the frame size with
13791 the stack boundary in the TCB. The stack boundary always gives
13792 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
13793 can compare directly. Otherwise we need to do an addition. */
/* The UNSPEC_STACK_CHECK MEM reads the stack limit from the TCB. */
13795 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
13796 UNSPEC_STACK_CHECK);
13797 limit = gen_rtx_CONST (Pmode, limit);
13798 limit = gen_rtx_MEM (Pmode, limit);
13799 if (allocate < SPLIT_STACK_AVAILABLE)
13800 current = stack_pointer_rtx;
13803 unsigned int scratch_regno;
13806 /* We need a scratch register to hold the stack pointer minus
13807 the required frame size. Since this is the very start of the
13808 function, the scratch register can be any caller-saved
13809 register which is not used for parameters. */
13810 offset = GEN_INT (- allocate);
13811 scratch_regno = split_stack_prologue_scratch_regno ();
13812 if (scratch_regno == INVALID_REGNUM)
13814 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
13815 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
13817 /* We don't use ix86_gen_add3 in this case because it will
13818 want to split to lea, but when not optimizing the insn
13819 will not be split after this point. */
13820 emit_insn (gen_rtx_SET (scratch_reg,
13821 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
/* Offset too wide for an immediate: materialize it first. */
13826 emit_move_insn (scratch_reg, offset);
13827 emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg,
13828 stack_pointer_rtx));
13830 current = scratch_reg;
13833 ix86_expand_branch (GEU, current, limit, label);
13834 jump_insn = get_last_insn ();
13835 JUMP_LABEL (jump_insn) = label;
13837 /* Mark the jump as very likely to be taken. */
13838 add_int_reg_note (jump_insn, REG_BR_PROB,
13839 REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100);
13841 if (split_stack_fn == NULL_RTX)
13843 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
13844 SYMBOL_REF_FLAGS (split_stack_fn) |= SYMBOL_FLAG_LOCAL;
13846 fn = split_stack_fn;
13848 /* Get more stack space. We pass in the desired stack space and the
13849 size of the arguments to copy to the new stack. In 32-bit mode
13850 we push the parameters; __morestack will return on a new stack
13851 anyhow. In 64-bit mode we pass the parameters in r10 and
13853 allocate_rtx = GEN_INT (allocate);
13854 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
13855 call_fusage = NULL_RTX;
/* 64-bit argument setup (the guarding TARGET_64BIT test is elided). */
13860 reg10 = gen_rtx_REG (Pmode, R10_REG);
13861 reg11 = gen_rtx_REG (Pmode, R11_REG);
13863 /* If this function uses a static chain, it will be in %r10.
13864 Preserve it across the call to __morestack. */
13865 if (DECL_STATIC_CHAIN (cfun->decl))
13869 rax = gen_rtx_REG (word_mode, AX_REG);
13870 emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
13871 use_reg (&call_fusage, rax);
13874 if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
13877 HOST_WIDE_INT argval;
13879 gcc_assert (Pmode == DImode);
13880 /* When using the large model we need to load the address
13881 into a register, and we've run out of registers. So we
13882 switch to a different calling convention, and we call a
13883 different function: __morestack_large. We pass the
13884 argument size in the upper 32 bits of r10 and pass the
13885 frame size in the lower 32 bits. */
13886 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
13887 gcc_assert ((args_size & 0xffffffff) == args_size);
13889 if (split_stack_fn_large == NULL_RTX)
13891 split_stack_fn_large =
13892 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
13893 SYMBOL_REF_FLAGS (split_stack_fn_large) |= SYMBOL_FLAG_LOCAL;
13895 if (ix86_cmodel == CM_LARGE_PIC)
13897 rtx_code_label *label;
/* Large PIC: compute the GOT base by hand, then load the
   target address from the GOT slot. */
13900 label = gen_label_rtx ();
13901 emit_label (label);
13902 LABEL_PRESERVE_P (label) = 1;
13903 emit_insn (gen_set_rip_rex64 (reg10, label));
13904 emit_insn (gen_set_got_offset_rex64 (reg11, label));
13905 emit_insn (ix86_gen_add3 (reg10, reg10, reg11));
13906 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
13908 x = gen_rtx_CONST (Pmode, x);
13909 emit_move_insn (reg11, x);
13910 x = gen_rtx_PLUS (Pmode, reg10, reg11);
13911 x = gen_const_mem (Pmode, x);
13912 emit_move_insn (reg11, x);
13915 emit_move_insn (reg11, split_stack_fn_large);
/* Pack args_size:allocate into r10 (hi:lo 32 bits each);
   the double shift avoids UB when HOST_WIDE_INT is 32 bits. */
13919 argval = ((args_size << 16) << 16) + allocate;
13920 emit_move_insn (reg10, GEN_INT (argval));
13924 emit_move_insn (reg10, allocate_rtx);
13925 emit_move_insn (reg11, GEN_INT (args_size));
13926 use_reg (&call_fusage, reg11);
13929 use_reg (&call_fusage, reg10);
/* 32-bit path: pass both arguments on the stack. */
13933 emit_insn (gen_push (GEN_INT (args_size)));
13934 emit_insn (gen_push (allocate_rtx));
13936 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
13937 GEN_INT (UNITS_PER_WORD), constm1_rtx,
13939 add_function_usage_to (call_insn, call_fusage);
13941 /* In order to make call/return prediction work right, we now need
13942 to execute a return instruction. See
13943 libgcc/config/i386/morestack.S for the details on how this works.
13945 For flow purposes gcc must not see this as a return
13946 instruction--we need control flow to continue at the subsequent
13947 label. Therefore, we use an unspec. */
13948 gcc_assert (crtl->args.pops_args < 65536);
13949 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
13951 /* If we are in 64-bit mode and this function uses a static chain,
13952 we saved %r10 in %rax before calling _morestack. */
13953 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
13954 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
13955 gen_rtx_REG (word_mode, AX_REG))
13957 /* If this function calls va_start, we need to store a pointer to
13958 the arguments on the old stack, because they may not have been
13959 all copied to the new stack. At this point the old stack can be
13960 found at the frame pointer value used by __morestack, because
13961 __morestack has set that up before calling back to us. Here we
13962 store that pointer in a scratch register, and in
13963 ix86_expand_prologue we store the scratch register in a stack
13965 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
13967 unsigned int scratch_regno;
13971 scratch_regno = split_stack_prologue_scratch_regno ();
13972 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
13973 frame_reg = gen_rtx_REG (Pmode, BP_REG);
/* Stack layout inside __morestack, 64-bit case: */
13977 return address within this function
13978 return address of caller of this function
13980 So we add three words to get to the stack arguments.
/* 32-bit case (arguments were pushed above): */
13984 return address within this function
13985 first argument to __morestack
13986 second argument to __morestack
13987 return address of caller of this function
13989 So we add five words to get to the stack arguments.
13991 words = TARGET_64BIT ? 3 : 5;
13992 emit_insn (gen_rtx_SET (scratch_reg,
13993 gen_rtx_PLUS (Pmode, frame_reg,
13994 GEN_INT (words * UNITS_PER_WORD))));
13996 varargs_label = gen_label_rtx ();
13997 emit_jump_insn (gen_jump (varargs_label));
13998 JUMP_LABEL (get_last_insn ()) = varargs_label;
/* Fast path: enough stack, __morestack was skipped. */
14003 emit_label (label);
14004 LABEL_NUSES (label) = 1;
14006 /* If this function calls va_start, we now have to set the scratch
14007 register for the case where we do not call __morestack. In this
14008 case we need to set it based on the stack pointer. */
14009 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
14011 emit_insn (gen_rtx_SET (scratch_reg,
14012 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14013 GEN_INT (UNITS_PER_WORD))));
14015 emit_label (varargs_label);
14016 LABEL_NUSES (varargs_label) = 1;
14020 /* We may have to tell the dataflow pass that the split stack prologue
14021 is initializing a scratch register. */
/* LIVE_ON_ENTRY-style hook: marks the split-stack scratch register as
   live on entry so dataflow does not consider it dead.
   NOTE(review): gapped dump — signature/brace lines elided. */
14024 ix86_live_on_entry (bitmap regs)
14026 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
14028 gcc_assert (flag_split_stack);
14029 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
14033 /* Extract the parts of an RTL expression that is a valid memory address
14034 for an instruction. Return 0 if the structure of the address is
14035 grossly off. Return -1 if the address contains ASHIFT, so it is not
14036 strictly valid, but still used for computing length of lea instruction. */
/* Fills *OUT with base/index/disp/scale/seg for ADDR.
   NOTE(review): this region is a line-numbered dump with elided lines
   (return statements, braces, several cases); code kept byte-identical,
   comments only. */
14039 ix86_decompose_address (rtx addr, struct ix86_address *out)
14041 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
14042 rtx base_reg, index_reg;
14043 HOST_WIDE_INT scale = 1;
14044 rtx scale_rtx = NULL_RTX;
14047 addr_space_t seg = ADDR_SPACE_GENERIC;
14049 /* Allow zero-extended SImode addresses,
14050 they will be emitted with addr32 prefix. */
14051 if (TARGET_64BIT && GET_MODE (addr) == DImode)
14053 if (GET_CODE (addr) == ZERO_EXTEND
14054 && GET_MODE (XEXP (addr, 0)) == SImode)
14056 addr = XEXP (addr, 0);
14057 if (CONST_INT_P (addr))
14060 else if (GET_CODE (addr) == AND
14061 && const_32bit_mask (XEXP (addr, 1), DImode))
/* (and:DI x 0xffffffff) is an alternate zero-extension form. */
14063 addr = simplify_gen_subreg (SImode, XEXP (addr, 0), DImode, 0);
14064 if (addr == NULL_RTX)
14067 if (CONST_INT_P (addr))
14072 /* Allow SImode subregs of DImode addresses,
14073 they will be emitted with addr32 prefix. */
14074 if (TARGET_64BIT && GET_MODE (addr) == SImode)
14076 if (SUBREG_P (addr)
14077 && GET_MODE (SUBREG_REG (addr)) == DImode)
14079 addr = SUBREG_REG (addr);
14080 if (CONST_INT_P (addr))
/* Dispatch on the top-level shape of the address. */
14087 else if (SUBREG_P (addr))
14089 if (REG_P (SUBREG_REG (addr)))
14094 else if (GET_CODE (addr) == PLUS)
/* Flatten the PLUS tree into up to four addends, then classify
   each one as index*scale, UNSPEC segment, reg, or displacement. */
14096 rtx addends[4], op;
14104 addends[n++] = XEXP (op, 1);
14107 while (GET_CODE (op) == PLUS);
14112 for (i = n; i >= 0; --i)
14115 switch (GET_CODE (op))
14120 index = XEXP (op, 0);
14121 scale_rtx = XEXP (op, 1);
/* ASHIFT addend: convert the shift count to a scale factor. */
14127 index = XEXP (op, 0);
14128 tmp = XEXP (op, 1);
14129 if (!CONST_INT_P (tmp))
14131 scale = INTVAL (tmp);
14132 if ((unsigned HOST_WIDE_INT) scale > 3)
14134 scale = 1 << scale;
14139 if (GET_CODE (op) != UNSPEC)
14144 if (XINT (op, 1) == UNSPEC_TP
14145 && TARGET_TLS_DIRECT_SEG_REFS
14146 && seg == ADDR_SPACE_GENERIC)
14147 seg = DEFAULT_TLS_SEG_REG;
14153 if (!REG_P (SUBREG_REG (op)))
14180 else if (GET_CODE (addr) == MULT)
14182 index = XEXP (addr, 0); /* index*scale */
14183 scale_rtx = XEXP (addr, 1);
14185 else if (GET_CODE (addr) == ASHIFT)
14187 /* We're called for lea too, which implements ashift on occasion. */
14188 index = XEXP (addr, 0);
14189 tmp = XEXP (addr, 1);
14190 if (!CONST_INT_P (tmp))
14192 scale = INTVAL (tmp);
14193 if ((unsigned HOST_WIDE_INT) scale > 3)
14195 scale = 1 << scale;
14199 disp = addr; /* displacement */
/* Strip a SUBREG wrapper off the index if it wraps a plain REG. */
14205 else if (SUBREG_P (index)
14206 && REG_P (SUBREG_REG (index)))
14212 /* Extract the integral value of scale. */
14215 if (!CONST_INT_P (scale_rtx))
14217 scale = INTVAL (scale_rtx);
14220 base_reg = base && SUBREG_P (base) ? SUBREG_REG (base) : base;
14221 index_reg = index && SUBREG_P (index) ? SUBREG_REG (index) : index;
14223 /* Avoid useless 0 displacement. */
14224 if (disp == const0_rtx && (base || index))
14227 /* Allow arg pointer and stack pointer as index if there is not scaling. */
14228 if (base_reg && index_reg && scale == 1
14229 && (index_reg == arg_pointer_rtx
14230 || index_reg == frame_pointer_rtx
14231 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
/* SP/argp cannot encode as index — swap with the base. */
14233 std::swap (base, index);
14234 std::swap (base_reg, index_reg);
14237 /* Special case: %ebp cannot be encoded as a base without a displacement.
14241 && (base_reg == hard_frame_pointer_rtx
14242 || base_reg == frame_pointer_rtx
14243 || base_reg == arg_pointer_rtx
14244 || (REG_P (base_reg)
14245 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
14246 || REGNO (base_reg) == R13_REG))))
14249 /* Special case: on K6, [%esi] makes the instruction vector decoded.
14250 Avoid this by transforming to [%esi+0].
14251 Reload calls address legitimization without cfun defined, so we need
14252 to test cfun for being non-NULL. */
14253 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
14254 && base_reg && !index_reg && !disp
14255 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
14258 /* Special case: encode reg+reg instead of reg*2. */
14259 if (!base && index && scale == 2)
14260 base = index, base_reg = index_reg, scale = 1;
14262 /* Special case: scaling cannot be encoded without base or displacement. */
14263 if (!base && !disp && index && scale != 1)
14267 out->index = index;
14269 out->scale = scale;
14275 /* Return cost of the memory address x.
14276 For i386, it is better to use a complex address than let gcc copy
14277 the address into a reg and make a new pseudo. But not if the address
14278 requires two regs - that would mean more pseudos with longer
/* NOTE(review): several source lines are elided in this excerpt; the cost
   accumulation statements guarded by the conditions below are not shown.  */
14281 ix86_address_cost (rtx x, machine_mode, addr_space_t, bool)
14283 struct ix86_address parts;
14285 int ok = ix86_decompose_address (x, &parts);
/* Look through SUBREGs so the register checks below see the naked regs.  */
14289 if (parts.base && SUBREG_P (parts.base))
14290 parts.base = SUBREG_REG (parts.base);
14291 if (parts.index && SUBREG_P (parts.index))
14292 parts.index = SUBREG_REG (parts.index);
14294 /* Attempt to minimize number of registers in the address by increasing
14295 address cost for each used register. We don't increase address cost
14296 for "pic_offset_table_rtx". When a memop with "pic_offset_table_rtx"
14297 is not invariant itself it most likely means that base or index is not
14298 invariant. Therefore only "pic_offset_table_rtx" could be hoisted out,
14299 which is not profitable for x86. */
14301 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
14302 && (current_pass->type == GIMPLE_PASS
14303 || !pic_offset_table_rtx
14304 || !REG_P (parts.base)
14305 || REGNO (pic_offset_table_rtx) != REGNO (parts.base)))
/* Same test, applied to the index register.  */
14309 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
14310 && (current_pass->type == GIMPLE_PASS
14311 || !pic_offset_table_rtx
14312 || !REG_P (parts.index)
14313 || REGNO (pic_offset_table_rtx) != REGNO (parts.index)))
14316 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
14317 since its predecode logic can't detect the length of instructions
14318 and it degenerates to vector decoded. Increase cost of such
14319 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
14320 to split such addresses or even refuse such addresses at all.
14322 Following addressing modes are affected:
14327 The first and last case may be avoidable by explicitly coding the zero in
14328 memory address, but I don't have AMD-K6 machine handy to check this
14332 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
14333 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
14334 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
14340 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
14341 this is used to form addresses to local data when -fPIC is in
/* Returns true iff DISP is the Mach-O pic-base-relative offset unspec.  */
14345 darwin_local_data_pic (rtx disp)
14347 return (GET_CODE (disp) == UNSPEC
14348 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
14351 /* Determine if a given RTX is a valid constant. We already know this
14352 satisfies CONSTANT_P. */
14355 ix86_legitimate_constant_p (machine_mode, rtx x)
14357 /* Pointer bounds constants are not valid. */
14358 if (POINTER_BOUNDS_MODE_P (GET_MODE (x)))
14361 switch (GET_CODE (x))
/* CONST: strip a possible (plus (...) (const_int)) wrapper first.  */
14366 if (GET_CODE (x) == PLUS)
14368 if (!CONST_INT_P (XEXP (x, 1)))
14373 if (TARGET_MACHO && darwin_local_data_pic (x))
14376 /* Only some unspecs are valid as "constants". */
14377 if (GET_CODE (x) == UNSPEC)
14378 switch (XINT (x, 1))
14381 case UNSPEC_GOTOFF:
14382 case UNSPEC_PLTOFF:
14383 return TARGET_64BIT;
/* Local-exec TLS offsets are link-time constants.  */
14385 case UNSPEC_NTPOFF:
14386 x = XVECEXP (x, 0, 0);
14387 return (GET_CODE (x) == SYMBOL_REF
14388 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
14389 case UNSPEC_DTPOFF:
14390 x = XVECEXP (x, 0, 0);
14391 return (GET_CODE (x) == SYMBOL_REF
14392 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
14397 /* We must have drilled down to a symbol. */
14398 if (GET_CODE (x) == LABEL_REF)
14400 if (GET_CODE (x) != SYMBOL_REF)
14405 /* TLS symbols are never valid. */
14406 if (SYMBOL_REF_TLS_MODEL (x))
14409 /* DLLIMPORT symbols are never valid. */
14410 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
14411 && SYMBOL_REF_DLLIMPORT_P (x))
14415 /* mdynamic-no-pic */
14416 if (MACHO_DYNAMIC_NO_PIC_P)
14417 return machopic_symbol_defined_p (x);
/* Wide-int / vector constants: only SSE-standard forms on 32-bit.  */
14421 case CONST_WIDE_INT:
14422 if (!TARGET_64BIT && !standard_sse_constant_p (x))
14427 if (!standard_sse_constant_p (x))
14434 /* Otherwise we handle everything else in the move patterns. */
14438 /* Determine if it's legal to put X into the constant pool. This
14439 is not possible for the address of thread-local symbols, which
14440 is checked above. */
14443 ix86_cannot_force_const_mem (machine_mode mode, rtx x)
14445 /* We can always put integral constants and vectors in memory. */
14446 switch (GET_CODE (x))
14449 case CONST_WIDE_INT:
/* Everything else is poolable only if it is a legitimate constant.  */
14457 return !ix86_legitimate_constant_p (mode, x);
14460 /* Nonzero if the symbol is marked as dllimport, or as stub-variable,
/* Only meaningful for SYMBOL_REFs on dllimport-aware (PE-COFF) targets;
   anything else answers false.  */
14464 is_imported_p (rtx x)
14466 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
14467 || GET_CODE (x) != SYMBOL_REF)
14470 return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
14474 /* Nonzero if the constant value X is a legitimate general operand
14475 when generating PIC code. It is given that flag_pic is on and
14476 that X satisfies CONSTANT_P. */
14479 legitimate_pic_operand_p (rtx x)
14483 switch (GET_CODE (x))
/* CONST: strip an optional (plus ... const_int) to reach the inner term.  */
14486 inner = XEXP (x, 0);
14487 if (GET_CODE (inner) == PLUS
14488 && CONST_INT_P (XEXP (inner, 1)))
14489 inner = XEXP (inner, 0);
14491 /* Only some unspecs are valid as "constants". */
14492 if (GET_CODE (inner) == UNSPEC)
14493 switch (XINT (inner, 1))
14496 case UNSPEC_GOTOFF:
14497 case UNSPEC_PLTOFF:
14498 return TARGET_64BIT;
/* Local-exec TLS offset: valid only for a local-exec SYMBOL_REF.  */
14500 x = XVECEXP (inner, 0, 0);
14501 return (GET_CODE (x) == SYMBOL_REF
14502 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
14503 case UNSPEC_MACHOPIC_OFFSET:
14504 return legitimate_pic_address_disp_p (x);
/* SYMBOL_REF / LABEL_REF fall through to the displacement check.  */
14512 return legitimate_pic_address_disp_p (x);
14519 /* Determine if a given CONST RTX is a valid memory displacement
/* NOTE(review): this excerpt elides a number of lines (returns, braces);
   the logic below is the 64-bit direct-reference check followed by the
   32-bit GOT-relative unspec validation.  */
14523 legitimate_pic_address_disp_p (rtx disp)
14527 /* In 64bit mode we can allow direct addresses of symbols and labels
14528 when they are not dynamic symbols. */
14531 rtx op0 = disp, op1;
14533 switch (GET_CODE (disp))
/* CONST: must be sym/label plus a signed offset within +/-16MB so the
   final address still fits a 32-bit displacement.  */
14539 if (GET_CODE (XEXP (disp, 0)) != PLUS)
14541 op0 = XEXP (XEXP (disp, 0), 0);
14542 op1 = XEXP (XEXP (disp, 0), 1);
14543 if (!CONST_INT_P (op1)
14544 || INTVAL (op1) >= 16*1024*1024
14545 || INTVAL (op1) < -16*1024*1024)
14547 if (GET_CODE (op0) == LABEL_REF)
14549 if (GET_CODE (op0) == CONST
14550 && GET_CODE (XEXP (op0, 0)) == UNSPEC
14551 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
14553 if (GET_CODE (op0) == UNSPEC
14554 && XINT (op0, 1) == UNSPEC_PCREL)
14556 if (GET_CODE (op0) != SYMBOL_REF)
14561 /* TLS references should always be enclosed in UNSPEC.
14562 The dllimported symbol needs always to be resolved. */
14563 if (SYMBOL_REF_TLS_MODEL (op0)
14564 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
14569 if (is_imported_p (op0))
14572 if (SYMBOL_REF_FAR_ADDR_P (op0)
14573 || !SYMBOL_REF_LOCAL_P (op0))
14576 /* Function-symbols need to be resolved only for
14578 For the small-model we don't need to resolve anything
14580 if ((ix86_cmodel != CM_LARGE_PIC
14581 && SYMBOL_REF_FUNCTION_P (op0))
14582 || ix86_cmodel == CM_SMALL_PIC)
14584 /* Non-external symbols don't need to be resolved for
14585 large, and medium-model. */
14586 if ((ix86_cmodel == CM_LARGE_PIC
14587 || ix86_cmodel == CM_MEDIUM_PIC)
14588 && !SYMBOL_REF_EXTERNAL_P (op0))
14591 else if (!SYMBOL_REF_FAR_ADDR_P (op0)
14592 && (SYMBOL_REF_LOCAL_P (op0)
14593 || (HAVE_LD_PIE_COPYRELOC
14595 && !SYMBOL_REF_WEAK (op0)
14596 && !SYMBOL_REF_FUNCTION_P (op0)))
14597 && ix86_cmodel != CM_LARGE_PIC)
/* Non-64-bit (or fallthrough) path: displacement must be a CONST.  */
14605 if (GET_CODE (disp) != CONST)
14607 disp = XEXP (disp, 0);
/* On 64-bit, only a bare GOT/PLT-relative unspec of a symbol or label
   is allowed as a displacement.  */
14611 /* We are unsafe to allow PLUS expressions. This limit allowed distance
14612 of GOT tables. We should not need these anyway. */
14613 if (GET_CODE (disp) != UNSPEC
14614 || (XINT (disp, 1) != UNSPEC_GOTPCREL
14615 && XINT (disp, 1) != UNSPEC_GOTOFF
14616 && XINT (disp, 1) != UNSPEC_PCREL
14617 && XINT (disp, 1) != UNSPEC_PLTOFF))
14620 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
14621 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
/* 32-bit path: strip an optional constant offset.  */
14627 if (GET_CODE (disp) == PLUS)
14629 if (!CONST_INT_P (XEXP (disp, 1)))
14631 disp = XEXP (disp, 0);
14635 if (TARGET_MACHO && darwin_local_data_pic (disp))
14638 if (GET_CODE (disp) != UNSPEC)
14641 switch (XINT (disp, 1))
14646 /* We need to check for both symbols and labels because VxWorks loads
14647 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
14649 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
14650 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
14651 case UNSPEC_GOTOFF:
14652 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
14653 While ABI specify also 32bit relocation but we don't produce it in
14654 small PIC model at all. */
14655 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
14656 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
14658 return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
/* TLS unspecs: validate that the wrapped symbol's TLS model matches.  */
14660 case UNSPEC_GOTTPOFF:
14661 case UNSPEC_GOTNTPOFF:
14662 case UNSPEC_INDNTPOFF:
14665 disp = XVECEXP (disp, 0, 0);
14666 return (GET_CODE (disp) == SYMBOL_REF
14667 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
14668 case UNSPEC_NTPOFF:
14669 disp = XVECEXP (disp, 0, 0);
14670 return (GET_CODE (disp) == SYMBOL_REF
14671 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
14672 case UNSPEC_DTPOFF:
14673 disp = XVECEXP (disp, 0, 0);
14674 return (GET_CODE (disp) == SYMBOL_REF
14675 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
14681 /* Determine if op is suitable RTX for an address register.
14682 Return naked register if a register or a register subreg is
14683 found, otherwise return NULL_RTX. */
14686 ix86_validate_address_register (rtx op)
14688 machine_mode mode = GET_MODE (op);
14690 /* Only SImode or DImode registers can form the address. */
14691 if (mode != SImode && mode != DImode)
14696 else if (SUBREG_P (op))
14698 rtx reg = SUBREG_REG (op);
14703 mode = GET_MODE (reg);
14705 /* Don't allow SUBREGs that span more than a word. It can
14706 lead to spill failures when the register is one word out
14707 of a two word structure. */
14708 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
14711 /* Allow only SUBREGs of non-eliminable hard registers. */
14712 if (register_no_elim_operand (reg, mode))
14716 /* Op is not a register. */
14720 /* Recognizes RTL expressions that are valid memory addresses for an
14721 instruction. The MODE argument is the machine mode for the MEM
14722 expression that wants to use this address.
14724 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
14725 convert common non-canonical forms to canonical form so that they will
/* NOTE(review): many "return false" lines and braces are elided in this
   excerpt; each comment of the form "X is not valid" precedes a reject.  */
14729 ix86_legitimate_address_p (machine_mode, rtx addr, bool strict)
14731 struct ix86_address parts;
14732 rtx base, index, disp;
14733 HOST_WIDE_INT scale;
14736 if (ix86_decompose_address (addr, &parts) <= 0)
14737 /* Decomposition failed. */
14741 index = parts.index;
14743 scale = parts.scale;
14746 /* Validate base register. */
14749 rtx reg = ix86_validate_address_register (base);
14751 if (reg == NULL_RTX)
14754 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
14755 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
14756 /* Base is not valid. */
14760 /* Validate index register. */
14763 rtx reg = ix86_validate_address_register (index);
14765 if (reg == NULL_RTX)
14768 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
14769 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
14770 /* Index is not valid. */
14774 /* Index and base should have the same mode. */
14776 && GET_MODE (base) != GET_MODE (index))
14779 /* Address override works only on the (%reg) part of %fs:(%reg). */
14780 if (seg != ADDR_SPACE_GENERIC
14781 && ((base && GET_MODE (base) != word_mode)
14782 || (index && GET_MODE (index) != word_mode)))
14785 /* Validate scale factor. */
14789 /* Scale without index. */
14792 if (scale != 2 && scale != 4 && scale != 8)
14793 /* Scale is not a valid multiplier. */
14797 /* Validate displacement. */
14800 if (GET_CODE (disp) == CONST
14801 && GET_CODE (XEXP (disp, 0)) == UNSPEC
14802 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
14803 switch (XINT (XEXP (disp, 0), 1))
14805 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
14806 used. While ABI specify also 32bit relocations, we don't produce
14807 them at all and use IP relative instead. */
14809 case UNSPEC_GOTOFF:
14810 gcc_assert (flag_pic);
14812 goto is_legitimate_pic;
14814 /* 64bit address unspec. */
14817 case UNSPEC_GOTPCREL:
14819 gcc_assert (flag_pic);
14820 goto is_legitimate_pic;
/* TLS displacement unspecs are accepted here; their symbols were
   validated by legitimate_pic_address_disp_p / the move expanders.  */
14822 case UNSPEC_GOTTPOFF:
14823 case UNSPEC_GOTNTPOFF:
14824 case UNSPEC_INDNTPOFF:
14825 case UNSPEC_NTPOFF:
14826 case UNSPEC_DTPOFF:
14829 case UNSPEC_STACK_CHECK:
14830 gcc_assert (flag_split_stack);
14834 /* Invalid address unspec. */
14838 else if (SYMBOLIC_CONST (disp)
14842 && MACHOPIC_INDIRECT
14843 && !machopic_operand_p (disp)
/* PIC path: symbolic displacement combined with registers.  */
14849 if (TARGET_64BIT && (index || base))
14851 /* foo@dtpoff(%rX) is ok. */
14852 if (GET_CODE (disp) != CONST
14853 || GET_CODE (XEXP (disp, 0)) != PLUS
14854 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
14855 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
14856 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
14857 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
14858 /* Non-constant pic memory reference. */
14861 else if ((!TARGET_MACHO || flag_pic)
14862 && ! legitimate_pic_address_disp_p (disp))
14863 /* Displacement is an invalid pic construct. */
14866 else if (MACHO_DYNAMIC_NO_PIC_P
14867 && !ix86_legitimate_constant_p (Pmode, disp))
14868 /* displacement must be referenced via non_lazy_pointer */
14872 /* This code used to verify that a symbolic pic displacement
14873 includes the pic_offset_table_rtx register.
14875 While this is good idea, unfortunately these constructs may
14876 be created by "adds using lea" optimization for incorrect
14885 This code is nonsensical, but results in addressing
14886 GOT table with pic_offset_table_rtx base. We can't
14887 just refuse it easily, since it gets matched by
14888 "addsi3" pattern, that later gets split to lea in the
14889 case output register differs from input. While this
14890 can be handled by separate addsi pattern for this case
14891 that never results in lea, this seems to be easier and
14892 correct fix for crash to disable this test. */
14894 else if (GET_CODE (disp) != LABEL_REF
14895 && !CONST_INT_P (disp)
14896 && (GET_CODE (disp) != CONST
14897 || !ix86_legitimate_constant_p (Pmode, disp))
14898 && (GET_CODE (disp) != SYMBOL_REF
14899 || !ix86_legitimate_constant_p (Pmode, disp)))
14900 /* Displacement is not constant. */
14902 else if (TARGET_64BIT
14903 && !x86_64_immediate_operand (disp, VOIDmode))
14904 /* Displacement is out of range. */
14906 /* In x32 mode, constant addresses are sign extended to 64bit, so
14907 we have to prevent addresses from 0x80000000 to 0xffffffff. */
14908 else if (TARGET_X32 && !(index || base)
14909 && CONST_INT_P (disp)
14910 && val_signbit_known_set_p (SImode, INTVAL (disp)))
14914 /* Everything looks valid. */
14918 /* Determine if a given RTX is a valid constant address. */
/* An address is a constant address when it is CONSTANT_P and passes the
   strict legitimate-address check in Pmode.  */
14921 constant_address_p (rtx x)
14923 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
14926 /* Return a unique alias set for the GOT. */
14928 static alias_set_type
14929 ix86_GOT_alias_set (void)
/* Lazily allocated on first use; -1 marks "not yet created".  */
14931 static alias_set_type set = -1;
14933 set = new_alias_set ();
14937 /* Return a legitimate reference for ORIG (an address) using the
14938 register REG. If REG is 0, a new pseudo is generated.
14940 There are two types of references that must be handled:
14942 1. Global data references must load the address from the GOT, via
14943 the PIC reg. An insn is emitted to do this load, and the reg is
14946 2. Static data references, constant pool addresses, and code labels
14947 compute the address as an offset from the GOT, whose base is in
14948 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
14949 differentiate them from global data objects. The returned
14950 address is the PIC reg + an unspec constant.
14952 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
14953 reg also appears in the address. */
14956 legitimize_pic_address (rtx orig, rtx reg)
14959 rtx new_rtx = orig;
/* Mach-O 32-bit has its own PIC machinery; delegate entirely.  */
14962 if (TARGET_MACHO && !TARGET_64BIT)
14965 reg = gen_reg_rtx (Pmode);
14966 /* Use the generic Mach-O PIC machinery. */
14967 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
/* PE-COFF dllimport symbols resolve through __imp_ stubs first.  */
14971 if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
14973 rtx tmp = legitimize_pe_coff_symbol (addr, true);
14978 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
14980 else if (TARGET_64BIT && !TARGET_PECOFF
14981 && ix86_cmodel != CM_SMALL_PIC && gotoff_operand (addr, Pmode))
14984 /* This symbol may be referenced via a displacement from the PIC
14985 base address (@GOTOFF). */
14987 if (GET_CODE (addr) == CONST)
14988 addr = XEXP (addr, 0);
14989 if (GET_CODE (addr) == PLUS)
14991 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
14993 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
14996 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
14997 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
14999 tmpreg = gen_reg_rtx (Pmode);
15002 emit_move_insn (tmpreg, new_rtx);
15006 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
15007 tmpreg, 1, OPTAB_DIRECT);
15011 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
15013 else if (!TARGET_64BIT && !TARGET_PECOFF && gotoff_operand (addr, Pmode))
15015 /* This symbol may be referenced via a displacement from the PIC
15016 base address (@GOTOFF). */
15018 if (GET_CODE (addr) == CONST)
15019 addr = XEXP (addr, 0);
15020 if (GET_CODE (addr) == PLUS)
15022 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
15024 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
15027 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
15028 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
15029 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
15033 emit_move_insn (reg, new_rtx);
/* Plain (non-TLS) symbols and VxWorks text labels go through the GOT.  */
15037 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
15038 /* We can't use @GOTOFF for text labels on VxWorks;
15039 see gotoff_operand. */
15040 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
15042 rtx tmp = legitimize_pe_coff_symbol (addr, true);
15046 /* For x64 PE-COFF there is no GOT table. So we use address
15048 if (TARGET_64BIT && TARGET_PECOFF)
15050 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
15051 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
15054 reg = gen_reg_rtx (Pmode);
15055 emit_move_insn (reg, new_rtx);
15058 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
15060 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
15061 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
15062 new_rtx = gen_const_mem (Pmode, new_rtx);
15063 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
15066 reg = gen_reg_rtx (Pmode);
15067 /* Use directly gen_movsi, otherwise the address is loaded
15068 into register for CSE. We don't want to CSE this addresses,
15069 instead we CSE addresses from the GOT table, so skip this. */
15070 emit_insn (gen_movsi (reg, new_rtx));
15075 /* This symbol must be referenced via a load from the
15076 Global Offset Table (@GOT). */
15078 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
15079 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
15081 new_rtx = force_reg (Pmode, new_rtx);
15082 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
15083 new_rtx = gen_const_mem (Pmode, new_rtx);
15084 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
15087 reg = gen_reg_rtx (Pmode);
15088 emit_move_insn (reg, new_rtx);
/* Fallback: non-symbolic addresses (constants, CONST wrappers, PLUS).  */
15094 if (CONST_INT_P (addr)
15095 && !x86_64_immediate_operand (addr, VOIDmode))
15099 emit_move_insn (reg, addr);
15103 new_rtx = force_reg (Pmode, addr);
15105 else if (GET_CODE (addr) == CONST)
15107 addr = XEXP (addr, 0);
15109 /* We must match stuff we generate before. Assume the only
15110 unspecs that can get here are ours. Not that we could do
15111 anything with them anyway.... */
15112 if (GET_CODE (addr) == UNSPEC
15113 || (GET_CODE (addr) == PLUS
15114 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
15116 gcc_assert (GET_CODE (addr) == PLUS);
15118 if (GET_CODE (addr) == PLUS)
15120 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
15122 /* Check first to see if this is a constant offset from a @GOTOFF
15123 symbol reference. */
15124 if (!TARGET_PECOFF && gotoff_operand (op0, Pmode)
15125 && CONST_INT_P (op1))
15129 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
15131 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
15132 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
15133 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
15137 emit_move_insn (reg, new_rtx);
/* Offsets outside +/-16MB cannot be folded into the relocation.  */
15143 if (INTVAL (op1) < -16*1024*1024
15144 || INTVAL (op1) >= 16*1024*1024)
15146 if (!x86_64_immediate_operand (op1, Pmode))
15147 op1 = force_reg (Pmode, op1);
15148 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
/* General PLUS: legitimize both operands recursively, then recombine.  */
15154 rtx base = legitimize_pic_address (op0, reg);
15155 machine_mode mode = GET_MODE (base);
15157 = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
15159 if (CONST_INT_P (new_rtx))
15161 if (INTVAL (new_rtx) < -16*1024*1024
15162 || INTVAL (new_rtx) >= 16*1024*1024)
15164 if (!x86_64_immediate_operand (new_rtx, mode))
15165 new_rtx = force_reg (mode, new_rtx);
15167 = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
15170 new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
15174 /* For %rip addressing, we have to use just disp32, not
15177 && (GET_CODE (base) == SYMBOL_REF
15178 || GET_CODE (base) == LABEL_REF))
15179 base = force_reg (mode, base);
15180 if (GET_CODE (new_rtx) == PLUS
15181 && CONSTANT_P (XEXP (new_rtx, 1)))
15183 base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
15184 new_rtx = XEXP (new_rtx, 1);
15186 new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
15194 /* Load the thread pointer. If TO_REG is true, force it into a register. */
15197 get_thread_pointer (machine_mode tp_mode, bool to_reg)
15199 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
/* Widen SImode TP to DImode when the caller asks for it (x32 case).  */
15201 if (GET_MODE (tp) != tp_mode)
15203 gcc_assert (GET_MODE (tp) == SImode);
15204 gcc_assert (tp_mode == DImode);
15206 tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
15210 tp = copy_to_mode_reg (tp_mode, tp);
15215 /* Construct the SYMBOL_REF for the tls_get_addr function. */
15217 static GTY(()) rtx ix86_tls_symbol;
15220 ix86_tls_get_addr (void)
/* Lazily create the symbol; GNU TLS on 32-bit uses the triple-underscore
   ___tls_get_addr entry point.  */
15222 if (!ix86_tls_symbol)
15225 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
15226 ? "___tls_get_addr" : "__tls_get_addr");
15228 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
/* In the large PIC model the call target itself must be GOT-relative.  */
15231 if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
15233 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
15235 return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
15236 gen_rtx_CONST (Pmode, unspec));
15239 return ix86_tls_symbol;
15242 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
15244 static GTY(()) rtx ix86_tls_module_base_symbol;
15247 ix86_tls_module_base (void)
15249 if (!ix86_tls_module_base_symbol)
15251 ix86_tls_module_base_symbol
15252 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
/* Mark the symbol as global-dynamic TLS so references are emitted with
   the proper @TLSGD-style relocations.  */
15254 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
15255 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
15258 return ix86_tls_module_base_symbol;
15261 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
15262 false if we expect this to be used for a memory address and true if
15263 we expect to load the address into a register. */
15266 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
15268 rtx dest, base, off;
15269 rtx pic = NULL_RTX, tp = NULL_RTX;
15270 machine_mode tp_mode = Pmode;
15273 /* Fall back to global dynamic model if tool chain cannot support local
15275 if (TARGET_SUN_TLS && !TARGET_64BIT
15276 && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
15277 && model == TLS_MODEL_LOCAL_DYNAMIC)
15278 model = TLS_MODEL_GLOBAL_DYNAMIC;
15282 case TLS_MODEL_GLOBAL_DYNAMIC:
15283 dest = gen_reg_rtx (Pmode);
/* Obtain a PIC register for the __tls_get_addr call on 32-bit.  */
15287 if (flag_pic && !TARGET_PECOFF)
15288 pic = pic_offset_table_rtx;
15291 pic = gen_reg_rtx (Pmode);
15292 emit_insn (gen_set_got (pic));
15296 if (TARGET_GNU2_TLS)
15299 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
15301 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
15303 tp = get_thread_pointer (Pmode, true);
15304 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
15306 if (GET_MODE (x) != Pmode)
15307 x = gen_rtx_ZERO_EXTEND (Pmode, x);
15309 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
/* Classic GNU TLS: emit a __tls_get_addr libcall sequence.  */
15313 rtx caddr = ix86_tls_get_addr ();
15317 rtx rax = gen_rtx_REG (Pmode, AX_REG);
15322 (ix86_gen_tls_global_dynamic_64 (rax, x, caddr));
15323 insns = get_insns ();
15326 if (GET_MODE (x) != Pmode)
15327 x = gen_rtx_ZERO_EXTEND (Pmode, x);
15329 RTL_CONST_CALL_P (insns) = 1;
15330 emit_libcall_block (insns, dest, rax, x);
15333 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
15337 case TLS_MODEL_LOCAL_DYNAMIC:
15338 base = gen_reg_rtx (Pmode);
15343 pic = pic_offset_table_rtx;
15346 pic = gen_reg_rtx (Pmode);
15347 emit_insn (gen_set_got (pic));
15351 if (TARGET_GNU2_TLS)
15353 rtx tmp = ix86_tls_module_base ();
15356 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
15358 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
15360 tp = get_thread_pointer (Pmode, true);
15361 set_unique_reg_note (get_last_insn (), REG_EQUAL,
15362 gen_rtx_MINUS (Pmode, tmp, tp));
15366 rtx caddr = ix86_tls_get_addr ();
15370 rtx rax = gen_rtx_REG (Pmode, AX_REG);
15376 (ix86_gen_tls_local_dynamic_base_64 (rax, caddr));
15377 insns = get_insns ();
15380 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
15381 share the LD_BASE result with other LD model accesses. */
15382 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
15383 UNSPEC_TLS_LD_BASE);
15385 RTL_CONST_CALL_P (insns) = 1;
15386 emit_libcall_block (insns, base, rax, eqv);
15389 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
/* Add the @DTPOFF offset of X to the module base.  */
15392 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
15393 off = gen_rtx_CONST (Pmode, off);
15395 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
15397 if (TARGET_GNU2_TLS)
15399 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
15401 if (GET_MODE (x) != Pmode)
15402 x = gen_rtx_ZERO_EXTEND (Pmode, x);
15404 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
15408 case TLS_MODEL_INITIAL_EXEC:
15411 if (TARGET_SUN_TLS && !TARGET_X32)
15413 /* The Sun linker took the AMD64 TLS spec literally
15414 and can only handle %rax as destination of the
15415 initial executable code sequence. */
15417 dest = gen_reg_rtx (DImode);
15418 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
15422 /* Generate DImode references to avoid %fs:(%reg32)
15423 problems and linker IE->LE relaxation bug. */
15426 type = UNSPEC_GOTNTPOFF;
15430 pic = pic_offset_table_rtx;
15431 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
15433 else if (!TARGET_ANY_GNU_TLS)
15435 pic = gen_reg_rtx (Pmode);
15436 emit_insn (gen_set_got (pic));
15437 type = UNSPEC_GOTTPOFF;
15442 type = UNSPEC_INDNTPOFF;
/* Load the thread-pointer-relative offset from the GOT.  */
15445 off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
15446 off = gen_rtx_CONST (tp_mode, off);
15448 off = gen_rtx_PLUS (tp_mode, pic, off);
15449 off = gen_const_mem (tp_mode, off);
15450 set_mem_alias_set (off, ix86_GOT_alias_set ());
15452 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
15454 base = get_thread_pointer (tp_mode,
15455 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
15456 off = force_reg (tp_mode, off);
15457 return gen_rtx_PLUS (tp_mode, base, off);
15461 base = get_thread_pointer (Pmode, true);
15462 dest = gen_reg_rtx (Pmode);
15463 emit_insn (ix86_gen_sub3 (dest, base, off));
15467 case TLS_MODEL_LOCAL_EXEC:
15468 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
15469 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
15470 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
15471 off = gen_rtx_CONST (Pmode, off);
15473 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
15475 base = get_thread_pointer (Pmode,
15476 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
15477 return gen_rtx_PLUS (Pmode, base, off);
15481 base = get_thread_pointer (Pmode, true);
15482 dest = gen_reg_rtx (Pmode);
15483 emit_insn (ix86_gen_sub3 (dest, base, off));
15488 gcc_unreachable ();
15494 /* Create or return the unique __imp_DECL dllimport symbol corresponding
15495 to symbol DECL if BEIMPORT is true. Otherwise create or return the
15496 unique refptr-DECL symbol corresponding to symbol DECL. */
/* GC-aware hash traits for the decl -> import-stub map, keyed on the
   original decl's pointer identity.  */
15498 struct dllimport_hasher : ggc_cache_ptr_hash<tree_map>
15500 static inline hashval_t hash (tree_map *m) { return m->hash; }
15502 equal (tree_map *a, tree_map *b)
15504 return a->base.from == b->base.from;
/* Keep a cache entry only while its source decl is still GC-live.  */
15508 keep_cache_entry (tree_map *&m)
15510 return ggc_marked_p (m->base.from);
15514 static GTY((cache)) hash_table<dllimport_hasher> *dllimport_map;
/* Look up (or lazily create) the artificial VAR_DECL naming either the
   "__imp_DECL" dllimport pointer or the "refptr.DECL" reference pointer,
   depending on BEIMPORT.  The decl's DECL_RTL is a const MEM through
   which the real symbol's address is loaded.  */
15517 get_dllimport_decl (tree decl, bool beimport)
15519 struct tree_map *h, in;
15521 const char *prefix;
15522 size_t namelen, prefixlen;
15527 if (!dllimport_map)
15528 dllimport_map = hash_table<dllimport_hasher>::create_ggc (512);
15530 in.hash = htab_hash_pointer (decl);
15531 in.base.from = decl;
15532 tree_map **loc = dllimport_map->find_slot_with_hash (&in, in.hash, INSERT);
/* Cache miss: build the pointer decl and its SYMBOL_REF.  */
15537 *loc = h = ggc_alloc<tree_map> ();
15539 h->base.from = decl;
15540 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
15541 VAR_DECL, NULL, ptr_type_node);
15542 DECL_ARTIFICIAL (to) = 1;
15543 DECL_IGNORED_P (to) = 1;
15544 DECL_EXTERNAL (to) = 1;
15545 TREE_READONLY (to) = 1;
15547 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
15548 name = targetm.strip_name_encoding (name);
/* Pick the assembler-name prefix; the leading '*' suppresses further
   user_label_prefix mangling.  */
15550 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
15551 ? "*__imp_" : "*__imp__";
15553 prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
15554 namelen = strlen (name);
15555 prefixlen = strlen (prefix);
15556 imp_name = (char *) alloca (namelen + prefixlen + 1);
15557 memcpy (imp_name, prefix, prefixlen);
15558 memcpy (imp_name + prefixlen, name, namelen + 1);
15560 name = ggc_alloc_string (imp_name, namelen + prefixlen);
15561 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
15562 SET_SYMBOL_REF_DECL (rtl, to);
15563 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
15566 SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
15567 #ifdef SUB_TARGET_RECORD_STUB
15568 SUB_TARGET_RECORD_STUB (name);
/* The decl's RTL is a read-only load through the import pointer.  */
15572 rtl = gen_const_mem (Pmode, rtl);
15573 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
15575 SET_DECL_RTL (to, rtl);
15576 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
15581 /* Expand SYMBOL into its corresponding far-address symbol.
15582 WANT_REG is true if we require the result be a register. */
15585 legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
15590 gcc_assert (SYMBOL_REF_DECL (symbol));
15591 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);
15593 x = DECL_RTL (imp_decl);
15595 x = force_reg (Pmode, x);
15599 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
15600 true if we require the result be a register. */
15603 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
15608 gcc_assert (SYMBOL_REF_DECL (symbol));
15609 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);
15611 x = DECL_RTL (imp_decl);
15613 x = force_reg (Pmode, x);
15617 /* Expand SYMBOL into its corresponding dllimport or refptr symbol. WANT_REG
15618 is true if we require the result be a register. */
15621 legitimize_pe_coff_symbol (rtx addr, bool inreg)
15623 if (!TARGET_PECOFF)
/* dllimport symbols (bare or symbol+offset) resolve via __imp_ stubs.  */
15626 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
15628 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
15629 return legitimize_dllimport_symbol (addr, inreg);
15630 if (GET_CODE (addr) == CONST
15631 && GET_CODE (XEXP (addr, 0)) == PLUS
15632 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
15633 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
15635 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
15636 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
/* Outside the medium/large models, no refptr indirection is needed.  */
15640 if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
15642 if (GET_CODE (addr) == SYMBOL_REF
15643 && !is_imported_p (addr)
15644 && SYMBOL_REF_EXTERNAL_P (addr)
15645 && SYMBOL_REF_DECL (addr))
15646 return legitimize_pe_coff_extern_decl (addr, inreg);
15648 if (GET_CODE (addr) == CONST
15649 && GET_CODE (XEXP (addr, 0)) == PLUS
15650 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
15651 && !is_imported_p (XEXP (XEXP (addr, 0), 0))
15652 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
15653 && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
15655 rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
15656 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
15661 /* Try machine-dependent ways of modifying an illegitimate address
15662 to be legitimate. If we find one, return the new, valid address.
15663 This macro is used in only one place: `memory_address' in explow.c.
15665 OLDX is the address as it was before break_out_memory_refs was called.
15666 In some cases it is useful to look at this to decide what needs to be done.
15668 It is always safe for this macro to do nothing. It exists to recognize
15669 opportunities to optimize the output.
15671 For the 80386, we handle X+REG by loading X into a register R and
15672 using R+REG. R will go in a general reg and indexing will be used.
15673 However, if REG is a broken-out memory address or multiplication,
15674 nothing needs to be done because REG can certainly go in a general reg.
15676 When -fpic is used, special handling is needed for symbolic references.
15677 See comments by legitimize_pic_address in i386.c for details. */
15680 ix86_legitimize_address (rtx x, rtx, machine_mode mode)
15682 bool changed = false;
/* TLS symbols: expand through the TLS access sequences directly.  */
15685 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
15687 return legitimize_tls_address (x, (enum tls_model) log, false);
/* TLS symbol plus constant offset wrapped in CONST.  */
15688 if (GET_CODE (x) == CONST
15689 && GET_CODE (XEXP (x, 0)) == PLUS
15690 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
15691 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
15693 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
15694 (enum tls_model) log, false);
15695 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
/* PE/COFF dllimport/refptr symbols.  */
15698 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
15700 rtx tmp = legitimize_pe_coff_symbol (x, true);
/* PIC symbolic references get the full PIC treatment.  */
15705 if (flag_pic && SYMBOLIC_CONST (x))
15706 return legitimize_pic_address (x, 0);
15709 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
15710 return machopic_indirect_data_reference (x, 0);
15713 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
15714 if (GET_CODE (x) == ASHIFT
15715 && CONST_INT_P (XEXP (x, 1))
15716 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
15719 log = INTVAL (XEXP (x, 1));
15720 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
15721 GEN_INT (1 << log));
15724 if (GET_CODE (x) == PLUS)
15726 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
15728 if (GET_CODE (XEXP (x, 0)) == ASHIFT
15729 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
15730 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
15733 log = INTVAL (XEXP (XEXP (x, 0), 1));
15734 XEXP (x, 0) = gen_rtx_MULT (Pmode,
15735 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
15736 GEN_INT (1 << log));
15739 if (GET_CODE (XEXP (x, 1)) == ASHIFT
15740 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
15741 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
15744 log = INTVAL (XEXP (XEXP (x, 1), 1));
15745 XEXP (x, 1) = gen_rtx_MULT (Pmode,
15746 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
15747 GEN_INT (1 << log));
15750 /* Put multiply first if it isn't already. */
15751 if (GET_CODE (XEXP (x, 1)) == MULT)
15753 std::swap (XEXP (x, 0), XEXP (x, 1));
15757 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
15758 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
15759 created by virtual register instantiation, register elimination, and
15760 similar optimizations. */
15761 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
15764 x = gen_rtx_PLUS (Pmode,
15765 gen_rtx_PLUS (Pmode, XEXP (x, 0),
15766 XEXP (XEXP (x, 1), 0)),
15767 XEXP (XEXP (x, 1), 1));
/* Likewise canonicalize the deeper nested form:  */
15771 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
15772 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
15773 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
15774 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
15775 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
15776 && CONSTANT_P (XEXP (x, 1)))
15779 rtx other = NULL_RTX;
/* Pick whichever of the two addends is the CONST_INT; the remaining
   one becomes OTHER, to be folded via plus_constant below.  */
15781 if (CONST_INT_P (XEXP (x, 1)))
15783 constant = XEXP (x, 1);
15784 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
15786 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
15788 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
15789 other = XEXP (x, 1);
15797 x = gen_rtx_PLUS (Pmode,
15798 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
15799 XEXP (XEXP (XEXP (x, 0), 1), 0)),
15800 plus_constant (Pmode, other,
15801 INTVAL (constant)));
/* Early exit once a canonicalization produced a legitimate address.  */
15805 if (changed && ix86_legitimate_address_p (mode, x, false))
/* Force MULT sub-addresses into registers so the remaining form is
   base+index.  */
15808 if (GET_CODE (XEXP (x, 0)) == MULT)
15811 XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
15814 if (GET_CODE (XEXP (x, 1)) == MULT)
15817 XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
15821 && REG_P (XEXP (x, 1))
15822 && REG_P (XEXP (x, 0)))
15825 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
15828 x = legitimize_pic_address (x, 0);
15831 if (changed && ix86_legitimate_address_p (mode, x, false))
/* One side is already a register: force the other operand into a
   fresh Pmode register (zero-extending if narrower).  */
15834 if (REG_P (XEXP (x, 0)))
15836 rtx temp = gen_reg_rtx (Pmode);
15837 rtx val = force_operand (XEXP (x, 1), temp);
15840 val = convert_to_mode (Pmode, val, 1);
15841 emit_move_insn (temp, val);
15844 XEXP (x, 1) = temp;
15848 else if (REG_P (XEXP (x, 1)))
15850 rtx temp = gen_reg_rtx (Pmode);
15851 rtx val = force_operand (XEXP (x, 0), temp);
15854 val = convert_to_mode (Pmode, val, 1);
15855 emit_move_insn (temp, val);
15858 XEXP (x, 0) = temp;
15866 /* Print an integer constant expression in assembler syntax. Addition
15867 and subtraction are the only arithmetic that may appear in these
15868 expressions. FILE is the stdio stream to write to, X is the rtx, and
15869 CODE is the operand print code from the output string. */
15872 output_pic_addr_const (FILE *file, rtx x, int code)
15876 switch (GET_CODE (x))
/* PC-relative reference: only valid when compiling PIC.  */
15879 gcc_assert (flag_pic);
15884 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
15885 output_addr_const (file, x);
15888 const char *name = XSTR (x, 0);
15890 /* Mark the decl as referenced so that cgraph will
15891 output the function. */
15892 if (SYMBOL_REF_DECL (x))
15893 mark_decl_referenced (SYMBOL_REF_DECL (x));
/* On Darwin, undefined functions are referenced via their stub.  */
15896 if (MACHOPIC_INDIRECT
15897 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
15898 name = machopic_indirection_name (x, /*stub_p=*/true);
15900 assemble_name (file, name);
/* 'P' code on non-local symbols asks for a PLT-relative reference.  */
15902 if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
15903 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
15904 fputs ("@PLT", file);
/* Internal label reference.  */
15911 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
15912 assemble_name (asm_out_file, buf);
15916 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
15920 /* This used to output parentheses around the expression,
15921 but that does not work on the 386 (either ATT or BSD assembler). */
15922 output_pic_addr_const (file, XEXP (x, 0), code);
15926 /* We can't handle floating point constants;
15927 TARGET_PRINT_OPERAND must handle them. */
15928 output_operand_lossage ("floating constant misused");
15932 /* Some assemblers need integer constants to appear first. */
15933 if (CONST_INT_P (XEXP (x, 0)))
15935 output_pic_addr_const (file, XEXP (x, 0), code);
15937 output_pic_addr_const (file, XEXP (x, 1), code);
/* Reversed operand order when the constant is the second operand.  */
15941 gcc_assert (CONST_INT_P (XEXP (x, 1)));
15942 output_pic_addr_const (file, XEXP (x, 1), code);
15944 output_pic_addr_const (file, XEXP (x, 0), code);
/* MINUS: bracket/parenthesize per assembler dialect.  */
15950 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
15951 output_pic_addr_const (file, XEXP (x, 0), code);
15953 output_pic_addr_const (file, XEXP (x, 1), code);
15955 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
/* UNSPEC: emit the wrapped operand followed by the relocation
   operator that the UNSPEC number denotes.  */
15959 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
15961 bool f = i386_asm_output_addr_const_extra (file, x);
15966 gcc_assert (XVECLEN (x, 0) == 1);
15967 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
15968 switch (XINT (x, 1))
15971 fputs ("@GOT", file);
15973 case UNSPEC_GOTOFF:
15974 fputs ("@GOTOFF", file);
15976 case UNSPEC_PLTOFF:
15977 fputs ("@PLTOFF", file);
15980 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
15981 "(%rip)" : "[rip]", file);
15983 case UNSPEC_GOTPCREL:
15984 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
15985 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
15987 case UNSPEC_GOTTPOFF:
15988 /* FIXME: This might be @TPOFF in Sun ld too. */
15989 fputs ("@gottpoff", file);
15992 fputs ("@tpoff", file);
15994 case UNSPEC_NTPOFF:
15996 fputs ("@tpoff", file);
15998 fputs ("@ntpoff", file);
16000 case UNSPEC_DTPOFF:
16001 fputs ("@dtpoff", file);
16003 case UNSPEC_GOTNTPOFF:
16005 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
16006 "@gottpoff(%rip)": "@gottpoff[rip]", file);
16008 fputs ("@gotntpoff", file);
16010 case UNSPEC_INDNTPOFF:
16011 fputs ("@indntpoff", file);
16014 case UNSPEC_MACHOPIC_OFFSET:
16016 machopic_output_function_base_name (file);
16020 output_operand_lossage ("invalid UNSPEC as operand");
16026 output_operand_lossage ("invalid expression as operand");
16030 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
16031 We need to emit DTP-relative relocations. */
16033 static void ATTRIBUTE_UNUSED
16034 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
/* Emit the address with a @dtpoff relocation operator; SIZE selects
   the directive/padding (switch arms elided here).  */
16036 fputs (ASM_LONG, file);
16037 output_addr_const (file, x);
16038 fputs ("@dtpoff", file);
/* Pad the upper half with ", 0" for the wider size.  */
16044 fputs (", 0", file);
/* Any other SIZE is a bug in the caller.  */
16047 gcc_unreachable ();
16051 /* Return true if X is a representation of the PIC register. This copes
16052 with calls from ix86_find_base_term, where the register might have
16053 been replaced by a cselib value. */
16056 ix86_pic_register_p (rtx x)
/* A cselib VALUE stands for the PIC register if it compares equal to
   pic_offset_table_rtx under cselib's equivalence.  */
16058 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
16059 return (pic_offset_table_rtx
16060 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx))
16061 else if (!REG_P (x))
16063 else if (pic_offset_table_rtx)
/* Direct register-number match with the current PIC pseudo/hard reg.  */
16065 if (REGNO (x) == REGNO (pic_offset_table_rtx))
/* A hard register whose ORIGINAL_REGNO records the PIC pseudo also
   counts, when the PIC register itself is still a pseudo.  */
16067 if (HARD_REGISTER_P (x)
16068 && !HARD_REGISTER_P (pic_offset_table_rtx)
16069 && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
/* No pic_offset_table_rtx: fall back to the fixed PIC register number.  */
16074 return REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
16077 /* Helper function for ix86_delegitimize_address.
16078 Attempt to delegitimize TLS local-exec accesses. */
16081 ix86_delegitimize_tls_address (rtx orig_x)
16083 rtx x = orig_x, unspec;
16084 struct ix86_address addr;
/* Direct %fs/%gs-relative TLS references only exist with
   TARGET_TLS_DIRECT_SEG_REFS.  */
16086 if (!TARGET_TLS_DIRECT_SEG_REFS)
16090 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
/* The address must decompose with the TLS segment register and carry a
   CONST displacement wrapping the NTPOFF unspec.  */
16092 if (ix86_decompose_address (x, &addr) == 0
16093 || addr.seg != DEFAULT_TLS_SEG_REG
16094 || addr.disp == NULL_RTX
16095 || GET_CODE (addr.disp) != CONST)
16097 unspec = XEXP (addr.disp, 0)
16098 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
16099 unspec = XEXP (unspec, 0);
16100 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
/* Strip the unspec down to the underlying thread-local symbol.  */
16102 x = XVECEXP (unspec, 0, 0);
16103 gcc_assert (GET_CODE (x) == SYMBOL_REF);
/* Re-apply the constant offset that was added to the unspec.  */
16104 if (unspec != XEXP (addr.disp, 0))
16105 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
/* Re-apply any index (scaled) and base parts of the address.  */
16108 rtx idx = addr.index;
16109 if (addr.scale != 1)
16110 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
16111 x = gen_rtx_PLUS (Pmode, idx, x);
16114 x = gen_rtx_PLUS (Pmode, addr.base, x);
/* Preserve the MEM attributes of the original reference.  */
16115 if (MEM_P (orig_x))
16116 x = replace_equiv_address_nv (orig_x, x);
16120 /* In the name of slightly smaller debug output, and to cater to
16121 general assembler lossage, recognize PIC+GOTOFF and turn it back
16122 into a direct symbol reference.
16124 On Darwin, this is necessary to avoid a crash, because Darwin
16125 has a different PIC label for each routine but the DWARF debugging
16126 information is not associated with any particular routine, so it's
16127 necessary to remove references to the PIC label from RTL stored by
16128 the DWARF output code. */
16131 ix86_delegitimize_address (rtx x)
16133 rtx orig_x = delegitimize_mem_from_attrs (x);
16134 /* addend is NULL or some rtx if x is something+GOTOFF where
16135 something doesn't include the PIC register. */
16136 rtx addend = NULL_RTX;
16137 /* reg_addend is NULL or a multiple of some register. */
16138 rtx reg_addend = NULL_RTX;
16139 /* const_addend is NULL or a const_int. */
16140 rtx const_addend = NULL_RTX;
16141 /* This is the result, or NULL. */
16142 rtx result = NULL_RTX;
/* 64-bit: (const (plus (unspec PCREL) const_int)) -> symbol + offset.  */
16151 if (GET_CODE (x) == CONST
16152 && GET_CODE (XEXP (x, 0)) == PLUS
16153 && GET_MODE (XEXP (x, 0)) == Pmode
16154 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
16155 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
16156 && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
16158 rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
16159 x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
16160 if (MEM_P (orig_x))
16161 x = replace_equiv_address_nv (orig_x, x);
/* 64-bit: strip a bare GOTPCREL/PCREL unspec.  */
16165 if (GET_CODE (x) == CONST
16166 && GET_CODE (XEXP (x, 0)) == UNSPEC
16167 && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
16168 || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
16169 && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
16171 x = XVECEXP (XEXP (x, 0), 0, 0);
16172 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
16174 x = simplify_gen_subreg (GET_MODE (orig_x), x,
/* Small 64-bit code models have no PIC-register forms; only TLS
   local-exec remains to delegitimize.  */
16182 if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
16183 return ix86_delegitimize_tls_address (orig_x);
16185 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
16186 and -mcmodel=medium -fpic. */
16189 if (GET_CODE (x) != PLUS
16190 || GET_CODE (XEXP (x, 1)) != CONST)
16191 return ix86_delegitimize_tls_address (orig_x);
16193 if (ix86_pic_register_p (XEXP (x, 0)))
16194 /* %ebx + GOT/GOTOFF */
16196 else if (GET_CODE (XEXP (x, 0)) == PLUS)
16198 /* %ebx + %reg * scale + GOT/GOTOFF */
16199 reg_addend = XEXP (x, 0);
16200 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
16201 reg_addend = XEXP (reg_addend, 1);
16202 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
16203 reg_addend = XEXP (reg_addend, 0);
/* Neither side is the PIC register: treat the whole first operand as
   an addend to be compensated for below.  */
16206 reg_addend = NULL_RTX;
16207 addend = XEXP (x, 0);
16211 addend = XEXP (x, 0);
/* Descend into the CONST and split off a trailing constant offset.  */
16213 x = XEXP (XEXP (x, 1), 0);
16214 if (GET_CODE (x) == PLUS
16215 && CONST_INT_P (XEXP (x, 1)))
16217 const_addend = XEXP (x, 1);
/* Recognize the GOT/GOTOFF/PLTOFF unspecs in the contexts where each
   is valid, and take the wrapped symbol as the result.  */
16221 if (GET_CODE (x) == UNSPEC
16222 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
16223 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
16224 || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
16225 && !MEM_P (orig_x) && !addend)))
16226 result = XVECEXP (x, 0, 0);
16228 if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
16229 && !MEM_P (orig_x))
16230 result = XVECEXP (x, 0, 0);
16233 return ix86_delegitimize_tls_address (orig_x);
/* Re-attach the pieces split off above.  */
16236 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
16238 result = gen_rtx_PLUS (Pmode, reg_addend, result);
16241 /* If the rest of original X doesn't involve the PIC register, add
16242 addend and subtract pic_offset_table_rtx. This can happen e.g.
16244 leal (%ebx, %ecx, 4), %ecx
16246 movl foo@GOTOFF(%ecx), %edx
16247 in which case we return (%ecx - %ebx) + foo
16248 or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if pseudo_pic_reg
16249 and reload has completed. */
16250 if (pic_offset_table_rtx
16251 && (!reload_completed || !ix86_use_pseudo_pic_reg ()))
16252 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
16253 pic_offset_table_rtx),
16255 else if (pic_offset_table_rtx && !TARGET_MACHO && !TARGET_VXWORKS_RTP)
16257 rtx tmp = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
16258 tmp = gen_rtx_MINUS (Pmode, copy_rtx (addend), tmp);
16259 result = gen_rtx_PLUS (Pmode, tmp, result);
/* Narrow back to the original access mode if necessary.  */
16264 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
16266 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
16267 if (result == NULL_RTX)
16273 /* If X is a machine specific address (i.e. a symbol or label being
16274 referenced as a displacement from the GOT implemented using an
16275 UNSPEC), then return the base term. Otherwise return X. */
16278 ix86_find_base_term (rtx x)
16284 if (GET_CODE (x) != CONST)
16286 term = XEXP (x, 0);
/* Skip over an added constant offset.  */
16287 if (GET_CODE (term) == PLUS
16288 && CONST_INT_P (XEXP (term, 1)))
16289 term = XEXP (term, 0);
/* Only GOTPCREL/PCREL unspecs hide a base term.  */
16290 if (GET_CODE (term) != UNSPEC
16291 || (XINT (term, 1) != UNSPEC_GOTPCREL
16292 && XINT (term, 1) != UNSPEC_PCREL))
16295 return XVECEXP (term, 0, 0);
/* Otherwise fall back to full delegitimization.  */
16298 return ix86_delegitimize_address (x);
/* Write to FILE the condition-code suffix (e.g. "a", "c", "np") for
   comparison CODE in flags mode MODE.  REVERSE inverts the condition;
   FP selects the fcmov-compatible spelling of the suffix.  */
16302 put_condition_code (enum rtx_code code, machine_mode mode, bool reverse,
16303 bool fp, FILE *file)
16305 const char *suffix;
/* FP compares are first mapped onto the equivalent integer condition.  */
16307 if (mode == CCFPmode || mode == CCFPUmode)
16309 code = ix86_fp_compare_code_to_integer (code);
16313 code = reverse_condition (code);
16364 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
16368 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
16369 Those same assemblers have the same but opposite lossage on cmov. */
16370 if (mode == CCmode)
16371 suffix = fp ? "nbe" : "a";
16373 gcc_unreachable ();
16389 gcc_unreachable ();
/* Below-style conditions: only meaningful in CC/CCC modes.  */
16393 if (mode == CCmode)
16395 else if (mode == CCCmode)
16396 suffix = fp ? "b" : "c";
16398 gcc_unreachable ();
16414 gcc_unreachable ();
16418 if (mode == CCmode)
16420 else if (mode == CCCmode)
16421 suffix = fp ? "nb" : "nc";
16423 gcc_unreachable ();
16426 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
16430 if (mode == CCmode)
16433 gcc_unreachable ();
/* Parity conditions (unordered FP results).  */
16436 suffix = fp ? "u" : "p";
16439 suffix = fp ? "nu" : "np";
16442 gcc_unreachable ();
16444 fputs (suffix, file);
16447 /* Print the name of register X to FILE based on its machine mode and number.
16448 If CODE is 'w', pretend the mode is HImode.
16449 If CODE is 'b', pretend the mode is QImode.
16450 If CODE is 'k', pretend the mode is SImode.
16451 If CODE is 'q', pretend the mode is DImode.
16452 If CODE is 'x', pretend the mode is V4SFmode.
16453 If CODE is 't', pretend the mode is V8SFmode.
16454 If CODE is 'g', pretend the mode is V16SFmode.
16455 If CODE is 'h', pretend the reg is the 'high' byte register.
16456 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
16457 If CODE is 'd', duplicate the operand for AVX instruction.
16461 print_reg (rtx x, int code, FILE *file)
16465 unsigned int regno;
/* AT&T syntax prefixes register names with '%'.  */
16468 if (ASSEMBLER_DIALECT == ASM_ATT)
/* The PC as a register only exists in 64-bit mode ("rip").  */
16473 gcc_assert (TARGET_64BIT);
16474 fputs ("rip", file);
16478 if (code == 'y' && STACK_TOP_P (x))
16480 fputs ("st(0)", file);
/* Map the override code to an effective operand size (elided arms set
   msize for each code; see the header comment above).  */
16486 else if (code == 'b')
16488 else if (code == 'k')
16490 else if (code == 'q')
16492 else if (code == 'h')
16494 else if (code == 'x')
16496 else if (code == 't')
16498 else if (code == 'g')
16501 msize = GET_MODE_SIZE (GET_MODE (x));
16503 regno = true_regnum (x);
/* These registers should never appear in assembler output.  */
16505 gcc_assert (regno != ARG_POINTER_REGNUM
16506 && regno != FRAME_POINTER_REGNUM
16507 && regno != FLAGS_REG
16508 && regno != FPSR_REG
16509 && regno != FPCR_REG);
16511 duplicated = code == 'd' && TARGET_AVX;
/* Legacy integer registers get an 'e'/'r' size prefix (eax/rax).  */
16517 if (LEGACY_INT_REGNO_P (regno))
16518 putc (msize == 8 && TARGET_64BIT ? 'r' : 'e', file);
16523 reg = hi_reg_name[regno];
16526 if (regno >= ARRAY_SIZE (qi_reg_name))
16528 reg = qi_reg_name[regno];
16531 if (regno >= ARRAY_SIZE (qi_high_reg_name))
16533 reg = qi_high_reg_name[regno];
/* SSE registers: choose the xmm/ymm/zmm prefix by operand size.  */
16537 if (SSE_REGNO_P (regno))
16539 gcc_assert (!duplicated);
16540 putc (msize == 32 ? 'y' : 'z', file);
16541 reg = hi_reg_name[regno] + 1;
16546 gcc_unreachable ();
16551 /* Irritatingly, AMD extended registers use
16552 different naming convention: "r%d[bwd]" */
16553 if (REX_INT_REGNO_P (regno))
16555 gcc_assert (TARGET_64BIT);
16559 error ("extended registers have no high halves");
16574 error ("unsupported operand size for extended register");
/* 'd': print the register twice, comma-separated, for AVX.  */
16582 if (ASSEMBLER_DIALECT == ASM_ATT)
16583 fprintf (file, ", %%%s", reg);
16585 fprintf (file, ", %s", reg);
16589 /* Meaning of CODE:
16590 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
16591 C -- print opcode suffix for set/cmov insn.
16592 c -- like C, but print reversed condition
16593 F,f -- likewise, but for floating-point.
16594 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
16596 R -- print embedded rounding and sae.
16597 r -- print only sae.
16598 z -- print the opcode suffix for the size of the current operand.
16599 Z -- likewise, with special suffixes for x87 instructions.
16600 * -- print a star (in certain assembler syntax)
16601 A -- print an absolute memory reference.
16602 E -- print address with DImode register names if TARGET_64BIT.
16603 w -- print the operand as if it's a "word" (HImode) even if it isn't.
16604 s -- print a shift double count, followed by the assemblers argument
16606 b -- print the QImode name of the register for the indicated operand.
16607 %b0 would print %al if operands[0] is reg 0.
16608 w -- likewise, print the HImode name of the register.
16609 k -- likewise, print the SImode name of the register.
16610 q -- likewise, print the DImode name of the register.
16611 x -- likewise, print the V4SFmode name of the register.
16612 t -- likewise, print the V8SFmode name of the register.
16613 g -- likewise, print the V16SFmode name of the register.
16614 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
16615 y -- print "st(0)" instead of "st" as a register.
16616 d -- print duplicated register operand for AVX instruction.
16617 D -- print condition for SSE cmp instruction.
16618 P -- if PIC, print an @PLT suffix.
16619 p -- print raw symbol name.
16620 X -- don't print any sort of PIC '@' suffix for a symbol.
16621 & -- print some in-use local-dynamic symbol name.
16622 H -- print a memory address offset by 8; used for sse high-parts
16623 Y -- print condition for XOP pcom* instruction.
16624 + -- print a branch hint as 'cs' or 'ds' prefix
16625 ; -- print a semicolon (after prefixes due to bug in older gas).
16626 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
16627 @ -- print a segment register of thread base pointer load
16628 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
16629 ! -- print MPX prefix for jxx/call/ret instructions if required.
16633 ix86_print_operand (FILE *file, rtx x, int code)
/* '*': a star in AT&T syntax; dialect-dependent handling.  */
16640 switch (ASSEMBLER_DIALECT)
16647 /* Intel syntax. For absolute addresses, registers should not
16648 be surrounded by braces. */
16652 ix86_print_operand (file, x, 0);
16659 gcc_unreachable ();
16662 ix86_print_operand (file, x, 0);
/* 'E': force DImode (64-bit) address printing via UNSPEC_LEA_ADDR.  */
16666 /* Wrap address in an UNSPEC to declare special handling. */
16668 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
16670 output_address (VOIDmode, x);
/* Size-suffix codes (L/W/B/Q/S/T): emit the AT&T suffix character
   (elided putc calls follow each test).  */
16674 if (ASSEMBLER_DIALECT == ASM_ATT)
16679 if (ASSEMBLER_DIALECT == ASM_ATT)
16684 if (ASSEMBLER_DIALECT == ASM_ATT)
16689 if (ASSEMBLER_DIALECT == ASM_ATT)
16694 if (ASSEMBLER_DIALECT == ASM_ATT)
16699 if (ASSEMBLER_DIALECT == ASM_ATT)
16704 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
16705 if (ASSEMBLER_DIALECT != ASM_ATT)
16708 switch (GET_MODE_SIZE (GET_MODE (x)))
16723 output_operand_lossage
16724 ("invalid operand size for operand code 'O'");
/* 'z': generic opcode size suffix derived from the operand's mode.  */
16733 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
16735 /* Opcodes don't get size suffixes if using Intel opcodes. */
16736 if (ASSEMBLER_DIALECT == ASM_INTEL)
16739 switch (GET_MODE_SIZE (GET_MODE (x)))
16758 output_operand_lossage
16759 ("invalid operand size for operand code 'z'");
16764 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
16766 (0, "non-integer operand used with operand code 'z'");
/* 'Z': x87-flavored size suffixes (fild/fist family).  */
16770 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
16771 if (ASSEMBLER_DIALECT == ASM_INTEL)
16774 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
16776 switch (GET_MODE_SIZE (GET_MODE (x)))
16779 #ifdef HAVE_AS_IX86_FILDS
16789 #ifdef HAVE_AS_IX86_FILDQ
16792 fputs ("ll", file);
16800 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
16802 /* 387 opcodes don't get size suffixes
16803 if the operands are registers. */
16804 if (STACK_REG_P (x))
16807 switch (GET_MODE_SIZE (GET_MODE (x)))
16828 output_operand_lossage
16829 ("invalid operand type used with operand code 'Z'");
16833 output_operand_lossage
16834 ("invalid operand size for operand code 'Z'");
/* 's': shift-double count plus separator.  */
16853 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
16855 ix86_print_operand (file, x, 0);
16856 fputs (", ", file);
/* 'Y': condition name for XOP pcom* instructions.  */
16861 switch (GET_CODE (x))
16864 fputs ("neq", file);
16867 fputs ("eq", file);
16871 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
16875 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
16879 fputs ("le", file);
16883 fputs ("lt", file);
16886 fputs ("unord", file);
16889 fputs ("ord", file);
16892 fputs ("ueq", file);
16895 fputs ("nlt", file);
16898 fputs ("nle", file);
16901 fputs ("ule", file);
16904 fputs ("ult", file);
16907 fputs ("une", file);
16910 output_operand_lossage ("operand is not a condition code, "
16911 "invalid operand code 'Y'");
16917 /* Little bit of braindamage here. The SSE compare instructions
16918 use completely different names for the comparisons than the
16919 fp conditional moves. */
16920 switch (GET_CODE (x))
16925 fputs ("eq_us", file);
16929 fputs ("eq", file);
16934 fputs ("nge", file);
16938 fputs ("lt", file);
16943 fputs ("ngt", file);
16947 fputs ("le", file);
16950 fputs ("unord", file);
16955 fputs ("neq_oq", file);
16959 fputs ("neq", file);
16964 fputs ("ge", file);
16968 fputs ("nlt", file);
16973 fputs ("gt", file);
16977 fputs ("nle", file);
16980 fputs ("ord", file);
16983 output_operand_lossage ("operand is not a condition code, "
16984 "invalid operand code 'D'");
/* 'C'/'c'/'F'/'f': condition-code suffix, possibly reversed and/or in
   fcmov spelling, via put_condition_code.  */
16991 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
16992 if (ASSEMBLER_DIALECT == ASM_ATT)
16998 if (!COMPARISON_P (x))
17000 output_operand_lossage ("operand is not a condition code, "
17001 "invalid operand code '%c'", code);
17004 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
17005 code == 'c' || code == 'f',
17006 code == 'F' || code == 'f',
/* 'H': same memory operand at offset +8 (sse high-parts).  */
17011 if (!offsettable_memref_p (x))
17013 output_operand_lossage ("operand is not an offsettable memory "
17014 "reference, invalid operand code 'H'");
17017 /* It doesn't actually matter what mode we use here, as we're
17018 only going to use this for printing. */
17019 x = adjust_address_nv (x, DImode, 8);
17020 /* Output 'qword ptr' for intel assembler dialect. */
17021 if (ASSEMBLER_DIALECT == ASM_INTEL)
/* HLE prefix: emit xacquire/xrelease (or raw bytes on old gas).  */
17026 gcc_assert (CONST_INT_P (x));
17028 if (INTVAL (x) & IX86_HLE_ACQUIRE)
17029 #ifdef HAVE_AS_IX86_HLE
17030 fputs ("xacquire ", file);
17032 fputs ("\n" ASM_BYTE "0xf2\n\t", file);
17034 else if (INTVAL (x) & IX86_HLE_RELEASE)
17035 #ifdef HAVE_AS_IX86_HLE
17036 fputs ("xrelease ", file);
17038 fputs ("\n" ASM_BYTE "0xf3\n\t", file);
17040 /* We do not want to print value of the operand. */
/* AVX-512 zero-masking marker.  */
17044 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
17045 fputs ("{z}", file);
/* 'r': suppress-all-exceptions marker only.  */
17049 gcc_assert (CONST_INT_P (x));
17050 gcc_assert (INTVAL (x) == ROUND_SAE);
17052 if (ASSEMBLER_DIALECT == ASM_INTEL)
17053 fputs (", ", file);
17055 fputs ("{sae}", file);
17057 if (ASSEMBLER_DIALECT == ASM_ATT)
17058 fputs (", ", file);
/* 'R': embedded rounding mode + sae marker.  */
17063 gcc_assert (CONST_INT_P (x));
17065 if (ASSEMBLER_DIALECT == ASM_INTEL)
17066 fputs (", ", file);
17068 switch (INTVAL (x))
17070 case ROUND_NEAREST_INT | ROUND_SAE:
17071 fputs ("{rn-sae}", file);
17073 case ROUND_NEG_INF | ROUND_SAE:
17074 fputs ("{rd-sae}", file);
17076 case ROUND_POS_INF | ROUND_SAE:
17077 fputs ("{ru-sae}", file);
17079 case ROUND_ZERO | ROUND_SAE:
17080 fputs ("{rz-sae}", file);
17083 gcc_unreachable ();
17086 if (ASSEMBLER_DIALECT == ASM_ATT)
17087 fputs (", ", file);
17092 if (ASSEMBLER_DIALECT == ASM_ATT)
/* '&': some local-dynamic TLS symbol already used in this function.  */
17098 const char *name = get_some_local_dynamic_name ();
17100 output_operand_lossage ("'%%&' used without any "
17101 "local dynamic TLS references")
17103 assemble_name (file, name);
/* '+': branch prediction hint prefixes.  */
17112 || optimize_function_for_size_p (cfun)
17113 || !TARGET_BRANCH_PREDICTION_HINTS)
17116 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
17119 int pred_val = XINT (x, 0);
/* Only hint clearly biased branches (outside 45%-55%).  */
17121 if (pred_val < REG_BR_PROB_BASE * 45 / 100
17122 || pred_val > REG_BR_PROB_BASE * 55 / 100)
17124 bool taken = pred_val > REG_BR_PROB_BASE / 2;
17126 = final_forward_branch_p (current_output_insn) == 0;
17128 /* Emit hints only in the case default branch prediction
17129 heuristics would fail. */
17130 if (taken != cputaken)
17132 /* We use 3e (DS) prefix for taken branches and
17133 2e (CS) prefix for not taken branches. */
17135 fputs ("ds ; ", file);
17137 fputs ("cs ; ", file);
17145 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
/* '@': TLS base segment register (fs on 64-bit userland, else gs).  */
17151 if (ASSEMBLER_DIALECT == ASM_ATT)
17154 /* The kernel uses a different segment register for performance
17155 reasons; a system call would not have to trash the userspace
17156 segment register, which would be expensive. */
17157 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
17158 fputs ("fs", file);
17160 fputs ("gs", file);
17164 putc (TARGET_AVX2 ? 'i' : 'f', file);
17168 if (TARGET_64BIT && Pmode != word_mode)
17169 fputs ("addr32 ", file);
17173 if (ix86_bnd_prefixed_insn_p (current_output_insn))
17174 fputs ("bnd ", file);
17178 output_operand_lossage ("invalid operand code '%c'", code);
/* After the code switch: print the operand itself by kind --
   registers, memory, FP constants, then integers/symbols.  */
17183 print_reg (x, code, file);
17185 else if (MEM_P (x))
17187 rtx addr = XEXP (x, 0);
17189 /* No `byte ptr' prefix for call instructions ... */
17190 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
17192 machine_mode mode = GET_MODE (x);
17195 /* Check for explicit size override codes. */
17198 else if (code == 'w')
17200 else if (code == 'k')
17202 else if (code == 'q')
17204 else if (code == 'x')
17206 else if (mode == BLKmode)
17207 /* ... or BLKmode operands, when not overridden. */
17210 switch (GET_MODE_SIZE (mode))
17212 case 1: size = "BYTE"; break;
17213 case 2: size = "WORD"; break;
17214 case 4: size = "DWORD"; break;
17215 case 8: size = "QWORD"; break;
17216 case 12: size = "TBYTE"; break;
17218 if (mode == XFmode)
17223 case 32: size = "YMMWORD"; break;
17224 case 64: size = "ZMMWORD"; break;
17226 gcc_unreachable ();
17230 fputs (size, file);
17231 fputs (" PTR ", file);
/* Inline asm may hand us an address no pattern would accept.  */
17235 if (this_is_asm_operands && ! address_operand (addr, VOIDmode))
17236 output_operand_lossage ("invalid constraints for operand");
17238 ix86_print_operand_address_as
17239 (file, addr, MEM_ADDR_SPACE (x), code == 'p' || code == 'P');
/* SFmode constant: print its 32-bit image as hex immediate.  */
17242 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == SFmode)
17246 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
17248 if (ASSEMBLER_DIALECT == ASM_ATT)
17250 /* Sign extend 32bit SFmode immediate to 8 bytes. */
17252 fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
17253 (unsigned long long) (int) l);
17255 fprintf (file, "0x%08x", (unsigned int) l);
/* DFmode constant: print its 64-bit image as hex immediate.  */
17258 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == DFmode)
17262 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), l);
17264 if (ASSEMBLER_DIALECT == ASM_ATT)
17266 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
17269 /* These float cases don't actually occur as immediate operands. */
17270 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == XFmode)
17274 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
17275 fputs (dstr, file);
17280 /* We have patterns that allow zero sets of memory, for instance.
17281 In 64-bit mode, we should probably support all 8-byte vectors,
17282 since we can in fact encode that into an immediate. */
17283 if (GET_CODE (x) == CONST_VECTOR)
17285 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
/* Immediate prefix ('$' in AT&T, "OFFSET FLAT:" in Intel) unless the
   caller asked for a raw symbol with 'p'/'P'.  */
17289 if (code != 'P' && code != 'p')
17291 if (CONST_INT_P (x))
17293 if (ASSEMBLER_DIALECT == ASM_ATT)
17296 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
17297 || GET_CODE (x) == LABEL_REF)
17299 if (ASSEMBLER_DIALECT == ASM_ATT)
17302 fputs ("OFFSET FLAT:", file);
17305 if (CONST_INT_P (x))
17306 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
17307 else if (flag_pic || MACHOPIC_INDIRECT)
17308 output_pic_addr_const (file, x, code);
17310 output_addr_const (file, x);
/* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P.  Return true if CODE is
   one of the punctuation characters ('@', '*', '+', '&', ';', '~', '^',
   '!') that the x86 operand printer handles specially.
   NOTE(review): reconstructed to well-formed C from a listing that had
   dropped the return-type line and braces; logic is byte-identical to
   the visible lines.  */
static bool
ix86_print_operand_punct_valid_p (unsigned char code)
{
  return (code == '@' || code == '*' || code == '+' || code == '&'
	  || code == ';' || code == '~' || code == '^' || code == '!');
}
17321 /* Print a memory operand whose address is ADDR. */
/* Worker for address printing: emit memory address ADDR to FILE for
   address space AS; NO_RIP true suppresses RIP-relative forms.
   NOTE(review): this listing embeds original-file line numbers and omits
   many lines (declarations, braces, some branches); the code below is
   preserved byte-for-byte — comments only describe the visible code.  */
17324 ix86_print_operand_address_as (FILE *file, rtx addr,
17325 addr_space_t as, bool no_rip)
17327 struct ix86_address parts;
17328 rtx base, index, disp;
/* VSIB (gather/scatter) addresses wrap the real address in an UNSPEC;
   unpack the index register and scale from the UNSPEC operands.  */
17334 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
17336 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
17337 gcc_assert (parts.index == NULL_RTX);
17338 parts.index = XVECEXP (addr, 0, 1);
17339 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
17340 addr = XVECEXP (addr, 0, 0);
17343 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
17345 gcc_assert (TARGET_64BIT);
17346 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
/* MPX bound-make addresses: fold the extra operand into base/index.  */
17349 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDMK_ADDR)
17351 ok = ix86_decompose_address (XVECEXP (addr, 0, 1), &parts);
17352 gcc_assert (parts.base == NULL_RTX || parts.index == NULL_RTX);
17353 if (parts.base != NULL_RTX)
17355 parts.index = parts.base;
17358 parts.base = XVECEXP (addr, 0, 0);
17359 addr = XVECEXP (addr, 0, 0);
17361 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDLDX_ADDR)
17363 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
17364 gcc_assert (parts.index == NULL_RTX);
17365 parts.index = XVECEXP (addr, 0, 1);
17366 addr = XVECEXP (addr, 0, 0);
/* Plain address: decompose into base/index/disp/scale/seg.  */
17369 ok = ix86_decompose_address (addr, &parts);
17374 index = parts.index;
17376 scale = parts.scale;
17378 if (ADDR_SPACE_GENERIC_P (as))
17381 gcc_assert (ADDR_SPACE_GENERIC_P (parts.seg));
/* Non-generic address space: print an explicit %fs:/%gs: segment
   override (dialect-dependent spelling).  */
17383 if (!ADDR_SPACE_GENERIC_P (as))
17385 const char *string;
17387 if (as == ADDR_SPACE_SEG_TLS)
17388 as = DEFAULT_TLS_SEG_REG;
17389 if (as == ADDR_SPACE_SEG_FS)
17390 string = (ASSEMBLER_DIALECT == ASM_ATT ? "%fs:" : "fs:");
17391 else if (as == ADDR_SPACE_SEG_GS)
17392 string = (ASSEMBLER_DIALECT == ASM_ATT ? "%gs:" : "gs:");
17394 gcc_unreachable ();
17395 fputs (string, file);
17398 /* Use one byte shorter RIP relative addressing for 64bit mode.  */
17399 if (TARGET_64BIT && !base && !index && !no_rip)
/* Strip a CONST (PLUS symbol const_int) wrapper to find the symbol.  */
17403 if (GET_CODE (disp) == CONST
17404 && GET_CODE (XEXP (disp, 0)) == PLUS
17405 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
17406 symbol = XEXP (XEXP (disp, 0), 0);
17408 if (GET_CODE (symbol) == LABEL_REF
17409 || (GET_CODE (symbol) == SYMBOL_REF
17410 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
17414 if (!base && !index)
17416 /* Displacement only requires special attention.  */
17417 if (CONST_INT_P (disp))
17419 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == ADDR_SPACE_GENERIC)
17420 fputs ("ds:", file);
17421 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
17424 output_pic_addr_const (file, disp, 0);
17426 output_addr_const (file, disp);
17430 /* Print SImode register names to force addr32 prefix.  */
17431 if (SImode_address_operand (addr, VOIDmode))
17435 gcc_assert (TARGET_64BIT);
17436 switch (GET_CODE (addr))
17439 gcc_assert (GET_MODE (addr) == SImode);
17440 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
17444 gcc_assert (GET_MODE (addr) == DImode);
17447 gcc_unreachable ();
17450 gcc_assert (!code);
17456 && CONST_INT_P (disp)
17457 && INTVAL (disp) < -16*1024*1024)
17459 /* X32 runs in 64-bit mode, where displacement, DISP, in
17460 address DISP(%r64), is encoded as 32-bit immediate sign-
17461 extended from 32-bit to 64-bit.  For -0x40000300(%r64),
17462 address is %r64 + 0xffffffffbffffd00.  When %r64 <
17463 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
17464 which is invalid for x32.  The correct address is %r64
17465 - 0x40000300 == 0xf7ffdd64.  To properly encode
17466 -0x40000300(%r64) for x32, we zero-extend negative
17467 displacement by forcing addr32 prefix which truncates
17468 0xfffffffff7ffdd64 to 0xf7ffdd64.  In theory, we should
17469 zero-extend all negative displacements, including -1(%rsp).
17470 However, for small negative displacements, sign-extension
17471 won't cause overflow.  We only zero-extend negative
17472 displacements if they < -16*1024*1024, which is also used
17473 to check legitimate address displacements for PIC.  */
/* AT&T syntax: disp(base,index,scale).  */
17477 if (ASSEMBLER_DIALECT == ASM_ATT)
17482 output_pic_addr_const (file, disp, 0);
17483 else if (GET_CODE (disp) == LABEL_REF)
17484 output_asm_label (disp);
17486 output_addr_const (file, disp);
17491 print_reg (base, code, file);
17495 print_reg (index, vsib ? 0 : code, file);
17496 if (scale != 1 || vsib)
17497 fprintf (file, ",%d", scale);
/* Intel syntax: [base+index*scale+disp]; split any symbol+offset so
   the offset can be printed with an explicit sign.  */
17503 rtx offset = NULL_RTX;
17507 /* Pull out the offset of a symbol; print any symbol itself.  */
17508 if (GET_CODE (disp) == CONST
17509 && GET_CODE (XEXP (disp, 0)) == PLUS
17510 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
17512 offset = XEXP (XEXP (disp, 0), 1);
17513 disp = gen_rtx_CONST (VOIDmode,
17514 XEXP (XEXP (disp, 0), 0));
17518 output_pic_addr_const (file, disp, 0);
17519 else if (GET_CODE (disp) == LABEL_REF)
17520 output_asm_label (disp);
17521 else if (CONST_INT_P (disp))
17524 output_addr_const (file, disp);
17530 print_reg (base, code, file);
17533 if (INTVAL (offset) >= 0)
17535 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
17539 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
17546 print_reg (index, vsib ? 0 : code, file);
17547 if (scale != 1 || vsib)
17548 fprintf (file, "*%d", scale);
17556 ix86_print_operand_address (FILE *file, machine_mode /*mode*/, rtx addr)
17558 ix86_print_operand_address_as (file, addr, ADDR_SPACE_GENERIC, false);
17561 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
/* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA: print TLS/Macho/stack-
   check UNSPEC address constants with their assembler relocation
   suffixes.  NOTE(review): listing omits some lines (case labels,
   braces, return statements); code preserved verbatim.  */
17564 i386_asm_output_addr_const_extra (FILE *file, rtx x)
17568 if (GET_CODE (x) != UNSPEC)
17571 op = XVECEXP (x, 0, 0);
17572 switch (XINT (x, 1))
17574 case UNSPEC_GOTTPOFF:
17575 output_addr_const (file, op);
17576 /* FIXME: This might be @TPOFF in Sun ld.  */
17577 fputs ("@gottpoff", file);
17580 output_addr_const (file, op);
17581 fputs ("@tpoff", file);
17583 case UNSPEC_NTPOFF:
17584 output_addr_const (file, op);
/* 64-bit uses @tpoff; the @ntpoff spelling is 32-bit only.  */
17586 fputs ("@tpoff", file);
17588 fputs ("@ntpoff", file);
17590 case UNSPEC_DTPOFF:
17591 output_addr_const (file, op);
17592 fputs ("@dtpoff", file);
17594 case UNSPEC_GOTNTPOFF:
17595 output_addr_const (file, op);
17597 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
17598 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
17600 fputs ("@gotntpoff", file);
17602 case UNSPEC_INDNTPOFF:
17603 output_addr_const (file, op);
17604 fputs ("@indntpoff", file);
17607 case UNSPEC_MACHOPIC_OFFSET:
17608 output_addr_const (file, op);
17610 machopic_output_function_base_name (file);
17614 case UNSPEC_STACK_CHECK:
/* -fsplit-stack TLS slot: print as seg:offset.  */
17618 gcc_assert (flag_split_stack);
17620 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
17621 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
17623 gcc_unreachable ();
17626 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
17637 /* Split one or more double-mode RTL references into pairs of half-mode
17638 references. The RTL can be REG, offsettable MEM, integer constant, or
17639 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
17640 split and "num" is its length. lo_half and hi_half are output arrays
17641 that parallel "operands". */
/* Split NUM double-mode rtxs in OPERANDS into LO_HALF/HI_HALF pairs of
   half-mode rtxs (DImode->SImode, TImode->DImode).  Volatile MEMs are
   split with adjust_address since simplify_subreg refuses them.
   NOTE(review): listing omits some lines (braces, loop header, MEM
   test); code preserved verbatim.  */
17644 split_double_mode (machine_mode mode, rtx operands[],
17645 int num, rtx lo_half[], rtx hi_half[])
17647 machine_mode half_mode;
17653 half_mode = DImode;
17656 half_mode = SImode;
17659 gcc_unreachable ();
/* Byte offset of the high half within the double-mode value.  */
17662 byte = GET_MODE_SIZE (half_mode);
17666 rtx op = operands[num];
17668 /* simplify_subreg refuses to split volatile memory addresses,
17669 but we still have to handle it.  */
17672 lo_half[num] = adjust_address (op, half_mode, 0);
17673 hi_half[num] = adjust_address (op, half_mode, byte);
/* Non-MEM (REG or constant): use subregs; VOIDmode constants take
   their mode from MODE.  */
17677 lo_half[num] = simplify_gen_subreg (half_mode, op,
17678 GET_MODE (op) == VOIDmode
17679 ? mode : GET_MODE (op), 0);
17680 hi_half[num] = simplify_gen_subreg (half_mode, op,
17681 GET_MODE (op) == VOIDmode
17682 ? mode : GET_MODE (op), byte);
17687 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
17688 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
17689 is the expression of the binary operation. The output may either be
17690 emitted here, or returned to the caller, like all output_* functions.
17692 There is no guarantee that the operands are the same mode, as they
17693 might be within FLOAT or FLOAT_EXTEND expressions. */
17695 #ifndef SYSV386_COMPAT
17696 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
17697 wants to fix the assemblers because that causes incompatibility
17698 with gcc. No-one wants to fix gcc because that causes
17699 incompatibility with assemblers... You can use the option of
17700 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
17701 #define SYSV386_COMPAT 1
/* Emit the assembler template for a 387 (or scalar SSE) binary op
   (PLUS/MINUS/MULT/DIV in operands[3]).  Returns a static buffer, so
   the result must be consumed before the next call.  NOTE(review):
   listing omits many lines (case labels, braces, p/ssep assignments);
   code preserved verbatim — the SYSV386_COMPAT templates below depend
   on exact operand/stack-top cases that are partly elided.  */
17705 output_387_binary_op (rtx insn, rtx *operands)
17707 static char buf[40];
17710 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
17712 /* Even if we do not want to check the inputs, this documents input
17713 constraints.  Which helps in understanding the following code.  */
17716 if (STACK_REG_P (operands[0])
17717 && ((REG_P (operands[1])
17718 && REGNO (operands[0]) == REGNO (operands[1])
17719 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
17720 || (REG_P (operands[2])
17721 && REGNO (operands[0]) == REGNO (operands[2])
17722 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
17723 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
17726 gcc_assert (is_sse);
/* Select mnemonic roots by operation; integer-mode operands choose the
   fi* (integer) forms (assignments elided in this listing).  */
17729 switch (GET_CODE (operands[3]))
17732 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
17733 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
17741 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
17742 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
17750 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
17751 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
17759 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
17760 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
17768 gcc_unreachable ();
/* SSE path: build v{add,sub,mul,div}{ss,sd}; the 3-operand (AVX) form
   first, 2-operand otherwise.  */
17775 strcpy (buf, ssep);
17776 if (GET_MODE (operands[0]) == SFmode)
17777 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
17779 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
17783 strcpy (buf, ssep + 1);
17784 if (GET_MODE (operands[0]) == SFmode)
17785 strcat (buf, "ss\t{%2, %0|%0, %2}");
17787 strcat (buf, "sd\t{%2, %0|%0, %2}");
/* x87 path: choose the suffix/operand template per operation,
   commutativity, stack-top position and dying operands.  */
17793 switch (GET_CODE (operands[3]))
17797 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
17798 std::swap (operands[1], operands[2]);
17800 /* know operands[0] == operands[1].  */
17802 if (MEM_P (operands[2]))
17808 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
17810 if (STACK_TOP_P (operands[0]))
17811 /* How is it that we are storing to a dead operand[2]?
17812 Well, presumably operands[1] is dead too.  We can't
17813 store the result to st(0) as st(0) gets popped on this
17814 instruction.  Instead store to operands[2] (which I
17815 think has to be st(1)).  st(1) will be popped later.
17816 gcc <= 2.8.1 didn't have this check and generated
17817 assembly code that the Unixware assembler rejected.  */
17818 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
17820 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
17824 if (STACK_TOP_P (operands[0]))
17825 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
17827 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
17832 if (MEM_P (operands[1]))
17838 if (MEM_P (operands[2]))
17844 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
17847 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
17848 derived assemblers, confusingly reverse the direction of
17849 the operation for fsub{r} and fdiv{r} when the
17850 destination register is not st(0).  The Intel assembler
17851 doesn't have this brain damage.  Read !SYSV386_COMPAT to
17852 figure out what the hardware really does.  */
17853 if (STACK_TOP_P (operands[0]))
17854 p = "{p\t%0, %2|rp\t%2, %0}";
17856 p = "{rp\t%2, %0|p\t%0, %2}";
17858 if (STACK_TOP_P (operands[0]))
17859 /* As above for fmul/fadd, we can't store to st(0).  */
17860 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
17862 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
17867 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
17870 if (STACK_TOP_P (operands[0]))
17871 p = "{rp\t%0, %1|p\t%1, %0}";
17873 p = "{p\t%1, %0|rp\t%0, %1}";
17875 if (STACK_TOP_P (operands[0]))
17876 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
17878 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
17883 if (STACK_TOP_P (operands[0]))
17885 if (STACK_TOP_P (operands[1]))
17886 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
17888 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
17891 else if (STACK_TOP_P (operands[1]))
17894 p = "{\t%1, %0|r\t%0, %1}";
17896 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
17902 p = "{r\t%2, %0|\t%0, %2}";
17904 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
17910 gcc_unreachable ();
17917 /* Check if a 256bit AVX register is referenced inside of EXP. */
17920 ix86_check_avx256_register (const_rtx exp)
17922 if (SUBREG_P (exp))
17923 exp = SUBREG_REG (exp);
17925 return (REG_P (exp)
17926 && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp)));
17929 /* Return needed mode for entity in optimize_mode_switching pass. */
/* Mode-switching NEEDED hook worker for the AVX upper-128 entity:
   return AVX_U128_DIRTY if INSN references a 256-bit AVX register,
   AVX_U128_CLEAN for calls passing no 256-bit args, AVX_U128_ANY
   otherwise.  NOTE(review): listing omits some lines (CALL_P test,
   braces); code preserved verbatim.  */
17932 ix86_avx_u128_mode_needed (rtx_insn *insn)
17938 /* Needed mode is set to AVX_U128_CLEAN if there are
17939 no 256bit modes used in function arguments.  */
17940 for (link = CALL_INSN_FUNCTION_USAGE (insn);
17942 link = XEXP (link, 1))
17944 if (GET_CODE (XEXP (link, 0)) == USE)
17946 rtx arg = XEXP (XEXP (link, 0), 0);
17948 if (ix86_check_avx256_register (arg))
17949 return AVX_U128_DIRTY;
17953 return AVX_U128_CLEAN;
17956 /* Require DIRTY mode if a 256bit AVX register is referenced.  Hardware
17957 changes state only when a 256bit register is written to, but we need
17958 to prevent the compiler from moving optimal insertion point above
17959 eventual read from 256bit register.  */
17960 subrtx_iterator::array_type array;
17961 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
17962 if (ix86_check_avx256_register (*iter))
17963 return AVX_U128_DIRTY;
17965 return AVX_U128_ANY;
17968 /* Return mode that i387 must be switched into
17969 prior to the execution of insn. */
/* Return the i387 control-word mode that must be in effect before INSN
   executes, for mode-switching ENTITY (one of the I387_* entities).
   NOTE(review): listing omits some lines (the CALL_P test before 17982,
   per-entity returns after each mode test); code preserved verbatim.  */
17972 ix86_i387_mode_needed (int entity, rtx_insn *insn)
17974 enum attr_i387_cw mode;
17976 /* The mode UNINITIALIZED is used to store the control word after a
17977 function call or ASM pattern.  The mode ANY specifies that the function
17978 has no requirements on the control word and makes no changes in the
17979 bits we are interested in.  */
17982 || (NONJUMP_INSN_P (insn)
17983 && (asm_noperands (PATTERN (insn)) >= 0
17984 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
17985 return I387_CW_UNINITIALIZED;
17987 if (recog_memoized (insn) < 0)
17988 return I387_CW_ANY;
/* The insn's i387_cw attribute says which rounding/masking mode it
   needs; the entity-specific returns are elided in this listing.  */
17990 mode = get_attr_i387_cw (insn);
17995 if (mode == I387_CW_TRUNC)
18000 if (mode == I387_CW_FLOOR)
18005 if (mode == I387_CW_CEIL)
18010 if (mode == I387_CW_MASK_PM)
18015 gcc_unreachable ();
18018 return I387_CW_ANY;
18021 /* Return mode that entity must be switched into
18022 prior to the execution of insn. */
/* Implement TARGET_MODE_NEEDED: dispatch to the AVX-upper-128 or i387
   control-word worker by ENTITY.  NOTE(review): listing omits the
   switch statement and its case labels; code preserved verbatim.  */
18025 ix86_mode_needed (int entity, rtx_insn *insn)
18030 return ix86_avx_u128_mode_needed (insn);
18035 return ix86_i387_mode_needed (entity, insn);
18037 gcc_unreachable ();
18042 /* Check if a 256bit AVX register is referenced in stores. */
18045 ix86_check_avx256_stores (rtx dest, const_rtx, void *data)
18047 if (ix86_check_avx256_register (dest))
18049 bool *used = (bool *) data;
18054 /* Calculate mode of upper 128bit AVX registers after the insn. */
/* Mode-switching AFTER hook worker: compute the AVX upper-128 state
   after INSN given state MODE before it.  vzeroupper/vzeroall leave the
   state CLEAN; a call is CLEAN unless it returns in a 256-bit register.
   NOTE(review): listing omits some lines (the CALL_P test, the final
   `return mode;`); code preserved verbatim.  */
18057 ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
18059 rtx pat = PATTERN (insn);
18061 if (vzeroupper_operation (pat, VOIDmode)
18062 || vzeroall_operation (pat, VOIDmode))
18063 return AVX_U128_CLEAN;
18065 /* We know that state is clean after CALL insn if there are no
18066 256bit registers used in the function return register.  */
18069 bool avx_reg256_found = false;
18070 note_stores (pat, ix86_check_avx256_stores, &avx_reg256_found);
18072 return avx_reg256_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
18075 /* Otherwise, return current mode.  Remember that if insn
18076 references AVX 256bit registers, the mode was already changed
18077 to DIRTY from MODE_NEEDED.  */
18081 /* Return the mode that an insn results in. */
/* Implement TARGET_MODE_AFTER: per-entity dispatch for the state after
   INSN.  NOTE(review): listing omits the switch scaffolding and the
   i387 cases (which presumably return MODE); code preserved verbatim.  */
18084 ix86_mode_after (int entity, int mode, rtx_insn *insn)
18089 return ix86_avx_u128_mode_after (mode, insn);
18096 gcc_unreachable ();
/* Mode-switching ENTRY worker: the AVX upper-128 state on function
   entry is DIRTY iff any incoming argument lives in a 256-bit AVX
   register.  NOTE(review): listing omits return type, braces and the
   declaration of `arg`; code preserved verbatim.  */
18101 ix86_avx_u128_mode_entry (void)
18105 /* Entry mode is set to AVX_U128_DIRTY if there are
18106 256bit modes used in function arguments.  */
18107 for (arg = DECL_ARGUMENTS (current_function_decl); arg;
18108 arg = TREE_CHAIN (arg))
18110 rtx incoming = DECL_INCOMING_RTL (arg);
18112 if (incoming && ix86_check_avx256_register (incoming))
18113 return AVX_U128_DIRTY;
18116 return AVX_U128_CLEAN;
18119 /* Return a mode that ENTITY is assumed to be
18120 switched to at function entry. */
/* Implement TARGET_MODE_ENTRY: mode assumed for ENTITY at function
   entry — AVX worker for the upper-128 entity, I387_CW_ANY for the
   control-word entities.  NOTE(review): listing omits the switch
   scaffolding and case labels; code preserved verbatim.  */
18123 ix86_mode_entry (int entity)
18128 return ix86_avx_u128_mode_entry ();
18133 return I387_CW_ANY;
18135 gcc_unreachable ();
/* Mode-switching EXIT worker: the AVX upper-128 state at function exit
   is DIRTY iff the return value lives in a 256-bit AVX register.
   NOTE(review): listing omits return type and braces; code preserved
   verbatim.  */
18140 ix86_avx_u128_mode_exit (void)
18142 rtx reg = crtl->return_rtx;
18144 /* Exit mode is set to AVX_U128_DIRTY if there are
18145 256bit modes used in the function return register.  */
18146 if (reg && ix86_check_avx256_register (reg))
18147 return AVX_U128_DIRTY;
18149 return AVX_U128_CLEAN;
18152 /* Return a mode that ENTITY is assumed to be
18153 switched to at function exit. */
/* Implement TARGET_MODE_EXIT: mode assumed for ENTITY at function exit.
   NOTE(review): listing omits the switch scaffolding and case labels;
   code preserved verbatim.  */
18156 ix86_mode_exit (int entity)
18161 return ix86_avx_u128_mode_exit ();
18166 return I387_CW_ANY;
18168 gcc_unreachable ();
18173 ix86_mode_priority (int, int n)
18178 /* Output code to initialize control word copies used by trunc?f?i and
18179 rounding patterns. CURRENT_MODE is set to current control word,
18180 while NEW_MODE is set to new control word. */
/* Emit insns that build the i387 control word for rounding mode MODE:
   fnstcw the current word to a stack slot, tweak the RC/PM bits in a
   scratch register (ior/and on targets where insv is slow or large,
   insv otherwise), and store the result into the per-mode stack slot
   read later by fldcw.  NOTE(review): listing omits some lines (switch
   scaffolding, the else branch header, break statements); code
   preserved verbatim.  */
18183 emit_i387_cw_initialization (int mode)
18185 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
18188 enum ix86_stack_slot slot;
18190 rtx reg = gen_reg_rtx (HImode);
18192 emit_insn (gen_x86_fnstcw_1 (stored_mode));
18193 emit_move_insn (reg, copy_rtx (stored_mode));
/* Cheap-insv path: mask/or the 2-bit rounding-control field (bits
   10-11, mask 0x0c00) directly.  */
18195 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
18196 || optimize_insn_for_size_p ())
18200 case I387_CW_TRUNC:
18201 /* round toward zero (truncate) */
18202 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
18203 slot = SLOT_CW_TRUNC;
18206 case I387_CW_FLOOR:
18207 /* round down toward -oo */
18208 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
18209 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
18210 slot = SLOT_CW_FLOOR;
18214 /* round up toward +oo */
18215 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
18216 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
18217 slot = SLOT_CW_CEIL;
18220 case I387_CW_MASK_PM:
18221 /* mask precision exception for nearbyint() */
18222 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
18223 slot = SLOT_CW_MASK_PM;
18227 gcc_unreachable ();
/* insv path: insert the 2-bit RC value with a single bit-field insert.  */
18234 case I387_CW_TRUNC:
18235 /* round toward zero (truncate) */
18236 emit_insn (gen_insvsi_1 (reg, GEN_INT (0xc)));
18237 slot = SLOT_CW_TRUNC;
18240 case I387_CW_FLOOR:
18241 /* round down toward -oo */
18242 emit_insn (gen_insvsi_1 (reg, GEN_INT (0x4)));
18243 slot = SLOT_CW_FLOOR;
18247 /* round up toward +oo */
18248 emit_insn (gen_insvsi_1 (reg, GEN_INT (0x8)));
18249 slot = SLOT_CW_CEIL;
18252 case I387_CW_MASK_PM:
18253 /* mask precision exception for nearbyint() */
18254 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
18255 slot = SLOT_CW_MASK_PM;
18259 gcc_unreachable ();
18263 gcc_assert (slot < MAX_386_STACK_LOCALS);
18265 new_mode = assign_386_stack_local (HImode, slot);
18266 emit_move_insn (new_mode, reg);
18269 /* Emit vzeroupper. */
/* Emit a vzeroupper, unless a call-saved SSE register is live in
   REGS_LIVE at the insertion point (vzeroupper would clobber its upper
   half).  NOTE(review): listing omits the early `return;` statements
   after each live-register test and the braces; code preserved
   verbatim.  */
18272 ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live)
18276 /* Cancel automatic vzeroupper insertion if there are
18277 live call-saved SSE registers at the insertion point.  */
18279 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
18280 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
18284 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
18285 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
18288 emit_insn (gen_avx_vzeroupper ());
18291 /* Generate one or more insns to set ENTITY to MODE. */
18293 /* Generate one or more insns to set ENTITY to MODE. HARD_REG_LIVE
18294 is the set of hard registers live at the point where the insn(s)
18295 are to be inserted. */
/* Implement TARGET_MODE_EMIT: emit insns switching ENTITY to MODE.
   REGS_LIVE is the set of hard registers live at the insertion point.
   NOTE(review): listing omits the switch scaffolding and case labels;
   code preserved verbatim.  */
18298 ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
18299 HARD_REG_SET regs_live)
18304 if (mode == AVX_U128_CLEAN)
18305 ix86_avx_emit_vzeroupper (regs_live);
/* i387 control-word entities: materialize the slot for fldcw.  */
18311 if (mode != I387_CW_ANY
18312 && mode != I387_CW_UNINITIALIZED)
18313 emit_i387_cw_initialization (mode);
18316 gcc_unreachable ();
18320 /* Output code for INSN to convert a float to a signed int. OPERANDS
18321 are the insn operands. The output may be [HSD]Imode and the input
18322 operand may be [SDX]Fmode. */
/* Emit assembler for INSN converting an x87 float to a signed int
   ([HSD]Imode output, [SDX]Fmode input).  FISTTP selects SSE3 fisttp
   (truncating, no control-word dance).  NOTE(review): listing omits
   some lines (braces, the fisttp/else split around 18342); code
   preserved verbatim.  */
18325 output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp)
18327 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
18328 int dimode_p = GET_MODE (operands[0]) == DImode;
18329 int round_mode = get_attr_i387_cw (insn);
18331 /* Jump through a hoop or two for DImode, since the hardware has no
18332 non-popping instruction.  We used to do this a different way, but
18333 that was somewhat fragile and broke with post-reload splitters.  */
18334 if ((dimode_p || fisttp) && !stack_top_dies)
18335 output_asm_insn ("fld\t%y1", operands);
18337 gcc_assert (STACK_TOP_P (operands[1]));
18338 gcc_assert (MEM_P (operands[0]));
18339 gcc_assert (GET_MODE (operands[1]) != TFmode);
18342 output_asm_insn ("fisttp%Z0\t%0", operands);
/* Classic path: temporarily load the rounding control word (%3),
   store, then restore the original control word (%2).  */
18345 if (round_mode != I387_CW_ANY)
18346 output_asm_insn ("fldcw\t%3", operands);
18347 if (stack_top_dies || dimode_p)
18348 output_asm_insn ("fistp%Z0\t%0", operands);
18350 output_asm_insn ("fist%Z0\t%0", operands);
18351 if (round_mode != I387_CW_ANY)
18352 output_asm_insn ("fldcw\t%2", operands);
18358 /* Output code for x87 ffreep insn. The OPNO argument, which may only
18359 have the values zero or one, indicates the ffreep insn's operand
18360 from the OPERANDS array. */
18362 static const char *
18363 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
18365 if (TARGET_USE_FFREEP)
18366 #ifdef HAVE_AS_IX86_FFREEP
18367 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
18370 static char retval[32];
18371 int regno = REGNO (operands[opno]);
18373 gcc_assert (STACK_REGNO_P (regno));
18375 regno -= FIRST_STACK_REG;
18377 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
18382 return opno ? "fstp\t%y1" : "fstp\t%y0";
18386 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
18387 should be used. UNORDERED_P is true when fucom should be used. */
/* Emit assembler for a floating-point compare of OPERANDS.  EFLAGS_P
   selects fcomi-style compares writing EFLAGS directly; UNORDERED_P
   selects the fucom (quiet-NaN tolerant) family; SSE operands use
   [u]comis[sd].  NOTE(review): listing omits many lines (braces, some
   else branches, the final table lookup/return); code preserved
   verbatim.  */
18390 output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
18392 int stack_top_dies;
18393 rtx cmp_op0, cmp_op1;
18394 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
18398 cmp_op0 = operands[0];
18399 cmp_op1 = operands[1];
18403 cmp_op0 = operands[1];
18404 cmp_op1 = operands[2];
/* SSE scalar compare: %v prefixes the AVX 'v' when applicable.  */
18409 if (GET_MODE (operands[0]) == SFmode)
18411 return "%vucomiss\t{%1, %0|%0, %1}";
18413 return "%vcomiss\t{%1, %0|%0, %1}";
18416 return "%vucomisd\t{%1, %0|%0, %1}";
18418 return "%vcomisd\t{%1, %0|%0, %1}";
18421 gcc_assert (STACK_TOP_P (cmp_op0));
18423 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
/* Compare against zero: ftst examines st(0) only.  */
18425 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
18427 if (stack_top_dies)
18429 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
18430 return output_387_ffreep (operands, 1);
18433 return "ftst\n\tfnstsw\t%0";
18436 if (STACK_REG_P (cmp_op1)
18438 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
18439 && REGNO (cmp_op1) != FIRST_STACK_REG)
18441 /* If both the top of the 387 stack dies, and the other operand
18442 is also a stack register that dies, then this must be a
18443 `fcompp' float compare */
18447 /* There is no double popping fcomi variant.  Fortunately,
18448 eflags is immune from the fstp's cc clobbering.  */
18450 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
18452 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
18453 return output_387_ffreep (operands, 0);
18458 return "fucompp\n\tfnstsw\t%0";
18460 return "fcompp\n\tfnstsw\t%0";
18465 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies.  */
18467 static const char * const alt[16] =
18469 "fcom%Z2\t%y2\n\tfnstsw\t%0",
18470 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
18471 "fucom%Z2\t%y2\n\tfnstsw\t%0",
18472 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
18474 "ficom%Z2\t%y2\n\tfnstsw\t%0",
18475 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
18479 "fcomi\t{%y1, %0|%0, %y1}",
18480 "fcomip\t{%y1, %0|%0, %y1}",
18481 "fucomi\t{%y1, %0|%0, %y1}",
18482 "fucomip\t{%y1, %0|%0, %y1}",
/* Build the 4-bit index into the template table above.  */
18493 mask = eflags_p << 3;
18494 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
18495 mask |= unordered_p << 1;
18496 mask |= stack_top_dies;
18498 gcc_assert (mask < 16);
18507 ix86_output_addr_vec_elt (FILE *file, int value)
18509 const char *directive = ASM_LONG;
18513 directive = ASM_QUAD;
18515 gcc_assert (!TARGET_64BIT);
18518 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
/* Print a relative jump-table element: label VALUE expressed relative
   to label REL (64-bit/VxWorks), via @GOTOFF when the assembler allows
   it in data, relative to the Macho function base, or relative to the
   GOT symbol.  NOTE(review): listing omits some lines ('#ifdef
   ASM_QUAD' scaffolding, braces, the trailing newline fputs in the
   Macho branch); code preserved verbatim.  */
18522 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
18524 const char *directive = ASM_LONG;
18527 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
18528 directive = ASM_QUAD;
18530 gcc_assert (!TARGET_64BIT);
18532 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand.  */
18533 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
18534 fprintf (file, "%s%s%d-%s%d\n",
18535 directive, LPREFIX, value, LPREFIX, rel);
18536 else if (HAVE_AS_GOTOFF_IN_DATA)
18537 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
18539 else if (TARGET_MACHO)
18541 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
18542 machopic_output_function_base_name (file);
18547 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
18548 GOT_SYMBOL_NAME, LPREFIX, value);
18551 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
18555 ix86_expand_clear (rtx dest)
18559 /* We play register width games, which are only valid after reload. */
18560 gcc_assert (reload_completed);
18562 /* Avoid HImode and its attendant prefix byte. */
18563 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
18564 dest = gen_rtx_REG (SImode, REGNO (dest));
18565 tmp = gen_rtx_SET (dest, const0_rtx);
18567 if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
18569 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18570 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
18576 /* X is an unchanging MEM. If it is a constant pool reference, return
18577 the constant pool rtx, else NULL. */
18580 maybe_get_pool_constant (rtx x)
18582 x = ix86_delegitimize_address (XEXP (x, 0));
18584 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
18585 return get_pool_constant (x);
/* Expand a scalar move of OPERANDS[1] into OPERANDS[0] in MODE,
   legitimizing TLS symbols, PE-COFF dllimport references and PIC
   addresses, and forcing operands into registers/memory where the
   target insns require it.  NOTE(review): listing omits many lines
   (op0/op1 declarations, braces, several else branches); code preserved
   verbatim.  */
18591 ix86_expand_move (machine_mode mode, rtx operands[])
18594 enum tls_model model;
/* Bare SYMBOL_REF source: handle TLS and dllimport forms first.  */
18599 if (GET_CODE (op1) == SYMBOL_REF)
18603 model = SYMBOL_REF_TLS_MODEL (op1);
18606 op1 = legitimize_tls_address (op1, model, true);
18607 op1 = force_operand (op1, op0);
18610 op1 = convert_to_mode (mode, op1, 1);
18612 else if ((tmp = legitimize_pe_coff_symbol (op1, false)) != NULL_RTX)
/* CONST (PLUS symbol addend): legitimize the symbol, then re-add the
   addend.  */
18615 else if (GET_CODE (op1) == CONST
18616 && GET_CODE (XEXP (op1, 0)) == PLUS
18617 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
18619 rtx addend = XEXP (XEXP (op1, 0), 1);
18620 rtx symbol = XEXP (XEXP (op1, 0), 0);
18623 model = SYMBOL_REF_TLS_MODEL (symbol);
18625 tmp = legitimize_tls_address (symbol, model, true);
18627 tmp = legitimize_pe_coff_symbol (symbol, true);
18631 tmp = force_operand (tmp, NULL);
18632 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
18633 op0, 1, OPTAB_DIRECT);
18636 op1 = convert_to_mode (mode, tmp, 1);
/* PIC (or Macho indirect) symbolic source.  */
18640 if ((flag_pic || MACHOPIC_INDIRECT)
18641 && symbolic_operand (op1, mode))
18643 if (TARGET_MACHO && !TARGET_64BIT)
18646 /* dynamic-no-pic */
18647 if (MACHOPIC_INDIRECT)
18649 rtx temp = (op0 && REG_P (op0) && mode == Pmode)
18650 ? op0 : gen_reg_rtx (Pmode);
18651 op1 = machopic_indirect_data_reference (op1, temp);
18653 op1 = machopic_legitimize_pic_address (op1, mode,
18654 temp == op1 ? 0 : temp);
18656 if (op0 != op1 && GET_CODE (op0) != MEM)
18658 rtx insn = gen_rtx_SET (op0, op1);
18662 if (GET_CODE (op0) == MEM)
18663 op1 = force_reg (Pmode, op1);
18667 if (GET_CODE (temp) != REG)
18668 temp = gen_reg_rtx (Pmode);
18669 temp = legitimize_pic_address (op1, temp);
18674 /* dynamic-no-pic */
/* Non-Macho PIC path.  */
18680 op1 = force_reg (mode, op1);
18681 else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
18683 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
18684 op1 = legitimize_pic_address (op1, reg);
18687 op1 = convert_to_mode (mode, op1, 1);
/* MEM destination: most insns need the source in a register, except
   proper pushes.  */
18694 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
18695 || !push_operand (op0, mode))
18697 op1 = force_reg (mode, op1);
18699 if (push_operand (op0, mode)
18700 && ! general_no_elim_operand (op1, mode))
18701 op1 = copy_to_mode_reg (mode, op1);
18703 /* Force large constants in 64bit compilation into register
18704 to get them CSEed.  */
18705 if (can_create_pseudo_p ()
18706 && (mode == DImode) && TARGET_64BIT
18707 && immediate_operand (op1, mode)
18708 && !x86_64_zext_immediate_operand (op1, VOIDmode)
18709 && !register_operand (op0, mode)
18711 op1 = copy_to_mode_reg (mode, op1);
18713 if (can_create_pseudo_p ()
18714 && CONST_DOUBLE_P (op1))
18716 /* If we are loading a floating point constant to a register,
18717 force the value to memory now, since we'll get better code
18718 out the back end.  */
18720 op1 = validize_mem (force_const_mem (mode, op1));
18721 if (!register_operand (op0, mode))
18723 rtx temp = gen_reg_rtx (mode);
18724 emit_insn (gen_rtx_SET (temp, op1));
18725 emit_move_insn (op0, temp);
18731 emit_insn (gen_rtx_SET (op0, op1));
/* Expand a vector move in MODE: resolve pushes, force non-zero
   constants to the constant pool, and route under-aligned SSE operands
   through the misaligned-move expander.  NOTE(review): listing omits
   some lines (braces, the SUBREG_P tests before 18755/18771, tmp[]
   declaration); code preserved verbatim.  */
18735 ix86_expand_vector_move (machine_mode mode, rtx operands[])
18737 rtx op0 = operands[0], op1 = operands[1];
18738 /* Use GET_MODE_BITSIZE instead of GET_MODE_ALIGNMENT for IA MCU
18739 psABI since the biggest alignment is 4 byte for IA MCU psABI.  */
18740 unsigned int align = (TARGET_IAMCU
18741 ? GET_MODE_BITSIZE (mode)
18742 : GET_MODE_ALIGNMENT (mode));
18744 if (push_operand (op0, VOIDmode))
18745 op0 = emit_move_resolve_push (mode, op0);
18747 /* Force constants other than zero into memory.  We do not know how
18748 the instructions used to build constants modify the upper 64 bits
18749 of the register, once we have that information we may be able
18750 to handle some of them more efficiently.  */
18751 if (can_create_pseudo_p ()
18752 && register_operand (op0, mode)
18753 && (CONSTANT_P (op1)
18755 && CONSTANT_P (SUBREG_REG (op1))))
18756 && !standard_sse_constant_p (op1))
18757 op1 = validize_mem (force_const_mem (mode, op1));
18759 /* We need to check memory alignment for SSE mode since attribute
18760 can make operands unaligned.  */
18761 if (can_create_pseudo_p ()
18762 && SSE_REG_MODE_P (mode)
18763 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
18764 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
18768 /* ix86_expand_vector_move_misalign() does not like constants ...  */
18769 if (CONSTANT_P (op1)
18771 && CONSTANT_P (SUBREG_REG (op1))))
18772 op1 = validize_mem (force_const_mem (mode, op1));
18774 /* ... nor both arguments in memory.  */
18775 if (!register_operand (op0, mode)
18776 && !register_operand (op1, mode))
18777 op1 = force_reg (mode, op1);
18779 tmp[0] = op0; tmp[1] = op1;
18780 ix86_expand_vector_move_misalign (mode, tmp);
18784 /* Make operand1 a register if it isn't already.  */
18785 if (can_create_pseudo_p ()
18786 && !register_operand (op0, mode)
18787 && !register_operand (op1, mode))
18789 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
18793 emit_insn (gen_rtx_SET (op0, op1));
18796 /* Split 32-byte AVX unaligned load and store if needed. */
/* OP0 = OP1 is a 256-bit move where one side is an unaligned MEM.
   Depending on tuning, either split it into two 128-bit halves or
   emit a single unaligned vmovdqu/vmovups/vmovupd.
   NOTE(review): case labels and braces are missing from this extract;
   the switch selects generators per 256-bit mode (V32QI/V8SF/V4DF).  */
18799 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
18802 rtx (*extract) (rtx, rtx, rtx);
18803 rtx (*load_unaligned) (rtx, rtx);
18804 rtx (*store_unaligned) (rtx, rtx);
18807 switch (GET_MODE (op0))
18810 gcc_unreachable ();
18812 extract = gen_avx_vextractf128v32qi;
18813 load_unaligned = gen_avx_loaddquv32qi;
18814 store_unaligned = gen_avx_storedquv32qi;
18818 extract = gen_avx_vextractf128v8sf;
18819 load_unaligned = gen_avx_loadups256;
18820 store_unaligned = gen_avx_storeups256;
18824 extract = gen_avx_vextractf128v4df;
18825 load_unaligned = gen_avx_loadupd256;
18826 store_unaligned = gen_avx_storeupd256;
/* Load path: build the 256-bit value from two 128-bit loads when the
   split-unaligned-load tuning flag asks for it.  */
18833 if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD
18834 && optimize_insn_for_speed_p ())
18836 rtx r = gen_reg_rtx (mode);
18837 m = adjust_address (op1, mode, 0);
18838 emit_move_insn (r, m);
18839 m = adjust_address (op1, mode, 16);
18840 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
18841 emit_move_insn (op0, r);
18843 /* Normal *mov<mode>_internal pattern will handle
18844 unaligned loads just fine if misaligned_operand
18845 is true, and without the UNSPEC it can be combined
18846 with arithmetic instructions. */
18847 else if (misaligned_operand (op1, GET_MODE (op1)))
18848 emit_insn (gen_rtx_SET (op0, op1));
18850 emit_insn (load_unaligned (op0, op1));
/* Store path: split into two vextractf128 stores or one unaligned
   store, again per tuning.  */
18852 else if (MEM_P (op0))
18854 if (TARGET_AVX256_SPLIT_UNALIGNED_STORE
18855 && optimize_insn_for_speed_p ())
18857 m = adjust_address (op0, mode, 0);
18858 emit_insn (extract (m, op1, const0_rtx));
18859 m = adjust_address (op0, mode, 16);
18860 emit_insn (extract (m, op1, const1_rtx));
18863 emit_insn (store_unaligned (op0, op1));
/* Neither operand was a MEM: nothing to do for a misaligned move.  */
18866 gcc_unreachable ();
18869 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
18870 straight to ix86_expand_vector_move. */
18871 /* Code generation for scalar reg-reg moves of single and double precision data:
18872 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
18876 if (x86_sse_partial_reg_dependency == true)
18881 Code generation for scalar loads of double precision data:
18882 if (x86_sse_split_regs == true)
18883 movlpd mem, reg (gas syntax)
18887 Code generation for unaligned packed loads of single precision data
18888 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
18889 if (x86_sse_unaligned_move_optimal)
18892 if (x86_sse_partial_reg_dependency == true)
18904 Code generation for unaligned packed loads of double precision data
18905 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
18906 if (x86_sse_unaligned_move_optimal)
18909 if (x86_sse_split_regs == true)
/* Dispatch on GET_MODE_SIZE (mode): 64-byte AVX-512 moves, then 32-byte
   AVX moves (delegated to ix86_avx256_split_vector_move_misalign for
   float modes), then 16-byte SSE moves.  Loads are the MEM_P (op1)
   branches, stores the MEM_P (op0) branches.
   NOTE(review): many interior lines (braces, case labels, else arms)
   are missing from this extract; comments cover visible code only.  */
18922 ix86_expand_vector_move_misalign (machine_mode mode, rtx operands[])
18924 rtx op0, op1, orig_op0 = NULL_RTX, m;
18925 rtx (*load_unaligned) (rtx, rtx);
18926 rtx (*store_unaligned) (rtx, rtx);
/* --- 512-bit (AVX-512) moves --- */
18931 if (GET_MODE_SIZE (mode) == 64)
18933 switch (GET_MODE_CLASS (mode))
18935 case MODE_VECTOR_INT:
/* Integer vectors are canonicalized to V16SImode first.  */
18937 if (GET_MODE (op0) != V16SImode)
18942 op0 = gen_reg_rtx (V16SImode);
18945 op0 = gen_lowpart (V16SImode, op0);
18947 op1 = gen_lowpart (V16SImode, op1);
18950 case MODE_VECTOR_FLOAT:
18951 switch (GET_MODE (op0))
18954 gcc_unreachable ();
18956 load_unaligned = gen_avx512f_loaddquv16si;
18957 store_unaligned = gen_avx512f_storedquv16si;
18960 load_unaligned = gen_avx512f_loadups512;
18961 store_unaligned = gen_avx512f_storeups512;
18964 load_unaligned = gen_avx512f_loadupd512;
18965 store_unaligned = gen_avx512f_storeupd512;
18970 emit_insn (load_unaligned (op0, op1));
18971 else if (MEM_P (op0))
18972 emit_insn (store_unaligned (op0, op1));
18974 gcc_unreachable ();
/* Copy the result back in the caller's original mode if we switched.  */
18976 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
18980 gcc_unreachable ();
/* --- 256-bit (AVX) moves --- */
18987 && GET_MODE_SIZE (mode) == 32)
18989 switch (GET_MODE_CLASS (mode))
18991 case MODE_VECTOR_INT:
/* Integer vectors are canonicalized to V32QImode.  */
18993 if (GET_MODE (op0) != V32QImode)
18998 op0 = gen_reg_rtx (V32QImode);
19001 op0 = gen_lowpart (V32QImode, op0);
19003 op1 = gen_lowpart (V32QImode, op1);
19006 case MODE_VECTOR_FLOAT:
19007 ix86_avx256_split_vector_move_misalign (op0, op1);
19009 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
19013 gcc_unreachable ();
/* --- 128-bit (SSE) moves --- */
19021 /* Normal *mov<mode>_internal pattern will handle
19022 unaligned loads just fine if misaligned_operand
19023 is true, and without the UNSPEC it can be combined
19024 with arithmetic instructions. */
19026 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
19027 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
19028 && misaligned_operand (op1, GET_MODE (op1)))
19029 emit_insn (gen_rtx_SET (op0, op1));
19030 /* ??? If we have typed data, then it would appear that using
19031 movdqu is the only way to get unaligned data loaded with
19033 else if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
19035 if (GET_MODE (op0) != V16QImode)
19038 op0 = gen_reg_rtx (V16QImode);
19040 op1 = gen_lowpart (V16QImode, op1);
19041 /* We will eventually emit movups based on insn attributes. */
19042 emit_insn (gen_sse2_loaddquv16qi (op0, op1));
19044 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
/* V2DF load: either one movupd, or a movlpd/movhpd pair (after zeroing
   or clobbering the destination, per split-regs tuning).  */
19046 else if (TARGET_SSE2 && mode == V2DFmode)
19051 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
19052 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
19053 || optimize_insn_for_size_p ())
19055 /* We will eventually emit movups based on insn attributes. */
19056 emit_insn (gen_sse2_loadupd (op0, op1));
19060 /* When SSE registers are split into halves, we can avoid
19061 writing to the top half twice. */
19062 if (TARGET_SSE_SPLIT_REGS)
19064 emit_clobber (op0);
19069 /* ??? Not sure about the best option for the Intel chips.
19070 The following would seem to satisfy; the register is
19071 entirely cleared, breaking the dependency chain. We
19072 then store to the upper half, with a dependency depth
19073 of one. A rumor has it that Intel recommends two movsd
19074 followed by an unpacklpd, but this is unconfirmed. And
19075 given that the dependency depth of the unpacklpd would
19076 still be one, I'm not sure why this would be better. */
19077 zero = CONST0_RTX (V2DFmode);
19080 m = adjust_address (op1, DFmode, 0);
19081 emit_insn (gen_sse2_loadlpd (op0, zero, m));
19082 m = adjust_address (op1, DFmode, 8);
19083 emit_insn (gen_sse2_loadhpd (op0, op0, m));
/* V4SF load: one movups, or a movlps/movhps pair.  */
19090 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
19091 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
19092 || optimize_insn_for_size_p ())
19094 if (GET_MODE (op0) != V4SFmode)
19097 op0 = gen_reg_rtx (V4SFmode);
19099 op1 = gen_lowpart (V4SFmode, op1);
19100 emit_insn (gen_sse_loadups (op0, op1));
19102 emit_move_insn (orig_op0,
19103 gen_lowpart (GET_MODE (orig_op0), op0));
19107 if (mode != V4SFmode)
19108 t = gen_reg_rtx (V4SFmode);
/* Break the false dependency on the old register contents.  */
19112 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
19113 emit_move_insn (t, CONST0_RTX (V4SFmode));
19117 m = adjust_address (op1, V2SFmode, 0);
19118 emit_insn (gen_sse_loadlps (t, t, m));
19119 m = adjust_address (op1, V2SFmode, 8);
19120 emit_insn (gen_sse_loadhps (t, t, m));
19121 if (mode != V4SFmode)
19122 emit_move_insn (op0, gen_lowpart (mode, t));
/* --- 128-bit stores (op0 is the unaligned MEM) --- */
19125 else if (MEM_P (op0))
19127 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
19129 op0 = gen_lowpart (V16QImode, op0);
19130 op1 = gen_lowpart (V16QImode, op1);
19131 /* We will eventually emit movups based on insn attributes. */
19132 emit_insn (gen_sse2_storedquv16qi (op0, op1));
19134 else if (TARGET_SSE2 && mode == V2DFmode)
19137 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
19138 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
19139 || optimize_insn_for_size_p ())
19140 /* We will eventually emit movups based on insn attributes. */
19141 emit_insn (gen_sse2_storeupd (op0, op1));
19144 m = adjust_address (op0, DFmode, 0);
19145 emit_insn (gen_sse2_storelpd (m, op1));
19146 m = adjust_address (op0, DFmode, 8);
19147 emit_insn (gen_sse2_storehpd (m, op1));
19152 if (mode != V4SFmode)
19153 op1 = gen_lowpart (V4SFmode, op1);
19156 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
19157 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
19158 || optimize_insn_for_size_p ())
19160 op0 = gen_lowpart (V4SFmode, op0);
19161 emit_insn (gen_sse_storeups (op0, op1));
19165 m = adjust_address (op0, V2SFmode, 0);
19166 emit_insn (gen_sse_storelps (m, op1));
19167 m = adjust_address (op0, V2SFmode, 8);
19168 emit_insn (gen_sse_storehps (m, op1));
19173 gcc_unreachable ();
19176 /* Helper function of ix86_fixup_binary_operands to canonicalize
19177 operand order. Returns true if the operands should be swapped. */
/* OPERANDS is { dst, src1, src2 } for a two-address x86 binary op;
   swapping is only considered for commutative CODEs.  The "return"
   lines for each priority rule are missing from this extract.  */
19180 ix86_swap_binary_operands_p (enum rtx_code code, machine_mode mode,
19183 rtx dst = operands[0];
19184 rtx src1 = operands[1];
19185 rtx src2 = operands[2];
19187 /* If the operation is not commutative, we can't do anything. */
19188 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
19191 /* Highest priority is that src1 should match dst. */
19192 if (rtx_equal_p (dst, src1))
19194 if (rtx_equal_p (dst, src2))
19197 /* Next highest priority is that immediate constants come second. */
19198 if (immediate_operand (src2, mode))
19200 if (immediate_operand (src1, mode))
19203 /* Lowest priority is that memory references should come second. */
19213 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
19214 destination to use for the operation. If different from the true
19215 destination in operands[0], a copy operation will be required. */
/* Canonicalizes operand order, forces illegal memory/constant operand
   combinations into registers, and writes the fixed sources back into
   OPERANDS[1]/OPERANDS[2].  Caller must copy the returned dst back to
   operands[0] when they differ.  */
19218 ix86_fixup_binary_operands (enum rtx_code code, machine_mode mode,
19221 rtx dst = operands[0];
19222 rtx src1 = operands[1];
19223 rtx src2 = operands[2];
19225 /* Canonicalize operand order. */
19226 if (ix86_swap_binary_operands_p (code, mode, operands))
19228 /* It is invalid to swap operands of different modes. */
19229 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
19231 std::swap (src1, src2);
19234 /* Both source operands cannot be in memory. */
19235 if (MEM_P (src1) && MEM_P (src2))
19237 /* Optimization: Only read from memory once. */
19238 if (rtx_equal_p (src1, src2))
19240 src2 = force_reg (mode, src2);
19243 else if (rtx_equal_p (dst, src1))
19244 src2 = force_reg (mode, src2);
19246 src1 = force_reg (mode, src1);
19249 /* If the destination is memory, and we do not have matching source
19250 operands, do things in registers. */
19251 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
19252 dst = gen_reg_rtx (mode);
19254 /* Source 1 cannot be a constant. */
19255 if (CONSTANT_P (src1))
19256 src1 = force_reg (mode, src1);
19258 /* Source 1 cannot be a non-matching memory. */
19259 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
19260 src1 = force_reg (mode, src1);
19262 /* Improve address combine. */
19264 && GET_MODE_CLASS (mode) == MODE_INT
19266 src2 = force_reg (mode, src2);
19268 operands[1] = src1;
19269 operands[2] = src2;
19273 /* Similarly, but assume that the destination has already been
19274 set up properly. */
/* Wrapper around ix86_fixup_binary_operands that asserts no extra
   destination copy is needed (dst must come back as operands[0]).  */
19277 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
19278 machine_mode mode, rtx operands[])
19280 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
19281 gcc_assert (dst == operands[0]);
19284 /* Attempt to expand a binary operator. Make the expansion closer to the
19285 actual machine, then just general_operand, which will allow 3 separate
19286 memory references (one output, two input) in a single insn. */
/* Emits dst = src1 CODE src2 with a FLAGS_REG clobber, after fixing up
   the operands, then copies the result to operands[0] if a temporary
   destination was used.  */
19289 ix86_expand_binary_operator (enum rtx_code code, machine_mode mode,
19292 rtx src1, src2, dst, op, clob;
19294 dst = ix86_fixup_binary_operands (code, mode, operands);
19295 src1 = operands[1];
19296 src2 = operands[2];
19298 /* Emit the instruction. */
19300 op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, mode, src1, src2));
19302 if (reload_completed
19304 && !rtx_equal_p (dst, src1))
19306 /* This is going to be an LEA; avoid splitting it later. */
19311 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
19312 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
19315 /* Fix up the destination if needed. */
19316 if (dst != operands[0])
19317 emit_move_insn (operands[0], dst);
19320 /* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
19321 the given OPERANDS. */
/* If an operand is a SUBREG of a float vector (the usual result of
   casting __m128 style intrinsic types), the operation is performed in
   the float-vector mode so a float logical insn can be used; otherwise
   the generic path at the bottom fixes up operands and emits the op.  */
19324 ix86_expand_vector_logical_operator (enum rtx_code code, machine_mode mode,
19327 rtx op1 = NULL_RTX, op2 = NULL_RTX;
19328 if (SUBREG_P (operands[1]))
19333 else if (SUBREG_P (operands[2]))
19338 /* Optimize (__m128i) d | (__m128i) e and similar code
19339 when d and e are float vectors into float vector logical
19340 insn. In C/C++ without using intrinsics there is no other way
19341 to express vector logical operation on float vectors than
19342 to cast them temporarily to integer vectors. */
19344 && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
19345 && (SUBREG_P (op2) || GET_CODE (op2) == CONST_VECTOR)
19346 && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT
19347 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode)
19348 && SUBREG_BYTE (op1) == 0
19349 && (GET_CODE (op2) == CONST_VECTOR
19350 || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2))
19351 && SUBREG_BYTE (op2) == 0))
19352 && can_create_pseudo_p ())
/* Perform the operation in the float-vector mode of op1's SUBREG_REG
   (the switch's case labels are missing from this extract).  */
19355 switch (GET_MODE (SUBREG_REG (op1)))
19363 dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
19364 if (GET_CODE (op2) == CONST_VECTOR)
19366 op2 = gen_lowpart (GET_MODE (dst), op2);
19367 op2 = force_reg (GET_MODE (dst), op2);
19372 op2 = SUBREG_REG (operands[2]);
19373 if (!vector_operand (op2, GET_MODE (dst)))
19374 op2 = force_reg (GET_MODE (dst), op2);
19376 op1 = SUBREG_REG (op1);
19377 if (!vector_operand (op1, GET_MODE (dst)))
19378 op1 = force_reg (GET_MODE (dst), op1);
19379 emit_insn (gen_rtx_SET (dst,
19380 gen_rtx_fmt_ee (code, GET_MODE (dst),
19382 emit_move_insn (operands[0], gen_lowpart (mode, dst));
/* Generic path: legitimize both sources and emit the op in MODE.  */
19388 if (!vector_operand (operands[1], mode))
19389 operands[1] = force_reg (mode, operands[1]);
19390 if (!vector_operand (operands[2], mode))
19391 operands[2] = force_reg (mode, operands[2]);
19392 ix86_fixup_binary_operands_no_copy (code, mode, operands);
19393 emit_insn (gen_rtx_SET (operands[0],
19394 gen_rtx_fmt_ee (code, mode, operands[1],
19398 /* Return TRUE or FALSE depending on whether the binary operator meets the
19399 appropriate constraints. */
/* Predicate counterpart of ix86_fixup_binary_operands: checks (without
   emitting anything) that { dst, src1, src2 } is a legal two-address
   combination for CODE.  Several bare "return" lines are missing from
   this extract.  */
19402 ix86_binary_operator_ok (enum rtx_code code, machine_mode mode,
19405 rtx dst = operands[0];
19406 rtx src1 = operands[1];
19407 rtx src2 = operands[2];
19409 /* Both source operands cannot be in memory. */
19410 if (MEM_P (src1) && MEM_P (src2))
19413 /* Canonicalize operand order for commutative operators. */
19414 if (ix86_swap_binary_operands_p (code, mode, operands))
19415 std::swap (src1, src2);
19417 /* If the destination is memory, we must have a matching source operand. */
19418 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
19421 /* Source 1 cannot be a constant. */
19422 if (CONSTANT_P (src1))
19425 /* Source 1 cannot be a non-matching memory. */
19426 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
19427 /* Support "andhi/andsi/anddi" as a zero-extending move. */
19428 return (code == AND
19431 || (TARGET_64BIT && mode == DImode))
19432 && satisfies_constraint_L (src2));
19437 /* Attempt to expand a unary operator. Make the expansion closer to the
19438 actual machine, then just general_operand, which will allow 2 separate
19439 memory references (one output, one input) in a single insn. */
/* Emits dst = CODE src (with a FLAGS_REG clobber on the path visible
   below), using a temporary register destination when dst is a
   non-matching MEM, then copies back to operands[0] if needed.  */
19442 ix86_expand_unary_operator (enum rtx_code code, machine_mode mode,
19445 bool matching_memory = false;
19446 rtx src, dst, op, clob;
19451 /* If the destination is memory, and we do not have matching source
19452 operands, do things in registers. */
19455 if (rtx_equal_p (dst, src))
19456 matching_memory = true;
19458 dst = gen_reg_rtx (mode);
19461 /* When source operand is memory, destination must match. */
19462 if (MEM_P (src) && !matching_memory)
19463 src = force_reg (mode, src);
19465 /* Emit the instruction. */
19467 op = gen_rtx_SET (dst, gen_rtx_fmt_e (code, mode, src));
19473 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
19474 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
19477 /* Fix up the destination if needed. */
19478 if (dst != operands[0])
19479 emit_move_insn (operands[0], dst);
19482 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
19483 divisor are within the range [0-255]. */
/* Emits a runtime test (dividend | divisor) & ~0xff == 0; when it holds,
   a cheap 8-bit unsigned divide is used, otherwise the original full-width
   signed/unsigned divmod.  operands[0]/[1] receive quotient/remainder,
   operands[2]/[3] are dividend/divisor.
   NOTE(review): the mode-switch case labels are missing from this
   extract; the generator selection below is per SImode/DImode.  */
19486 ix86_split_idivmod (machine_mode mode, rtx operands[],
19489 rtx_code_label *end_label, *qimode_label;
19490 rtx insn, div, mod;
19491 rtx scratch, tmp0, tmp1, tmp2;
19492 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
19493 rtx (*gen_zero_extend) (rtx, rtx);
19494 rtx (*gen_test_ccno_1) (rtx, rtx);
19499 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
19500 gen_test_ccno_1 = gen_testsi_ccno_1;
19501 gen_zero_extend = gen_zero_extendqisi2;
19504 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
19505 gen_test_ccno_1 = gen_testdi_ccno_1;
19506 gen_zero_extend = gen_zero_extendqidi2;
19509 gcc_unreachable ();
19512 end_label = gen_label_rtx ();
19513 qimode_label = gen_label_rtx ();
19515 scratch = gen_reg_rtx (mode);
19517 /* Use 8bit unsigned divmod if dividend and divisor are within
19518 the range [0-255]. */
19519 emit_move_insn (scratch, operands[2]);
19520 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
19521 scratch, 1, OPTAB_DIRECT);
19522 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
19523 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
19524 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
19525 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
19526 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
19528 insn = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp0));
/* No information either way: predict the branch 50/50.  */
19529 predict_jump (REG_BR_PROB_BASE * 50 / 100);
19530 JUMP_LABEL (insn) = qimode_label;
19532 /* Generate original signed/unsigned divmod. */
19533 div = gen_divmod4_1 (operands[0], operands[1],
19534 operands[2], operands[3]);
19537 /* Branch to the end. */
19538 emit_jump_insn (gen_jump (end_label));
19541 /* Generate 8bit unsigned divide. */
19542 emit_label (qimode_label);
19543 /* Don't use operands[0] for result of 8bit divide since not all
19544 registers support QImode ZERO_EXTRACT. */
19545 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
19546 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
19547 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
19548 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
/* Build REG_EQUAL notes describing quotient and remainder.  */
19552 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
19553 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
19557 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
19558 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
19561 /* Extract remainder from AH. */
19562 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
19563 if (REG_P (operands[1]))
19564 insn = emit_move_insn (operands[1], tmp1);
19567 /* Need a new scratch register since the old one has result
19569 scratch = gen_reg_rtx (mode);
19570 emit_move_insn (scratch, tmp1);
19571 insn = emit_move_insn (operands[1], scratch);
19573 set_unique_reg_note (insn, REG_EQUAL, mod);
19575 /* Zero extend quotient from AL. */
19576 tmp1 = gen_lowpart (QImode, tmp0);
19577 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
19578 set_unique_reg_note (insn, REG_EQUAL, div);
19580 emit_label (end_label);
/* Maximum AGU stall (in half-cycles) tolerated before splitting an LEA,
   and how far the scans below look for definitions/uses.  */
19583 #define LEA_MAX_STALL (3)
19584 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
19586 /* Increase given DISTANCE in half-cycles according to
19587 dependencies between PREV and NEXT instructions.
19588 Add 1 half-cycle if there is no dependency and
19589 go to next cycle if there is some dependency. */
19591 static unsigned int
19592 increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance)
19596 if (!prev || !next)
19597 return distance + (distance & 1) + 2;
19599 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
19600 return distance + 1;
/* A register defined by PREV and used by NEXT forces a full-cycle step
   (round up to even, then add 2 half-cycles).  */
19602 FOR_EACH_INSN_USE (use, next)
19603 FOR_EACH_INSN_DEF (def, prev)
19604 if (!DF_REF_IS_ARTIFICIAL (def)
19605 && DF_REF_REGNO (use) == DF_REF_REGNO (def))
19606 return distance + (distance & 1) + 2;
19608 return distance + 1;
19611 /* Function checks if instruction INSN defines register number
19612 REGNO1 or REGNO2. */
/* Returns whether any non-artificial def of INSN writes REGNO1 or
   REGNO2 (pass INVALID_REGNUM for an unused slot).  The "return true"
   and trailing "return false" lines are missing from this extract.  */
19615 insn_defines_reg (unsigned int regno1, unsigned int regno2,
19620 FOR_EACH_INSN_DEF (def, insn)
19621 if (DF_REF_REG_DEF_P (def)
19622 && !DF_REF_IS_ARTIFICIAL (def)
19623 && (regno1 == DF_REF_REGNO (def)
19624 || regno2 == DF_REF_REGNO (def)))
19630 /* Function checks if instruction INSN uses register number
19631 REGNO as a part of address expression. */
/* True when some use of REGNO in INSN appears inside a memory address
   (DF_REF_REG_MEM_P); return lines are missing from this extract.  */
19634 insn_uses_reg_mem (unsigned int regno, rtx insn)
19638 FOR_EACH_INSN_USE (use, insn)
19639 if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use))
19645 /* Search backward for non-agu definition of register number REGNO1
19646 or register number REGNO2 in basic block starting from instruction
19647 START up to head of basic block or instruction INSN.
19649 Function puts true value into *FOUND var if definition was found
19650 and false otherwise.
19652 Distance in half-cycles between START and found instruction or head
19653 of BB is added to DISTANCE and returned. */
19656 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
19657 rtx_insn *insn, int distance,
19658 rtx_insn *start, bool *found)
19660 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
19661 rtx_insn *prev = start;
19662 rtx_insn *next = NULL;
/* Walk backwards, accumulating half-cycle distance, until the search
   threshold, the BB head, or a matching definition.  */
19668 && distance < LEA_SEARCH_THRESHOLD)
19670 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
19672 distance = increase_distance (prev, next, distance);
19673 if (insn_defines_reg (regno1, regno2, prev))
/* Only a non-LEA (ALU) definition counts as "non-AGU".  */
19675 if (recog_memoized (prev) < 0
19676 || get_attr_type (prev) != TYPE_LEA)
19685 if (prev == BB_HEAD (bb))
19688 prev = PREV_INSN (prev);
19694 /* Search backward for non-agu definition of register number REGNO1
19695 or register number REGNO2 in INSN's basic block until
19696 1. Pass LEA_SEARCH_THRESHOLD instructions, or
19697 2. Reach neighbour BBs boundary, or
19698 3. Reach agu definition.
19699 Returns the distance between the non-agu definition point and INSN.
19700 If no definition point, returns -1. */
19703 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
19706 basic_block bb = BLOCK_FOR_INSN (insn);
19708 bool found = false;
19710 if (insn != BB_HEAD (bb))
19711 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
19712 distance, PREV_INSN (insn),
/* Not found locally: continue into predecessor blocks, special-casing
   a self-loop ("simple_loop") vs. taking the minimum over all preds.  */
19715 if (!found && distance < LEA_SEARCH_THRESHOLD)
19719 bool simple_loop = false;
19721 FOR_EACH_EDGE (e, ei, bb->preds)
19724 simple_loop = true;
19729 distance = distance_non_agu_define_in_bb (regno1, regno2,
19731 BB_END (bb), &found);
19734 int shortest_dist = -1;
19735 bool found_in_bb = false;
19737 FOR_EACH_EDGE (e, ei, bb->preds)
19740 = distance_non_agu_define_in_bb (regno1, regno2,
19746 if (shortest_dist < 0)
19747 shortest_dist = bb_dist;
19748 else if (bb_dist > 0)
19749 shortest_dist = MIN (bb_dist, shortest_dist);
19755 distance = shortest_dist;
19759 /* get_attr_type may modify recog data. We want to make sure
19760 that recog data is valid for instruction INSN, on which
19761 distance_non_agu_define is called. INSN is unchanged here. */
19762 extract_insn_cached (insn);
/* Distances are tracked in half-cycles; report whole cycles.  */
19767 return distance >> 1;
19770 /* Return the distance in half-cycles between INSN and the next
19771 insn that uses register number REGNO in memory address added
19772 to DISTANCE. Return -1 if REGNO0 is set.
19774 Put true value into *FOUND if register usage was found and
19776 Put true value into *REDEFINED if register redefinition was
19777 found and false otherwise. */
19780 distance_agu_use_in_bb (unsigned int regno,
19781 rtx_insn *insn, int distance, rtx_insn *start,
19782 bool *found, bool *redefined)
19784 basic_block bb = NULL;
19785 rtx_insn *next = start;
19786 rtx_insn *prev = NULL;
19789 *redefined = false;
19791 if (start != NULL_RTX)
19793 bb = BLOCK_FOR_INSN (start);
19794 if (start != BB_HEAD (bb))
19795 /* If insn and start belong to the same bb, set prev to insn,
19796 so the call to increase_distance will increase the distance
19797 between insns by 1. */
/* Forward scan mirror of distance_non_agu_define_in_bb: stop at the
   threshold, the BB end, an address use of REGNO, or a redefinition.  */
19803 && distance < LEA_SEARCH_THRESHOLD)
19805 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
19807 distance = increase_distance(prev, next, distance);
19808 if (insn_uses_reg_mem (regno, next))
19810 /* Return DISTANCE if OP0 is used in memory
19811 address in NEXT. */
19816 if (insn_defines_reg (regno, INVALID_REGNUM, next))
19818 /* Return -1 if OP0 is set in NEXT. */
19826 if (next == BB_END (bb))
19829 next = NEXT_INSN (next);
19835 /* Return the distance between INSN and the next insn that uses
19836 register number REGNO0 in memory address. Return -1 if no such
19837 use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
19840 distance_agu_use (unsigned int regno0, rtx_insn *insn)
19842 basic_block bb = BLOCK_FOR_INSN (insn);
19844 bool found = false;
19845 bool redefined = false;
19847 if (insn != BB_END (bb))
19848 distance = distance_agu_use_in_bb (regno0, insn, distance,
19850 &found, &redefined);
/* Not resolved locally: look into successor blocks, special-casing a
   self-loop vs. taking the minimum over all successors.  */
19852 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
19856 bool simple_loop = false;
19858 FOR_EACH_EDGE (e, ei, bb->succs)
19861 simple_loop = true;
19866 distance = distance_agu_use_in_bb (regno0, insn,
19867 distance, BB_HEAD (bb),
19868 &found, &redefined);
19871 int shortest_dist = -1;
19872 bool found_in_bb = false;
19873 bool redefined_in_bb = false;
19875 FOR_EACH_EDGE (e, ei, bb->succs)
19878 = distance_agu_use_in_bb (regno0, insn,
19879 distance, BB_HEAD (e->dest),
19880 &found_in_bb, &redefined_in_bb);
19883 if (shortest_dist < 0)
19884 shortest_dist = bb_dist;
19885 else if (bb_dist > 0)
19886 shortest_dist = MIN (bb_dist, shortest_dist);
19892 distance = shortest_dist;
19896 if (!found || redefined)
/* Half-cycles to whole cycles.  */
19899 return distance >> 1;
19902 /* Define this macro to tune LEA priority vs ADD, it takes effect when
19903 there is a dilemma of choosing LEA or ADD
19904 Negative value: ADD is more preferred than LEA
19906 Positive value: LEA is more preferred than ADD*/
19907 #define IX86_LEA_PRIORITY 0
19909 /* Return true if usage of lea INSN has performance advantage
19910 over a sequence of instructions. Instructions sequence has
19911 SPLIT_COST cycles higher latency than lea latency. */
19914 ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
19915 unsigned int regno2, int split_cost, bool has_scale)
19917 int dist_define, dist_use;
19919 /* For Silvermont if using a 2-source or 3-source LEA for
19920 non-destructive destination purposes, or due to wanting
19921 ability to use SCALE, the use of LEA is justified. */
19922 if (TARGET_SILVERMONT || TARGET_INTEL)
19926 if (split_cost < 1)
19928 if (regno0 == regno1 || regno0 == regno2)
/* Compare distance to the nearest non-AGU def of the sources with the
   distance to the next address use of the destination.  */
19933 dist_define = distance_non_agu_define (regno1, regno2, insn);
19934 dist_use = distance_agu_use (regno0, insn);
19936 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
19938 /* If there is no non AGU operand definition, no AGU
19939 operand usage and split cost is 0 then both lea
19940 and non lea variants have same priority. Currently
19941 we prefer lea for 64 bit code and non lea on 32 bit
19943 if (dist_use < 0 && split_cost == 0)
19944 return TARGET_64BIT || IX86_LEA_PRIORITY;
19949 /* With longer definitions distance lea is more preferable.
19950 Here we change it to take into account splitting cost and
19952 dist_define += split_cost + IX86_LEA_PRIORITY;
19954 /* If there is no use in memory address then we just check
19955 that split cost exceeds AGU stall. */
19957 return dist_define > LEA_MAX_STALL;
19959 /* If this insn has both backward non-agu dependence and forward
19960 agu dependence, the one with short distance takes effect. */
19961 return dist_define >= dist_use;
19964 /* Return true if it is legal to clobber flags by INSN and
19965 false otherwise. */
/* Scans forward from INSN to the end of its BB: any use of FLAGS_REG
   before a redefinition means we may not clobber it; otherwise flags
   must also be dead in the BB's live-out set.  */
19968 ix86_ok_to_clobber_flags (rtx_insn *insn)
19970 basic_block bb = BLOCK_FOR_INSN (insn);
19976 if (NONDEBUG_INSN_P (insn))
19978 FOR_EACH_INSN_USE (use, insn)
19979 if (DF_REF_REG_USE_P (use) && DF_REF_REGNO (use) == FLAGS_REG)
/* A later def of FLAGS_REG kills the old value; clobbering is safe.  */
19982 if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
19986 if (insn == BB_END (bb))
19989 insn = NEXT_INSN (insn);
19992 live = df_get_live_out(bb);
19993 return !REGNO_REG_SET_P (live, FLAGS_REG);
19996 /* Return true if we need to split op0 = op1 + op2 into a sequence of
19997 move and add to avoid AGU stalls. */
20000 ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[])
20002 unsigned int regno0, regno1, regno2;
20004 /* Check if we need to optimize. */
20005 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
20008 /* Check it is correct to split here. */
20009 if (!ix86_ok_to_clobber_flags(insn))
20012 regno0 = true_regnum (operands[0]);
20013 regno1 = true_regnum (operands[1]);
20014 regno2 = true_regnum (operands[2]);
20016 /* We need to split only adds with non destructive
20017 destination operand. */
20018 if (regno0 == regno1 || regno0 == regno2)
/* Split cost of 1: the extra mov needed by the two-insn sequence.  */
20021 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
20024 /* Return true if we should emit lea instruction instead of mov
/* (continuation of the comment above was lost in this extract).  */
20028 ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[])
20030 unsigned int regno0, regno1;
20032 /* Check if we need to optimize. */
20033 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
20036 /* Use lea for reg to reg moves only. */
20037 if (!REG_P (operands[0]) || !REG_P (operands[1]))
20040 regno0 = true_regnum (operands[0]);
20041 regno1 = true_regnum (operands[1]);
/* Zero split cost: lea and mov are the same length here.  */
20043 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
20046 /* Return true if we need to split lea into a sequence of
20047 instructions to avoid AGU stalls. */
20050 ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
20052 unsigned int regno0, regno1, regno2;
20054 struct ix86_address parts;
20057 /* Check we need to optimize. */
20058 if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
20061 /* The "at least two components" test below might not catch simple
20062 move or zero extension insns if parts.base is non-NULL and parts.disp
20063 is const0_rtx as the only components in the address, e.g. if the
20064 register is %rbp or %r13. As this test is much cheaper and moves or
20065 zero extensions are the common case, do this check first. */
20066 if (REG_P (operands[1])
20067 || (SImode_address_operand (operands[1], VOIDmode)
20068 && REG_P (XEXP (operands[1], 0))))
20071 /* Check if it is OK to split here. */
20072 if (!ix86_ok_to_clobber_flags (insn))
20075 ok = ix86_decompose_address (operands[1], &parts);
20078 /* There should be at least two components in the address. */
20079 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
20080 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
20083 /* We should not split into add if non legitimate pic
20084 operand is used as displacement. */
20085 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
20088 regno0 = true_regnum (operands[0]) ;
20089 regno1 = INVALID_REGNUM;
20090 regno2 = INVALID_REGNUM;
20093 regno1 = true_regnum (parts.base);
20095 regno2 = true_regnum (parts.index);
20099 /* Compute how many cycles we will add to execution time
20100 if split lea into a sequence of instructions. */
20101 if (parts.base || parts.index)
20103 /* Have to use mov instruction if non destructive
20104 destination form is used. */
20105 if (regno1 != regno0 && regno2 != regno0)
20108 /* Have to add index to base if both exist. */
20109 if (parts.base && parts.index)
20112 /* Have to use shift and adds if scale is 2 or greater. */
20113 if (parts.scale > 1)
20115 if (regno0 != regno1)
20117 else if (regno2 == regno0)
20120 split_cost += parts.scale;
20123 /* Have to use add instruction with immediate if
20124 disp is non zero. */
20125 if (parts.disp && parts.disp != const0_rtx)
20128 /* Subtract the price of lea. */
20132 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
20136 /* Emit x86 binary operand CODE in mode MODE, where the first operand
20137 matches destination. RTX includes clobber of FLAGS_REG. */
/* NOTE(review): this extract elides interior lines — the tail of the
   parameter list (presumably DST and SRC — confirm against full source),
   the local declarations of `op' and `clob', and the closing brace.  */
20140 ix86_emit_binop (enum rtx_code code, machine_mode mode,
/* Build dst := (CODE dst src), pair it with a clobber of the flags
   register, and emit both as a single PARALLEL insn so the flags
   clobber is part of the pattern.  */
20145 op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, mode, dst, src));
20146 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
20148 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
20151 /* Return true if regno1 def is nearest to the insn. */
/* NOTE(review): extract elides interior lines (the `return' statements
   inside the loop bodies and the closing brace).  The visible logic walks
   backwards from INSN to the head of its basic block; presumably it
   returns true when REGNO1's definition is found first and false when
   REGNO2's is — confirm against full source.  */
20154 find_nearest_reg_def (rtx_insn *insn, int regno1, int regno2)
20156 rtx_insn *prev = insn;
20157 rtx_insn *start = BB_HEAD (BLOCK_FOR_INSN (insn));
/* Scan backwards, skipping notes/debug insns, until the bb head.  */
20161 while (prev && prev != start)
20163 if (!INSN_P (prev) || !NONDEBUG_INSN_P (prev))
20165 prev = PREV_INSN (prev);
20168 if (insn_defines_reg (regno1, INVALID_REGNUM, prev))
20170 else if (insn_defines_reg (regno2, INVALID_REGNUM, prev))
20172 prev = PREV_INSN (prev);
20175 /* None of the regs is defined in the bb. */
20179 /* Split lea instructions into a sequence of instructions
20180 which are executed on ALU to avoid AGU stalls.
20181 It is assumed that it is allowed to clobber flags register
20182 at lea position. */
/* NOTE(review): extract elides many interior lines (local declarations
   of ok/target/tmp/tmp1/adds, braces, gcc_assert on `ok', and several
   `else' arms).  Comments below describe only the visible structure.  */
20185 ix86_split_lea_for_addr (rtx_insn *insn, rtx operands[], machine_mode mode)
20187 unsigned int regno0, regno1, regno2;
20188 struct ix86_address parts;
/* Decompose the LEA address operand into base/index/disp/scale.  */
20192 ok = ix86_decompose_address (operands[1], &parts);
20195 target = gen_lowpart (mode, operands[0]);
20197 regno0 = true_regnum (target);
20198 regno1 = INVALID_REGNUM;
20199 regno2 = INVALID_REGNUM;
/* Narrow the address components to MODE and record hard regnos.  */
20203 parts.base = gen_lowpart (mode, parts.base);
20204 regno1 = true_regnum (parts.base);
20209 parts.index = gen_lowpart (mode, parts.index);
20210 regno2 = true_regnum (parts.index);
20214 parts.disp = gen_lowpart (mode, parts.disp);
/* Scaled-index case: emit repeated adds or a shift.  */
20216 if (parts.scale > 1)
20218 /* Case r1 = r1 + ... */
20219 if (regno1 == regno0)
20221 /* If we have a case r1 = r1 + C * r2 then we
20222 should use multiplication which is very
20223 expensive. Assume cost model is wrong if we
20224 have such case here. */
20225 gcc_assert (regno2 != regno0);
20227 for (adds = parts.scale; adds > 0; adds--)
20228 ix86_emit_binop (PLUS, mode, target, parts.index);
20232 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
20233 if (regno0 != regno2)
20234 emit_insn (gen_rtx_SET (target, parts.index));
20236 /* Use shift for scaling. */
20237 ix86_emit_binop (ASHIFT, mode, target,
20238 GEN_INT (exact_log2 (parts.scale)));
20241 ix86_emit_binop (PLUS, mode, target, parts.base);
20243 if (parts.disp && parts.disp != const0_rtx)
20244 ix86_emit_binop (PLUS, mode, target, parts.disp);
/* No base and no index: the address is a bare displacement.  */
20247 else if (!parts.base && !parts.index)
20249 gcc_assert(parts.disp);
20250 emit_insn (gen_rtx_SET (target, parts.disp));
20256 if (regno0 != regno2)
20257 emit_insn (gen_rtx_SET (target, parts.index));
20259 else if (!parts.index)
20261 if (regno0 != regno1)
20262 emit_insn (gen_rtx_SET (target, parts.base));
/* Both base and index present, unscaled.  */
20266 if (regno0 == regno1)
20268 else if (regno0 == regno2)
20274 /* Find better operand for SET instruction, depending
20275 on which definition is farther from the insn. */
20276 if (find_nearest_reg_def (insn, regno1, regno2))
20277 tmp = parts.index, tmp1 = parts.base;
20279 tmp = parts.base, tmp1 = parts.index;
20281 emit_insn (gen_rtx_SET (target, tmp));
20283 if (parts.disp && parts.disp != const0_rtx)
20284 ix86_emit_binop (PLUS, mode, target, parts.disp);
20286 ix86_emit_binop (PLUS, mode, target, tmp1);
20290 ix86_emit_binop (PLUS, mode, target, tmp);
20293 if (parts.disp && parts.disp != const0_rtx)
20294 ix86_emit_binop (PLUS, mode, target, parts.disp);
20298 /* Return true if it is ok to optimize an ADD operation to LEA
20299 operation to avoid flag register consumation. For most processors,
20300 ADD is faster than LEA. For the processors like BONNELL, if the
20301 destination register of LEA holds an actual address which will be
20302 used soon, LEA is better and otherwise ADD is better. */
/* NOTE(review): extract elides the `return' keywords on the early-exit
   paths and the closing brace.  */
20305 ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[])
20307 unsigned int regno0 = true_regnum (operands[0]);
20308 unsigned int regno1 = true_regnum (operands[1]);
20309 unsigned int regno2 = true_regnum (operands[2]);
20311 /* If a = b + c, (a!=b && a!=c), must use lea form. */
20312 if (regno0 != regno1 && regno0 != regno2)
/* Without AGU-stall tuning (or when optimizing for size) there is no
   reason to prefer the split; fall through to the cost model only when
   TARGET_OPT_AGU is in effect.  */
20315 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
20318 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
20321 /* Return true if destination reg of SET_BODY is shift count of
/* (Header comment continues on elided lines — presumably "... of
   USE_BODY"; confirm against full source.)  */
20325 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
20331 /* Retrieve destination of SET_BODY. */
/* NOTE(review): extract elides case labels, `return' statements and the
   declarations of set_dest/shift_rtx/i — the switches below are over the
   RTX codes of SET_BODY and USE_BODY (SET vs. PARALLEL, recursing into
   PARALLEL elements).  */
20332 switch (GET_CODE (set_body))
20335 set_dest = SET_DEST (set_body);
20336 if (!set_dest || !REG_P (set_dest))
20340 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
20341 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
20349 /* Retrieve shift count of USE_BODY. */
20350 switch (GET_CODE (use_body))
20353 shift_rtx = XEXP (use_body, 1);
20356 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
20357 if (ix86_dep_by_shift_count_body (set_body,
20358 XVECEXP (use_body, 0, i)))
/* Only shift/rotate RTXes have a "count" operand to check.  */
20366 && (GET_CODE (shift_rtx) == ASHIFT
20367 || GET_CODE (shift_rtx) == LSHIFTRT
20368 || GET_CODE (shift_rtx) == ASHIFTRT
20369 || GET_CODE (shift_rtx) == ROTATE
20370 || GET_CODE (shift_rtx) == ROTATERT))
20372 rtx shift_count = XEXP (shift_rtx, 1);
20374 /* Return true if shift count is dest of SET_BODY. */
20375 if (REG_P (shift_count))
20377 /* Add check since it can be invoked before register
20378 allocation in pre-reload schedule. */
20379 if (reload_completed
20380 && true_regnum (set_dest) == true_regnum (shift_count))
20382 else if (REGNO(set_dest) == REGNO(shift_count))
20390 /* Return true if destination reg of SET_INSN is shift count of
/* (Comment continues on elided lines — presumably "... of USE_INSN";
   confirm against full source.)  Thin wrapper that compares the two
   insns' PATTERNs via ix86_dep_by_shift_count_body.  */
20394 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
20396 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
20397 PATTERN (use_insn));
20400 /* Return TRUE or FALSE depending on whether the unary operator meets the
20401 appropriate constraints. */
/* NOTE(review): the rtx_code parameter is unnamed (unused); the operands
   parameter and the `return' lines are on elided lines.  */
20404 ix86_unary_operator_ok (enum rtx_code,
20408 /* If one of operands is memory, source and destination must match. */
20409 if ((MEM_P (operands[0])
20410 || MEM_P (operands[1]))
20411 && ! rtx_equal_p (operands[0], operands[1]))
20416 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
20417 are ok, keeping in mind the possible movddup alternative. */
20420 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
/* A memory destination must match the source element selected by HIGH;
   two memory sources are only OK when SSE3 movddup can duplicate one.
   (The final `return' for the remaining case is on an elided line.)  */
20422 if (MEM_P (operands[0]))
20423 return rtx_equal_p (operands[0], operands[1 + high]);
20424 if (MEM_P (operands[1]) && MEM_P (operands[2]))
20425 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
20429 /* Post-reload splitter for converting an SF or DFmode value in an
20430 SSE register into an unsigned SImode. */
/* NOTE(review): extract elides braces/else keywords around the vecmode
   dispatch and the TARGET_SSE4_1-style alternatives — structure below is
   partial.  The algorithm: compare VALUE against 2**31, conditionally
   subtract 2**31, do a signed conversion, then XOR the sign bit back in
   to recover the unsigned result.  */
20433 ix86_split_convert_uns_si_sse (rtx operands[])
20435 machine_mode vecmode;
20436 rtx value, large, zero_or_two31, input, two31, x;
20438 large = operands[1];
20439 zero_or_two31 = operands[2];
20440 input = operands[3];
20441 two31 = operands[4];
20442 vecmode = GET_MODE (large);
20443 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
20445 /* Load up the value into the low element. We must ensure that the other
20446 elements are valid floats -- zero is the easiest such value. */
20449 if (vecmode == V4SFmode)
20450 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
20452 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
20456 input = gen_rtx_REG (vecmode, REGNO (input));
20457 emit_move_insn (value, CONST0_RTX (vecmode));
20458 if (vecmode == V4SFmode)
20459 emit_insn (gen_sse_movss (value, value, input));
20461 emit_insn (gen_sse2_movsd (value, value, input));
20464 emit_move_insn (large, two31);
20465 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
/* large := (large <= value) mask; zero_or_two31 := mask ? 2**31 : 0.  */
20467 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
20468 emit_insn (gen_rtx_SET (large, x));
20470 x = gen_rtx_AND (vecmode, zero_or_two31, large);
20471 emit_insn (gen_rtx_SET (zero_or_two31, x));
20473 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
20474 emit_insn (gen_rtx_SET (value, x));
/* Turn the comparison mask into just the sign bit (1 << 31).  */
20476 large = gen_rtx_REG (V4SImode, REGNO (large));
20477 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
20479 x = gen_rtx_REG (V4SImode, REGNO (value));
20480 if (vecmode == V4SFmode)
20481 emit_insn (gen_fix_truncv4sfv4si2 (x, value));
20483 emit_insn (gen_sse2_cvttpd2dq (x, value));
/* XOR the sign bit back in for values that were >= 2**31.  */
20486 emit_insn (gen_xorv4si3 (value, value, large));
20489 /* Convert an unsigned DImode value into a DFmode, using only SSE.
20490 Expects the 64-bit DImode to be supplied in a pair of integral
20491 registers. Requires SSE2; will use SSE3 if available. For x86_32,
20492 -mfpmath=sse, !optimize_size only. */
/* NOTE(review): extract elides braces, an `else' branch and the
   declaration of `x'; the TARGET_SSE3 vs. non-SSE3 split around the
   final horizontal add is partially visible.  */
20495 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
20497 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
20498 rtx int_xmm, fp_xmm;
20499 rtx biases, exponents;
/* Get the 64-bit input into the low half of an SSE register, choosing
   the cheapest path for the current tuning.  */
20502 int_xmm = gen_reg_rtx (V4SImode);
20503 if (TARGET_INTER_UNIT_MOVES_TO_VEC)
20504 emit_insn (gen_movdi_to_sse (int_xmm, input));
20505 else if (TARGET_SSE_SPLIT_REGS)
20507 emit_clobber (int_xmm);
20508 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
20512 x = gen_reg_rtx (V2DImode);
20513 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
20514 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
20517 x = gen_rtx_CONST_VECTOR (V4SImode,
20518 gen_rtvec (4, GEN_INT (0x43300000UL),
20519 GEN_INT (0x45300000UL),
20520 const0_rtx, const0_rtx));
20521 exponents = validize_mem (force_const_mem (V4SImode, x));
20523 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
20524 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
20526 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
20527 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
20528 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
20529 (0x1.0p84 + double(fp_value_hi_xmm)).
20530 Note these exponents differ by 32. */
20532 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
20534 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
20535 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
20536 real_ldexp (&bias_lo_rvt, &dconst1, 52);
20537 real_ldexp (&bias_hi_rvt, &dconst1, 84);
20538 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
20539 x = const_double_from_real_value (bias_hi_rvt, DFmode);
20540 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
20541 biases = validize_mem (force_const_mem (V2DFmode, biases));
20542 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
20544 /* Add the upper and lower DFmode values together. */
20546 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
20549 x = copy_to_mode_reg (V2DFmode, fp_xmm);
20550 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
20551 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
/* The scalar DF result lives in element 0 of fp_xmm.  */
20554 ix86_expand_vector_extract (false, target, fp_xmm, 0);
20557 /* Not used, but eases macroization of patterns. */
/* Deliberately unreachable stub; exists only so machine-description
   macro iterators can reference a uniform expander name.  */
20559 ix86_expand_convert_uns_sixf_sse (rtx, rtx)
20561 gcc_unreachable ();
20564 /* Convert an unsigned SImode value into a DFmode. Only currently used
20565 for SSE, but applicable anywhere. */
/* NOTE(review): declarations of `x' and `fp' are on elided lines.
   Strategy: flip the sign bit (add -2**31) so the value fits signed
   SImode, convert, then add 2**31.0 back in DFmode.  */
20568 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
20570 REAL_VALUE_TYPE TWO31r;
20573 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
20574 NULL, 1, OPTAB_DIRECT);
20576 fp = gen_reg_rtx (DFmode);
20577 emit_insn (gen_floatsidf2 (fp, x));
20579 real_ldexp (&TWO31r, &dconst1, 31);
20580 x = const_double_from_real_value (TWO31r, DFmode);
20582 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
/* expand_simple_binop may not have used TARGET; copy if needed.  */
20584 emit_move_insn (target, x);
20587 /* Convert a signed DImode value into a DFmode. Only used for SSE in
20588 32-bit mode; otherwise we have a direct convert instruction. */
20591 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
20593 REAL_VALUE_TYPE TWO32r;
20594 rtx fp_lo, fp_hi, x;
20596 fp_lo = gen_reg_rtx (DFmode);
20597 fp_hi = gen_reg_rtx (DFmode);
/* result = (double) hi32 * 2**32 + (double) (unsigned) lo32.  */
20599 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
20601 real_ldexp (&TWO32r, &dconst1, 32);
20602 x = const_double_from_real_value (TWO32r, DFmode);
20603 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
20605 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
20607 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
/* (Trailing arguments of the binop call and the guarded final move are
   on elided lines.)  */
20610 emit_move_insn (target, x);
20613 /* Convert an unsigned SImode value into a SFmode, using only SSE.
20614 For x86_32, -mfpmath=sse, !optimize_size only. */
20616 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
20618 REAL_VALUE_TYPE ONE16r;
20619 rtx fp_hi, fp_lo, int_hi, int_lo, x;
/* Split the 32-bit input into 16-bit halves, convert each exactly,
   then recombine as hi * 2**16 + lo in SFmode.  */
20621 real_ldexp (&ONE16r, &dconst1, 16);
20622 x = const_double_from_real_value (ONE16r, SFmode);
20623 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
20624 NULL, 0, OPTAB_DIRECT);
20625 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
20626 NULL, 0, OPTAB_DIRECT);
20627 fp_hi = gen_reg_rtx (SFmode);
20628 fp_lo = gen_reg_rtx (SFmode);
20629 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
20630 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
20631 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
20633 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
20635 if (!rtx_equal_p (target, fp_hi))
20636 emit_move_insn (target, fp_hi)ate;
20639 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
20640 a vector of unsigned ints VAL to vector of floats TARGET. */
/* NOTE(review): the declaration of the tmp[] array is on an elided
   line.  Same hi/lo 16-bit split technique as the scalar
   ix86_expand_convert_uns_sisf_sse, vectorized.  */
20643 ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
20646 REAL_VALUE_TYPE TWO16r;
20647 machine_mode intmode = GET_MODE (val);
20648 machine_mode fltmode = GET_MODE (target);
20649 rtx (*cvt) (rtx, rtx);
20651 if (intmode == V4SImode)
20652 cvt = gen_floatv4siv4sf2;
20654 cvt = gen_floatv8siv8sf2;
20655 tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
20656 tmp[0] = force_reg (intmode, tmp[0]);
20657 tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
20659 tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
20660 NULL_RTX, 1, OPTAB_DIRECT);
20661 tmp[3] = gen_reg_rtx (fltmode);
20662 emit_insn (cvt (tmp[3], tmp[1]));
20663 tmp[4] = gen_reg_rtx (fltmode);
20664 emit_insn (cvt (tmp[4], tmp[2]));
20665 real_ldexp (&TWO16r, &dconst1, 16);
20666 tmp[5] = const_double_from_real_value (TWO16r, SFmode);
20667 tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
20668 tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
20670 tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
20672 if (tmp[7] != target)
20673 emit_move_insn (target, tmp[7]);
20676 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
20677 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
20678 This is done by doing just signed conversion if < 0x1p31, and otherwise by
20679 subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
/* NOTE(review): the `switch (mode)' keyword line, loop variable `i'
   declaration, `else' keyword and closing braces are elided.  */
20682 ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
20684 REAL_VALUE_TYPE TWO31r;
20685 rtx two31r, tmp[4];
20686 machine_mode mode = GET_MODE (val);
20687 machine_mode scalarmode = GET_MODE_INNER (mode);
20688 machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
20689 rtx (*cmp) (rtx, rtx, rtx, rtx);
20692 for (i = 0; i < 3; i++)
20693 tmp[i] = gen_reg_rtx (mode);
20694 real_ldexp (&TWO31r, &dconst1, 31);
20695 two31r = const_double_from_real_value (TWO31r, scalarmode);
20696 two31r = ix86_build_const_vector (mode, 1, two31r);
20697 two31r = force_reg (mode, two31r);
/* Pick the mask-compare builder matching the vector float mode.  */
20700 case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
20701 case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
20702 case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
20703 case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
20704 default: gcc_unreachable ();
/* tmp[0] := mask of lanes where 2**31 <= val.  */
20706 tmp[3] = gen_rtx_LE (mode, two31r, val);
20707 emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
20708 tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
/* Produce *XORP = 0x80000000 in exactly the masked lanes, via shift
   when cheap (SSE or AVX2), otherwise via an AND with a constant.  */
20710 if (intmode == V4SImode || TARGET_AVX2)
20711 *xorp = expand_simple_binop (intmode, ASHIFT,
20712 gen_lowpart (intmode, tmp[0]),
20713 GEN_INT (31), NULL_RTX, 0,
20717 rtx two31 = GEN_INT (HOST_WIDE_INT_1U << 31);
20718 two31 = ix86_build_const_vector (intmode, 1, two31);
20719 *xorp = expand_simple_binop (intmode, AND,
20720 gen_lowpart (intmode, tmp[0]),
20721 two31, NULL_RTX, 0,
20724 return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
20728 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
20729 then replicate the value for all elements of the vector
/* (Comment tail and the mode `switch' with its case labels are on
   elided lines; the visible path handles vector modes.)  */
20733 ix86_build_const_vector (machine_mode mode, bool vect, rtx value)
20737 machine_mode scalar_mode;
20760 n_elt = GET_MODE_NUNITS (mode);
20761 v = rtvec_alloc (n_elt);
20762 scalar_mode = GET_MODE_INNER (mode);
20764 RTVEC_ELT (v, 0) = value;
/* Element 0 always gets VALUE; remaining lanes get VALUE only when
   VECT, otherwise scalar zero.  */
20766 for (i = 1; i < n_elt; ++i)
20767 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
20769 return gen_rtx_CONST_VECTOR (mode, v);
20772 gcc_unreachable ();
20776 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
20777 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
20778 for an SSE register. If VECT is true, then replicate the mask for
20779 all elements of the vector register. If INVERT is true, then create
20780 a mask excluding the sign bit. */
/* NOTE(review): the mode `switch' selecting vec_mode/imode per MODE and
   the declarations of w/mask/v are on elided lines.  */
20783 ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert)
20785 machine_mode vec_mode, imode;
20813 vec_mode = VOIDmode;
20818 gcc_unreachable ();
/* Build a wide-int with only the element's sign bit set; INVERT
   complements it to mask everything but the sign bit.  */
20821 machine_mode inner_mode = GET_MODE_INNER (mode);
20822 w = wi::set_bit_in_zero (GET_MODE_BITSIZE (inner_mode) - 1,
20823 GET_MODE_BITSIZE (inner_mode));
20825 w = wi::bit_not (w);
20827 /* Force this value into the low part of a fp vector constant. */
20828 mask = immed_wide_int_const (w, imode);
20829 mask = gen_lowpart (inner_mode, mask);
/* VOIDmode vec_mode means the scalar mask itself is wanted.  */
20831 if (vec_mode == VOIDmode)
20832 return force_reg (inner_mode, mask);
20834 v = ix86_build_const_vector (vec_mode, vect, mask);
20835 return force_reg (vec_mode, v);
20838 /* Generate code for floating point ABS or NEG. */
/* NOTE(review): extract elides the operands[] parameter, the TFmode /
   SFmode / DFmode vmode assignments, dst/src setup, and the non-SSE
   path; only the SSE masking skeleton is visible.  */
20841 ix86_expand_fp_absneg_operator (enum rtx_code code, machine_mode mode,
20844 rtx mask, set, dst, src;
20845 bool use_sse = false;
20846 bool vector_mode = VECTOR_MODE_P (mode);
20847 machine_mode vmode = mode;
20851 else if (mode == TFmode)
20853 else if (TARGET_SSE_MATH)
20855 use_sse = SSE_FLOAT_MODE_P (mode);
20856 if (mode == SFmode)
20858 else if (mode == DFmode)
20862 /* NEG and ABS performed with SSE use bitwise mask operations.
20863 Create the appropriate mask now. */
/* ABS clears the sign bit (inverted mask), NEG flips it.  */
20865 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
20872 set = gen_rtx_fmt_e (code, mode, src);
20873 set = gen_rtx_SET (dst, set);
/* The mask is attached as a USE; scalar x87-capable modes also need a
   flags clobber in the pattern.  */
20880 use = gen_rtx_USE (VOIDmode, mask);
20882 par = gen_rtvec (2, set, use);
20885 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
20886 par = gen_rtvec (3, set, use, clob);
20888 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
20894 /* Expand a copysign operation. Special case operand 0 being a constant. */
/* NOTE(review): assignments of op0/op1 from operands[], the vmode
   selections and several braces are on elided lines.  */
20897 ix86_expand_copysign (rtx operands[])
20899 machine_mode mode, vmode;
20900 rtx dest, op0, op1, mask, nmask;
20902 dest = operands[0];
20906 mode = GET_MODE (dest);
20908 if (mode == SFmode)
20910 else if (mode == DFmode)
/* Constant magnitude: strip its sign up front and use the _const
   pattern, which needs only the sign-bit mask.  */
20915 if (CONST_DOUBLE_P (op0))
20917 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
20919 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
20920 op0 = simplify_unary_operation (ABS, mode, op0, mode);
20922 if (mode == SFmode || mode == DFmode)
20924 if (op0 == CONST0_RTX (mode))
20925 op0 = CONST0_RTX (vmode)
20928 rtx v = ix86_build_const_vector (vmode, false, op0);
20930 op0 = force_reg (vmode, v);
20933 else if (op0 != CONST0_RTX (mode))
20934 op0 = force_reg (mode, op0);
20936 mask = ix86_build_signbit_mask (vmode, 0, 0);
20938 if (mode == SFmode)
20939 copysign_insn = gen_copysignsf3_const;
20940 else if (mode == DFmode)
20941 copysign_insn = gen_copysigndf3_const;
20943 copysign_insn = gen_copysigntf3_const;
20945 emit_insn (copysign_insn (dest, op0, op1, mask));
/* Variable magnitude: the _var pattern needs both the sign mask and
   its complement (nmask).  */
20949 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
20951 nmask = ix86_build_signbit_mask (vmode, 0, 1);
20952 mask = ix86_build_signbit_mask (vmode, 0, 0);
20954 if (mode == SFmode)
20955 copysign_insn = gen_copysignsf3_var;
20956 else if (mode == DFmode)
20957 copysign_insn = gen_copysigndf3_var;
20959 copysign_insn = gen_copysigntf3_var;
20961 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
20965 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
20966 be a constant, and so has already been expanded into a vector constant. */
20969 ix86_split_copysign_const (rtx operands[])
20971 machine_mode mode, vmode;
20972 rtx dest, op0, mask, x;
20974 dest = operands[0];
20976 mask = operands[3];
20978 mode = GET_MODE (dest);
20979 vmode = GET_MODE (mask);
/* dest := (dest & mask) | op0 — keep only the sign bit of the input,
   then OR in the constant magnitude (skipped when it is zero).  */
20981 dest = simplify_gen_subreg (vmode, dest, mode, 0);
20982 x = gen_rtx_AND (vmode, dest, mask);
20983 emit_insn (gen_rtx_SET (dest, x));
20985 if (op0 != CONST0_RTX (vmode))
20987 x = gen_rtx_IOR (vmode, dest, op0);
20988 emit_insn (gen_rtx_SET (dest, x));
20992 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
20993 so we have to do two masks. */
/* NOTE(review): op0/op1 assignments, a `return', `else' keywords and
   braces are on elided lines; the alternative numbers refer to the
   insn pattern's constraint alternatives.  */
20996 ix86_split_copysign_var (rtx operands[])
20998 machine_mode mode, vmode;
20999 rtx dest, scratch, op0, op1, mask, nmask, x;
21001 dest = operands[0];
21002 scratch = operands[1];
21005 nmask = operands[4];
21006 mask = operands[5];
21008 mode = GET_MODE (dest);
21009 vmode = GET_MODE (mask);
21011 if (rtx_equal_p (op0, op1))
21013 /* Shouldn't happen often (it's useless, obviously), but when it does
21014 we'd generate incorrect code if we continue below. */
21015 emit_move_insn (dest, op0);
21019 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
21021 gcc_assert (REGNO (op1) == REGNO (scratch));
/* scratch := op1 & mask (sign bits); dest := ~dest & op0 (magnitude);
   combined by the final IOR below.  */
21023 x = gen_rtx_AND (vmode, scratch, mask);
21024 emit_insn (gen_rtx_SET (scratch, x));
21027 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
21028 x = gen_rtx_NOT (vmode, dest);
21029 x = gen_rtx_AND (vmode, x, op0);
21030 emit_insn (gen_rtx_SET (dest, x));
21034 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
21036 x = gen_rtx_AND (vmode, scratch, mask);
21038 else /* alternative 2,4 */
21040 gcc_assert (REGNO (mask) == REGNO (scratch));
21041 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
21042 x = gen_rtx_AND (vmode, scratch, op1);
21044 emit_insn (gen_rtx_SET (scratch, x));
21046 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
21048 dest = simplify_gen_subreg (vmode, op0, mode, 0);
21049 x = gen_rtx_AND (vmode, dest, nmask);
21051 else /* alternative 3,4 */
21053 gcc_assert (REGNO (nmask) == REGNO (dest));
21055 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
21056 x = gen_rtx_AND (vmode, dest, op0);
21058 emit_insn (gen_rtx_SET (dest, x));
/* Final combine: dest := dest | scratch.  */
21061 x = gen_rtx_IOR (vmode, dest, scratch);
21062 emit_insn (gen_rtx_SET (dest, x));
21065 /* Return TRUE or FALSE depending on whether the first SET in INSN
21066 has source and destination with matching CC modes, and that the
21067 CC mode is at least as constrained as REQ_MODE. */
/* NOTE(review): the declaration of `set', the `switch (set_mode)' line,
   its case labels and early `return false' statements are elided; the
   visible `if's are the per-mode compatibility checks.  */
21070 ix86_match_ccmode (rtx insn, machine_mode req_mode)
21073 machine_mode set_mode;
21075 set = PATTERN (insn);
21076 if (GET_CODE (set) == PARALLEL)
21077 set = XVECEXP (set, 0, 0);
21078 gcc_assert (GET_CODE (set) == SET);
21079 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
21081 set_mode = GET_MODE (SET_DEST (set));
21085 if (req_mode != CCNOmode
21086 && (req_mode != CCmode
21087 || XEXP (SET_SRC (set), 1) != const0_rtx))
21091 if (req_mode == CCGCmode)
21095 if (req_mode == CCGOCmode || req_mode == CCNOmode)
21099 if (req_mode == CCZmode)
21110 if (set_mode != req_mode)
21115 gcc_unreachable ();
/* Finally the COMPARE itself must carry the same CC mode as the dest.  */
21118 return GET_MODE (SET_SRC (set)) == set_mode;
21121 /* Generate insn patterns to do an integer compare of OPERANDS. */
/* NOTE(review): declarations of `flags' and `tmp' are on elided lines.
   Emits FLAGS_REG := COMPARE (op0, op1) in the CC mode selected for
   CODE and returns the condition rtx for the flags consumer.  */
21124 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
21126 machine_mode cmpmode;
21129 cmpmode = SELECT_CC_MODE (code, op0, op1);
21130 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
21132 /* This is very simple, but making the interface the same as in the
21133 FP case makes the rest of the code easier. */
21134 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
21135 emit_insn (gen_rtx_SET (flags, tmp));
21137 /* Return the test that should be put into the flags user, i.e.
21138 the bcc, scc, or cmov instruction. */
21139 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
21142 /* Figure out whether to use ordered or unordered fp comparisons.
21143 Return the appropriate mode to use. */
/* The rtx_code parameter is unnamed/unused: under -mieee-fp all FP
   compares are made non-trapping (CCFPUmode) regardless of the code.  */
21146 ix86_fp_compare_mode (enum rtx_code)
21148 /* ??? In order to make all comparisons reversible, we do all comparisons
21149 non-trapping when compiling for IEEE. Once gcc is able to distinguish
21150 all forms trapping and nontrapping comparisons, we can make inequality
21151 comparisons trapping again, since it results in better code when using
21152 FCOM based compares. */
21153 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
/* Select the CC mode needed to represent comparison CODE of OP0/OP1.
   NOTE(review): the `switch (code)' line, the returned CC modes on most
   arms, and the closing brace are elided in this extract — only the
   case labels and their commentary are visible.  */
21157 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
21159 machine_mode mode = GET_MODE (op0);
21161 if (SCALAR_FLOAT_MODE_P (mode))
21163 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
21164 return ix86_fp_compare_mode (code);
21169 /* Only zero flag is needed. */
21170 case EQ: /* ZF=0 */
21171 case NE: /* ZF!=0 */
21173 /* Codes needing carry flag. */
21174 case GEU: /* CF=0 */
21175 case LTU: /* CF=1 */
21176 /* Detect overflow checks. They need just the carry flag. */
21177 if (GET_CODE (op0) == PLUS
21178 && (rtx_equal_p (op1, XEXP (op0, 0))
21179 || rtx_equal_p (op1, XEXP (op0, 1))))
21183 case GTU: /* CF=0 & ZF=0 */
21184 case LEU: /* CF=1 | ZF=1 */
21186 /* Codes possibly doable only with sign flag when
21187 comparing against zero. */
21188 case GE: /* SF=OF or SF=0 */
21189 case LT: /* SF<>OF or SF=1 */
21190 if (op1 == const0_rtx)
21193 /* For other cases Carry flag is not required. */
21195 /* Codes doable only with sign flag when comparing
21196 against zero, but we miss jump instruction for it
21197 so we need to use relational tests against overflow
21198 that thus needs to be zero. */
21199 case GT: /* ZF=0 & SF=OF */
21200 case LE: /* ZF=1 | SF<>OF */
21201 if (op1 == const0_rtx)
21205 /* strcmp pattern do (use flags) and combine may ask us for proper
21210 gcc_unreachable ();
21214 /* Return the fixed registers used for condition codes. */
/* NOTE(review): the body (assignments to *P1/*P2 — presumably FLAGS_REG
   and an invalid second reg — and the return) is on elided lines;
   confirm against full source.  */
21217 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
21224 /* If two condition code modes are compatible, return a condition code
21225 mode which is compatible with both. Otherwise, return
/* (Comment tail — presumably "VOIDmode" — and several return statements
   are on elided lines.)  */
21228 static machine_mode
21229 ix86_cc_modes_compatible (machine_mode m1, machine_mode m2)
21234 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
/* CCGC and CCGOC are mutually compatible; CCZ folds into either.  */
21237 if ((m1 == CCGCmode && m2 == CCGOCmode)
21238 || (m1 == CCGOCmode && m2 == CCGCmode))
21241 if (m1 == CCZmode && (m2 == CCGCmode || m2 == CCGOCmode))
21243 else if (m2 == CCZmode && (m1 == CCGCmode || m1 == CCGOCmode))
21249 gcc_unreachable ();
21281 /* These are only compatible with themselves, which we already
21288 /* Return a comparison we can do and that it is equivalent to
21289 swap_condition (code) apart possibly from orderedness.
21290 But, never change orderedness if TARGET_IEEE_FP, returning
21291 UNKNOWN in that case if necessary. */
21293 static enum rtx_code
21294 ix86_fp_swap_condition (enum rtx_code code)
/* NOTE(review): the `switch (code)' line and closing braces are elided.
   Each swap below would flip ordered<->unordered, which is only legal
   when not honoring IEEE NaN semantics.  */
21298 case GT: /* GTU - CF=0 & ZF=0 */
21299 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
21300 case GE: /* GEU - CF=0 */
21301 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
21302 case UNLT: /* LTU - CF=1 */
21303 return TARGET_IEEE_FP ? UNKNOWN : GT;
21304 case UNLE: /* LEU - CF=1 | ZF=1 */
21305 return TARGET_IEEE_FP ? UNKNOWN : GE;
21307 return swap_condition (code);
21311 /* Return cost of comparison CODE using the best strategy for performance.
21312 All following functions do use number of instructions as a cost metrics.
21313 In future this should be tweaked to compute bytes for optimize_size and
21314 take into account performance of various instructions on various CPUs. */
21317 ix86_fp_comparison_cost (enum rtx_code code)
21321 /* The cost of code using bit-twiddling on %ah. */
/* NOTE(review): the `switch (code)' over comparison codes that sets
   arith_cost (and the IX86_FPCMP_ARITH return) is mostly elided.  */
21338 arith_cost = TARGET_IEEE_FP ? 5 : 4;
21342 arith_cost = TARGET_IEEE_FP ? 6 : 4;
21345 gcc_unreachable ();
/* COMI/SAHF strategies cost a fixed small number of insns, bumped by
   one when the arithmetic fallback would be expensive.  */
21348 switch (ix86_fp_comparison_strategy (code))
21350 case IX86_FPCMP_COMI:
21351 return arith_cost > 4 ? 3 : 2;
21352 case IX86_FPCMP_SAHF:
21353 return arith_cost > 4 ? 4 : 3;
21359 /* Return strategy to use for floating-point. We assume that fcomi is always
21360 preferrable where available, since that is also true when looking at size
21361 (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
21363 enum ix86_fpcmp_strategy
21364 ix86_fp_comparison_strategy (enum rtx_code)
21366 /* Do fcomi/sahf based test when profitable. */
/* (The TARGET_CMOVE/TARGET_SAHF guard before the COMI return is on an
   elided line.)  Fallback order: COMI, then SAHF, then arithmetic
   bit-twiddling on the status word.  */
21369 return IX86_FPCMP_COMI;
21371 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
21372 return IX86_FPCMP_SAHF;
21374 return IX86_FPCMP_ARITH;
21377 /* Swap, force into registers, or otherwise massage the two operands
21378 to a fp comparison. The operands are updated in place; the new
21379 comparison code is returned. */
/* NOTE(review): several guard lines (the `if' opening the register-
   forcing block, `else' arms, the final *pop0/*pop1 writeback and
   `return code') are elided.  */
21381 static enum rtx_code
21382 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
21384 machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
21385 rtx op0 = *pop0, op1 = *pop1;
21386 machine_mode op_mode = GET_MODE (op0);
21387 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
21389 /* All of the unordered compare instructions only work on registers.
21390 The same is true of the fcomi compare instructions. The XFmode
21391 compare instructions require registers except when comparing
21392 against zero or when converting operand 1 from fixed point to
/* (Comment tail — "floating point" — is on an elided line.)  */
21396 && (fpcmp_mode == CCFPUmode
21397 || (op_mode == XFmode
21398 && ! (standard_80387_constant_p (op0) == 1
21399 || standard_80387_constant_p (op1) == 1)
21400 && GET_CODE (op1) != FLOAT)
21401 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
21403 op0 = force_reg (op_mode, op0);
21404 op1 = force_reg (op_mode, op1);
21408 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
21409 things around if they appear profitable, otherwise force op0
21410 into a register. */
21412 if (standard_80387_constant_p (op0) == 0
21414 && ! (standard_80387_constant_p (op1) == 0
21417 enum rtx_code new_code = ix86_fp_swap_condition (code);
21418 if (new_code != UNKNOWN)
21420 std::swap (op0, op1);
/* If the swap is not expressible (IEEE orderedness), keep operands
   and force op0 to a register instead.  */
21426 op0 = force_reg (op_mode, op0);
21428 if (CONSTANT_P (op1))
21430 int tmp = standard_80387_constant_p (op1);
21432 op1 = validize_mem (force_const_mem (op_mode, op1));
21436 op1 = force_reg (op_mode, op1);
21439 op1 = force_reg (op_mode, op1);
21443 /* Try to rearrange the comparison to make it cheaper. */
21444 if (ix86_fp_comparison_cost (code)
21445 > ix86_fp_comparison_cost (swap_condition (code))
21446 && (REG_P (op1) || can_create_pseudo_p ()))
21448 std::swap (op0, op1);
21449 code = swap_condition (code);
21451 op0 = force_reg (op_mode, op0);
21459 /* Convert comparison codes we use to represent FP comparison to integer
21460 code that will result in proper branch. Return UNKNOWN if no such code
/* (Comment tail — presumably "is available" — and the entire switch
   body mapping FP codes to integer codes are on elided lines.)  */
21464 ix86_fp_compare_code_to_integer (enum rtx_code code)
21493 /* Generate insn patterns to do a floating point compare of OPERANDS. */
/* Emits the compare sequence chosen by ix86_fp_comparison_strategy and
   returns the comparison RTX (CODE applied to the flags register) that the
   flags user (bcc/scc/cmov) should consume.  SCRATCH, when needed, receives
   the FNSTSW result; a fresh HImode pseudo is allocated where shown below.
   NOTE(review): interior statements are elided in this excerpt.  */
21496 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
21498 machine_mode fpcmp_mode, intcmp_mode;
21501 fpcmp_mode = ix86_fp_compare_mode (code);
21502 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
21504 /* Do fcomi/sahf based test when profitable. */
21505 switch (ix86_fp_comparison_strategy (code))
21507 case IX86_FPCMP_COMI:
/* fcomi/fucomi set EFLAGS directly; just emit flags = COMPARE (op0, op1).  */
21508 intcmp_mode = fpcmp_mode;
21509 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
21510 tmp = gen_rtx_SET (gen_rtx_REG (fpcmp_mode, FLAGS_REG), tmp);
21514 case IX86_FPCMP_SAHF:
/* fcom + fnstsw + sahf: same flags result, but needs a scratch HImode
   register for the status word (clobbered in the parallel below).  */
21515 intcmp_mode = fpcmp_mode;
21516 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
21517 tmp = gen_rtx_SET (gen_rtx_REG (fpcmp_mode, FLAGS_REG), tmp);
21520 scratch = gen_reg_rtx (HImode);
21521 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
21522 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
21525 case IX86_FPCMP_ARITH:
21526 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
21527 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
21528 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
21530 scratch = gen_reg_rtx (HImode);
21531 emit_insn (gen_rtx_SET (scratch, tmp2));
21533 /* In the unordered case, we have to check C2 for NaN's, which
21534 doesn't happen to work out to anything nice combination-wise.
21535 So do some bit twiddling on the value we've got in AH to come
21536 up with an appropriate set of condition codes. */
/* The masks below select x87 status-word condition bits as seen in the
   high byte after FNSTSW: presumably 0x01 = C0, 0x04 = C2, 0x40 = C3,
   so 0x45 = C3|C2|C0 — confirm against the Intel SDM status-word layout.  */
21538 intcmp_mode = CCNOmode;
21543 if (code == GT || !TARGET_IEEE_FP)
21545 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
21550 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
21551 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
21552 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
21553 intcmp_mode = CCmode;
21559 if (code == LT && TARGET_IEEE_FP)
21561 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
21562 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
21563 intcmp_mode = CCmode;
21568 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
21574 if (code == GE || !TARGET_IEEE_FP)
21576 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
21581 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
21582 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
21588 if (code == LE && TARGET_IEEE_FP)
21590 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
21591 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
21592 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
21593 intcmp_mode = CCmode;
21598 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
21604 if (code == EQ && TARGET_IEEE_FP)
21606 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
21607 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
21608 intcmp_mode = CCmode;
21613 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
21619 if (code == NE && TARGET_IEEE_FP)
21621 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
21622 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
21628 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
21634 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
21638 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
21643 gcc_unreachable ();
21651 /* Return the test that should be put into the flags user, i.e.
21652 the bcc, scc, or cmov instruction. */
21653 return gen_rtx_fmt_ee (code, VOIDmode,
21654 gen_rtx_REG (intcmp_mode, FLAGS_REG),
/* Dispatch a comparison of OP0/OP1 with CODE to the right expander:
   flags already computed (MODE_CC) -> just wrap in CODE; scalar float
   (binary FP only, asserted) -> x87/SSE FP compare; otherwise integer.
   Returns the comparison RTX for the flags user.  */
21659 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
21663 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
21664 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
21666 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
21668 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
21669 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
21672 ret = ix86_expand_int_compare (code, op0, op1);
/* Emit a conditional branch to LABEL comparing OP0 with OP1 under CODE.
   Single-word modes go through ix86_expand_compare; double-word modes
   (DImode on 32-bit, TImode on 64-bit, per the submode choice below) are
   split into compares on their lo/hi halves.  NOTE(review): interior
   statements are elided in this excerpt.  */
21678 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
21680 machine_mode mode = GET_MODE (op0);
21692 tmp = ix86_expand_compare (code, op0, op1);
21693 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
21694 gen_rtx_LABEL_REF (VOIDmode, label),
21696 emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
21703 /* Expand DImode branch into multiple compare+branch. */
21706 rtx_code_label *label2;
21707 enum rtx_code code1, code2, code3;
21708 machine_mode submode;
/* Canonicalize: keep any constant as the second operand.  */
21710 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
21712 std::swap (op0, op1);
21713 code = swap_condition (code);
21716 split_double_mode (mode, &op0, 1, lo+0, hi+0);
21717 split_double_mode (mode, &op1, 1, lo+1, hi+1);
21719 submode = mode == DImode ? SImode : DImode;
21721 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
21722 avoid two branches. This costs one extra insn, so disable when
21723 optimizing for size. */
21725 if ((code == EQ || code == NE)
21726 && (!optimize_insn_for_size_p ()
21727 || hi[1] == const0_rtx || lo[1] == const0_rtx))
21732 if (hi[1] != const0_rtx)
21733 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
21734 NULL_RTX, 0, OPTAB_WIDEN)
21737 if (lo[1] != const0_rtx)
21738 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
21739 NULL_RTX, 0, OPTAB_WIDEN);
21741 tmp = expand_binop (submode, ior_optab, xor1, xor0,
21742 NULL_RTX, 0, OPTAB_WIDEN);
/* Recurse on the single-word OR-of-XORs against zero.  */
21744 ix86_expand_branch (code, tmp, const0_rtx, label);
21748 /* Otherwise, if we are doing less-than or greater-or-equal-than,
21749 op1 is a constant and the low word is zero, then we can just
21750 examine the high word. Similarly for low word -1 and
21751 less-or-equal-than or greater-than. */
21753 if (CONST_INT_P (hi[1]))
21756 case LT: case LTU: case GE: case GEU:
21757 if (lo[1] == const0_rtx)
21759 ix86_expand_branch (code, hi[0], hi[1], label);
21763 case LE: case LEU: case GT: case GTU:
21764 if (lo[1] == constm1_rtx)
21766 ix86_expand_branch (code, hi[0], hi[1], label);
21774 /* Otherwise, we need two or three jumps. */
21776 label2 = gen_label_rtx ();
21779 code2 = swap_condition (code);
21780 code3 = unsigned_condition (code);
21784 case LT: case GT: case LTU: case GTU:
21787 case LE: code1 = LT; code2 = GT; break;
21788 case GE: code1 = GT; code2 = LT; break;
21789 case LEU: code1 = LTU; code2 = GTU; break;
21790 case GEU: code1 = GTU; code2 = LTU; break;
21792 case EQ: code1 = UNKNOWN; code2 = NE; break;
21793 case NE: code2 = UNKNOWN; break;
21796 gcc_unreachable ();
21801 * if (hi(a) < hi(b)) goto true;
21802 * if (hi(a) > hi(b)) goto false;
21803 * if (lo(a) < lo(b)) goto true;
/* code1 branches to LABEL on the high word, code2 falls out to label2
   (the "false" exit), and code3 decides on the unsigned low word.  */
21807 if (code1 != UNKNOWN)
21808 ix86_expand_branch (code1, hi[0], hi[1], label);
21809 if (code2 != UNKNOWN)
21810 ix86_expand_branch (code2, hi[0], hi[1], label2);
21812 ix86_expand_branch (code3, lo[0], lo[1], label);
21814 if (code2 != UNKNOWN)
21815 emit_label (label2);
21820 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
21825 /* Split branch based on floating point condition. */
/* Emits pc = if_then_else (fp-compare, TARGET1, TARGET2); if TARGET2 is
   not pc, the targets are swapped and the condition reversed (NaN-aware)
   so the fall-through edge is always TARGET2.  Attaches REG_BR_PROB when
   split_branch_probability is known.  */
21827 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
21828 rtx target1, rtx target2, rtx tmp)
21833 if (target2 != pc_rtx)
21835 std::swap (target1, target2);
21836 code = reverse_condition_maybe_unordered (code);
21839 condition = ix86_expand_fp_compare (code, op1, op2,
21842 i = emit_jump_insn (gen_rtx_SET
21844 gen_rtx_IF_THEN_ELSE (VOIDmode,
21845 condition, target1, target2)));
21846 if (split_branch_probability >= 0)
21847 add_int_reg_note (i, REG_BR_PROB, split_branch_probability);
/* Expand a setcc: DEST (must be QImode) = (OP0 CODE OP1) as 0/1.
   The comparison RTX from ix86_expand_compare is retagged to QImode so the
   store-flag pattern matches.  */
21851 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
21855 gcc_assert (GET_MODE (dest) == QImode);
21857 ret = ix86_expand_compare (code, op0, op1);
21858 PUT_MODE (ret, QImode);
21859 emit_insn (gen_rtx_SET (dest, ret));
21862 /* Expand comparison setting or clearing carry flag. Return true when
21863 successful and set pop for the operation. */
/* On success *POP is an LTU/GEU comparison of the flags register (asserted
   at the end), i.e. a test decided purely by the carry flag, usable by
   sbb/adc-style sequences.  NOTE(review): interior statements are elided
   in this excerpt.  */
21865 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
21867 machine_mode mode =
21868 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
21870 /* Do not handle double-mode compares that go through special path. */
21871 if (mode == (TARGET_64BIT ? TImode : DImode))
21874 if (SCALAR_FLOAT_MODE_P (mode))
21877 rtx_insn *compare_seq;
21879 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
21881 /* Shortcut: following common codes never translate
21882 into carry flag compares. */
21883 if (code == EQ || code == NE || code == UNEQ || code == LTGT
21884 || code == ORDERED || code == UNORDERED)
21887 /* These comparisons require zero flag; swap operands so they won't. */
21888 if ((code == GT || code == UNLE || code == LE || code == UNGT)
21889 && !TARGET_IEEE_FP)
21891 std::swap (op0, op1);
21892 code = swap_condition (code);
21895 /* Try to expand the comparison and verify that we end up with
21896 carry flag based comparison. This fails to be true only when
21897 we decide to expand comparison using arithmetic that is not
21898 too common scenario. */
21900 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
21901 compare_seq = get_insns ();
21904 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
21905 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
21906 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
21908 code = GET_CODE (compare_op);
/* Only a carry-based result is acceptable; anything else fails.  */
21910 if (code != LTU && code != GEU)
21913 emit_insn (compare_seq);
21918 if (!INTEGRAL_MODE_P (mode))
21927 /* Convert a==0 into (unsigned)a<1. */
21930 if (op1 != const0_rtx)
21933 code = (code == EQ ? LTU : GEU);
21936 /* Convert a>b into b<a or a>=b-1. */
21939 if (CONST_INT_P (op1))
21941 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
21942 /* Bail out on overflow. We still can swap operands but that
21943 would force loading of the constant into register. */
21944 if (op1 == const0_rtx
21945 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
21947 code = (code == GTU ? GEU : LTU);
21951 std::swap (op0, op1);
21952 code = (code == GTU ? LTU : GEU);
21956 /* Convert a>=0 into (unsigned)a<0x80000000. */
21959 if (mode == DImode || op1 != const0_rtx)
/* NOTE(review): "1 << (GET_MODE_BITSIZE (mode) - 1)" is a signed-int shift;
   for a 32-bit mode this left-shifts into the sign bit (UB in C).  Later
   GCC uses HOST_WIDE_INT_1U << ... here — worth confirming/fixing against
   the full source.  */
21961 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
21962 code = (code == LT ? GEU : LTU);
21966 if (mode == DImode || op1 != constm1_rtx)
21968 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
21969 code = (code == LE ? GEU : LTU);
21975 /* Swapping operands may cause constant to appear as first operand. */
21976 if (!nonimmediate_operand (op0, VOIDmode))
21978 if (!can_create_pseudo_p ())
21980 op0 = force_reg (mode, op0);
21982 *pop = ix86_expand_compare (code, op0, op1);
21983 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
/* Expand an integer conditional move: operands[0] = operands[1] (a
   comparison) ? operands[2] : operands[3].  Tries, in order: sbb/adc-style
   branchless sequences when both arms are constants and the compare can be
   reduced to the carry flag or a sign bit; lea-based arithmetic on the
   store-flag result; logical and/or masking with a recursion for one
   constant arm; finally a real cmov.  Returns false when the caller must
   fall back (e.g. double-word compare on this target).
   NOTE(review): interior statements are elided in this excerpt.  */
21988 ix86_expand_int_movcc (rtx operands[])
21990 enum rtx_code code = GET_CODE (operands[1]), compare_code;
21991 rtx_insn *compare_seq;
21993 machine_mode mode = GET_MODE (operands[0]);
21994 bool sign_bit_compare_p = false;
21995 rtx op0 = XEXP (operands[1], 0);
21996 rtx op1 = XEXP (operands[1], 1);
21998 if (GET_MODE (op0) == TImode
21999 || (GET_MODE (op0) == DImode
22004 compare_op = ix86_expand_compare (code, op0, op1);
22005 compare_seq = get_insns ();
22008 compare_code = GET_CODE (compare_op);
/* x<0 / x>=0 and x>-1 / x<=-1 are decided by the sign bit alone.  */
22010 if ((op1 == const0_rtx && (code == GE || code == LT))
22011 || (op1 == constm1_rtx && (code == GT || code == LE)))
22012 sign_bit_compare_p = true;
22014 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
22015 HImode insns, we'd be swallowed in word prefix ops. */
22017 if ((mode != HImode || TARGET_FAST_PREFIX)
22018 && (mode != (TARGET_64BIT ? TImode : DImode))
22019 && CONST_INT_P (operands[2])
22020 && CONST_INT_P (operands[3]))
22022 rtx out = operands[0];
22023 HOST_WIDE_INT ct = INTVAL (operands[2]);
22024 HOST_WIDE_INT cf = INTVAL (operands[3]);
22025 HOST_WIDE_INT diff;
22028 /* Sign bit compares are better done using shifts than we do by using
22030 if (sign_bit_compare_p
22031 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
22033 /* Detect overlap between destination and compare sources. */
22036 if (!sign_bit_compare_p)
22039 bool fpcmp = false;
22041 compare_code = GET_CODE (compare_op);
22043 flags = XEXP (compare_op, 0);
22045 if (GET_MODE (flags) == CCFPmode
22046 || GET_MODE (flags) == CCFPUmode)
22050 = ix86_fp_compare_code_to_integer (compare_code);
22053 /* To simplify rest of code, restrict to the GEU case. */
22054 if (compare_code == LTU)
22056 std::swap (ct, cf);
22057 compare_code = reverse_condition (compare_code);
22058 code = reverse_condition (code);
22063 PUT_CODE (compare_op,
22064 reverse_condition_maybe_unordered
22065 (GET_CODE (compare_op)));
22067 PUT_CODE (compare_op,
22068 reverse_condition (GET_CODE (compare_op)));
22072 if (reg_overlap_mentioned_p (out, op0)
22073 || reg_overlap_mentioned_p (out, op1)
22074 tmp = gen_reg_rtx (mode);
/* Materialize 0 / -1 from the carry flag (sbb reg,reg idiom).  */
22076 if (mode == DImode)
22077 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
22079 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
22080 flags, compare_op));
22084 if (code == GT || code == GE)
22085 code = reverse_condition (code);
22088 std::swap (ct, cf);
22091 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
22104 tmp = expand_simple_binop (mode, PLUS,
22106 copy_rtx (tmp), 1, OPTAB_DIRECT);
22117 tmp = expand_simple_binop (mode, IOR,
22119 copy_rtx (tmp), 1, OPTAB_DIRECT);
22121 else if (diff == -1 && ct)
22131 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
22133 tmp = expand_simple_binop (mode, PLUS,
22134 copy_rtx (tmp), GEN_INT (cf),
22135 copy_rtx (tmp), 1, OPTAB_DIRECT);
22143 * andl cf - ct, dest
22153 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
22156 tmp = expand_simple_binop (mode, AND,
22158 gen_int_mode (cf - ct, mode),
22159 copy_rtx (tmp), 1, OPTAB_DIRECT);
22161 tmp = expand_simple_binop (mode, PLUS,
22162 copy_rtx (tmp), GEN_INT (ct),
22163 copy_rtx (tmp), 1, OPTAB_DIRECT);
22166 if (!rtx_equal_p (tmp, out))
22167 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
22174 machine_mode cmp_mode = GET_MODE (op0);
22175 enum rtx_code new_code;
22177 if (SCALAR_FLOAT_MODE_P (cmp_mode))
22179 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
22181 /* We may be reversing unordered compare to normal compare, that
22182 is not valid in general (we may convert non-trapping condition
22183 to trapping one), however on i386 we currently emit all
22184 comparisons unordered. */
22185 new_code = reverse_condition_maybe_unordered (code);
22188 new_code = ix86_reverse_condition (code, cmp_mode);
22189 if (new_code != UNKNOWN)
22191 std::swap (ct, cf);
22197 compare_code = UNKNOWN;
22198 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
22199 && CONST_INT_P (op1))
22201 if (op1 == const0_rtx
22202 && (code == LT || code == GE))
22203 compare_code = code;
22204 else if (op1 == constm1_rtx)
22208 else if (code == GT)
22213 /* Optimize dest = (op0 < 0) ? -1 : cf. */
22214 if (compare_code != UNKNOWN
22215 && GET_MODE (op0) == GET_MODE (out)
22216 && (cf == -1 || ct == -1))
22218 /* If lea code below could be used, only optimize
22219 if it results in a 2 insn sequence. */
22221 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
22222 || diff == 3 || diff == 5 || diff == 9)
22223 || (compare_code == LT && ct == -1)
22224 || (compare_code == GE && cf == -1))
22227 * notl op1 (if necessary)
22235 code = reverse_condition (code);
22238 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
22240 out = expand_simple_binop (mode, IOR,
22242 out, 1, OPTAB_DIRECT);
22243 if (out != operands[0])
22244 emit_move_insn (operands[0], out);
/* diff in {1,2,4,8,3,5,9} maps onto lea scale/index addressing.  */
22251 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
22252 || diff == 3 || diff == 5 || diff == 9)
22253 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
22255 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
22261 * lea cf(dest*(ct-cf)),dest
22265 * This also catches the degenerate setcc-only case.
22271 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
22274 /* On x86_64 the lea instruction operates on Pmode, so we need
22275 to get arithmetics done in proper mode to match. */
22277 tmp = copy_rtx (out);
22281 out1 = copy_rtx (out);
22282 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
22286 tmp = gen_rtx_PLUS (mode, tmp, out1);
22292 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
22295 if (!rtx_equal_p (tmp, out))
22298 out = force_operand (tmp, copy_rtx (out));
22300 emit_insn (gen_rtx_SET (copy_rtx (out), copy_rtx (tmp)));
22302 if (!rtx_equal_p (out, operands[0]))
22303 emit_move_insn (operands[0], copy_rtx (out));
22309 * General case: Jumpful:
22310 * xorl dest,dest cmpl op1, op2
22311 * cmpl op1, op2 movl ct, dest
22312 * setcc dest jcc 1f
22313 * decl dest movl cf, dest
22314 * andl (cf-ct),dest 1:
22317 * Size 20. Size 14.
22319 * This is reasonably steep, but branch mispredict costs are
22320 * high on modern cpus, so consider failing only if optimizing
22324 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
22325 && BRANCH_COST (optimize_insn_for_speed_p (),
22330 machine_mode cmp_mode = GET_MODE (op0);
22331 enum rtx_code new_code;
22333 if (SCALAR_FLOAT_MODE_P (cmp_mode))
22335 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
22337 /* We may be reversing unordered compare to normal compare,
22338 that is not valid in general (we may convert non-trapping
22339 condition to trapping one), however on i386 we currently
22340 emit all comparisons unordered. */
22341 new_code = reverse_condition_maybe_unordered (code);
22345 new_code = ix86_reverse_condition (code, cmp_mode);
22346 if (compare_code != UNKNOWN && new_code != UNKNOWN)
22347 compare_code = reverse_condition (compare_code);
22350 if (new_code != UNKNOWN)
22358 if (compare_code != UNKNOWN)
22360 /* notl op1 (if needed)
22365 For x < 0 (resp. x <= -1) there will be no notl,
22366 so if possible swap the constants to get rid of the
22368 True/false will be -1/0 while code below (store flag
22369 followed by decrement) is 0/-1, so the constants need
22370 to be exchanged once more. */
22372 if (compare_code == GE || !cf)
22374 code = reverse_condition (code);
22378 std::swap (ct, cf);
22380 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
22384 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
22386 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
22388 copy_rtx (out), 1, OPTAB_DIRECT);
22391 out = expand_simple_binop (mode, AND, copy_rtx (out),
22392 gen_int_mode (cf - ct, mode),
22393 copy_rtx (out), 1, OPTAB_DIRECT);
22395 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
22396 copy_rtx (out), 1, OPTAB_DIRECT);
22397 if (!rtx_equal_p (out, operands[0]))
22398 emit_move_insn (operands[0], copy_rtx (out));
22404 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
22406 /* Try a few things more with specific constants and a variable. */
22409 rtx var, orig_out, out, tmp;
22411 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
22414 /* If one of the two operands is an interesting constant, load a
22415 constant with the above and mask it in with a logical operation. */
22417 if (CONST_INT_P (operands[2]))
22420 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
22421 operands[3] = constm1_rtx, op = and_optab;
22422 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
22423 operands[3] = const0_rtx, op = ior_optab;
22427 else if (CONST_INT_P (operands[3]))
22430 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
22431 operands[2] = constm1_rtx, op = and_optab;
/* NOTE(review): the guard "operands[3] != const0_rtx" is trivially true
   when INTVAL (operands[3]) == -1; by symmetry with the branch above it
   presumably should test operands[2] — confirm against the full source.  */
22432 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
22433 operands[2] = const0_rtx, op = ior_optab;
22440 orig_out = operands[0];
22441 tmp = gen_reg_rtx (mode);
22444 /* Recurse to get the constant loaded. */
22445 if (!ix86_expand_int_movcc (operands))
22448 /* Mask in the interesting variable. */
22449 out = expand_binop (mode, op, var, tmp, orig_out, 0,
22451 if (!rtx_equal_p (out, orig_out))
22452 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
22458 * For comparison with above,
22468 if (! nonimmediate_operand (operands[2], mode))
22469 operands[2] = force_reg (mode, operands[2]);
22470 if (! nonimmediate_operand (operands[3], mode))
22471 operands[3] = force_reg (mode, operands[3]);
22473 if (! register_operand (operands[2], VOIDmode)
22475 || ! register_operand (operands[3], VOIDmode)))
22476 operands[2] = force_reg (mode, operands[2]);
22479 && ! register_operand (operands[3], VOIDmode))
22480 operands[3] = force_reg (mode, operands[3]);
22482 emit_insn (compare_seq);
22483 emit_insn (gen_rtx_SET (operands[0],
22484 gen_rtx_IF_THEN_ELSE (mode,
22485 compare_op, operands[2],
22490 /* Swap, force into registers, or otherwise massage the two operands
22491 to an sse comparison with a mask result. Thus we differ a bit from
22492 ix86_prepare_fp_compare_args which expects to produce a flags result.
22494 The DEST operand exists to help determine whether to commute commutative
22495 operators. The POP0/POP1 operands are updated in place. The new
22496 comparison code is returned, or UNKNOWN if not implementable. */
22498 static enum rtx_code
22499 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
22500 rtx *pop0, rtx *pop1)
22506 /* AVX supports all the needed comparisons. */
22509 /* We have no LTGT as an operator. We could implement it with
22510 NE & ORDERED, but this requires an extra temporary. It's
22511 not clear that it's worth it. */
22518 /* These are supported directly. */
22525 /* AVX has 3 operand comparisons, no need to swap anything. */
22528 /* For commutative operators, try to canonicalize the destination
22529 operand to be first in the comparison - this helps reload to
22530 avoid extra moves. */
22531 if (!dest || !rtx_equal_p (dest, *pop1))
22539 /* These are not supported directly before AVX, and furthermore
22540 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
22541 comparison operands to transform into something that is
22543 std::swap (*pop0, *pop1);
22544 code = swap_condition (code);
22548 gcc_unreachable ();
22554 /* Detect conditional moves that exactly match min/max operational
22555 semantics. Note that this is IEEE safe, as long as we don't
22556 interchange the operands.
22558 Returns FALSE if this conditional move doesn't match a MIN/MAX,
22559 and TRUE if the operation is successful and instructions are emitted. */
22562 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
22563 rtx cmp_op1, rtx if_true, rtx if_false)
/* UNGE is LT with the arms exchanged; normalize so only LT remains.  */
22571 else if (code == UNGE)
22572 std::swap (if_true, if_false);
22576 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
22578 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
22583 mode = GET_MODE (dest);
22585 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
22586 but MODE may be a vector mode and thus not appropriate. */
22587 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
/* Strict-IEEE case: use the UNSPEC min/max that preserves operand order
   (and thus the hardware's NaN/signed-zero behavior).  */
22589 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
22592 if_true = force_reg (mode, if_true);
22593 v = gen_rtvec (2, if_true, if_false);
22594 tmp = gen_rtx_UNSPEC (mode, v, u);
22598 code = is_min ? SMIN : SMAX;
22599 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
22602 emit_insn (gen_rtx_SET (dest, tmp));
22606 /* Expand an sse vector comparison. Return the register with the result. */
/* OP_TRUE/OP_FALSE are only consulted for overlap with DEST; a fresh
   pseudo is used when they overlap.  With AVX512F and 512-bit operands the
   result is an integer mask mode sized by the element count, otherwise it
   is a full-width vector mask in the operands' mode.  NOTE(review):
   interior statements are elided in this excerpt.  */
22609 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
22610 rtx op_true, rtx op_false)
22612 machine_mode mode = GET_MODE (dest);
22613 machine_mode cmp_ops_mode = GET_MODE (cmp_op0);
22615 /* In general case result of comparison can differ from operands' type. */
22616 machine_mode cmp_mode;
22618 /* In AVX512F the result of comparison is an integer mask. */
22619 bool maskcmp = false;
22622 if (GET_MODE_SIZE (cmp_ops_mode) == 64)
22624 cmp_mode = mode_for_size (GET_MODE_NUNITS (cmp_ops_mode), MODE_INT, 0);
22625 gcc_assert (cmp_mode != BLKmode);
22630 cmp_mode = cmp_ops_mode;
22633 cmp_op0 = force_reg (cmp_ops_mode, cmp_op0);
22634 if (!nonimmediate_operand (cmp_op1, cmp_ops_mode))
22635 cmp_op1 = force_reg (cmp_ops_mode, cmp_op1);
22638 || (op_true && reg_overlap_mentioned_p (dest, op_true))
22639 || (op_false && reg_overlap_mentioned_p (dest, op_false)))
22640 dest = gen_reg_rtx (maskcmp ? cmp_mode : mode);
22642 /* Compare patterns for int modes are unspec in AVX512F only. */
22643 if (maskcmp && (code == GT || code == EQ))
22645 rtx (*gen)(rtx, rtx, rtx);
22647 switch (cmp_ops_mode)
22650 gcc_assert (TARGET_AVX512BW);
22651 gen = code == GT ? gen_avx512bw_gtv64qi3 : gen_avx512bw_eqv64qi3_1;
22654 gcc_assert (TARGET_AVX512BW);
22655 gen = code == GT ? gen_avx512bw_gtv32hi3 : gen_avx512bw_eqv32hi3_1;
22658 gen = code == GT ? gen_avx512f_gtv16si3 : gen_avx512f_eqv16si3_1;
22661 gen = code == GT ? gen_avx512f_gtv8di3 : gen_avx512f_eqv8di3_1;
22669 emit_insn (gen (dest, cmp_op0, cmp_op1));
22673 x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
22675 if (cmp_mode != mode && !maskcmp)
22677 x = force_reg (cmp_ops_mode, x);
22678 convert_move (dest, x, false);
22681 emit_insn (gen_rtx_SET (dest, x));
22686 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
22687 operations. This is used for both scalar and vector conditional moves. */
/* Special-cases the all-ones/zero arms (plain AND/ANDN/IOR), XOP's
   3-operand pcmov, SSE4.1/AVX blendv, and AVX-512 blendm; falls back to
   the classic (cmp & true) | (~cmp & false) triple.  NOTE(review):
   interior statements are elided in this excerpt.  */
22690 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
22692 machine_mode mode = GET_MODE (dest);
22693 machine_mode cmpmode = GET_MODE (cmp);
22695 /* In AVX512F the result of comparison is an integer mask. */
22696 bool maskcmp = (mode != cmpmode && TARGET_AVX512F);
22700 /* If we have an integer mask and FP value then we need
22701 to cast mask to FP mode. */
22702 if (mode != cmpmode && VECTOR_MODE_P (cmpmode))
22704 cmp = force_reg (cmpmode, cmp);
22705 cmp = gen_rtx_SUBREG (mode, cmp, 0);
22708 if (vector_all_ones_operand (op_true, mode)
22709 && rtx_equal_p (op_false, CONST0_RTX (mode))
/* cmp itself is already the desired -1/0 mask.  */
22712 emit_insn (gen_rtx_SET (dest, cmp));
22714 else if (op_false == CONST0_RTX (mode)
22717 op_true = force_reg (mode, op_true);
22718 x = gen_rtx_AND (mode, cmp, op_true);
22719 emit_insn (gen_rtx_SET (dest, x));
22721 else if (op_true == CONST0_RTX (mode)
22724 op_false = force_reg (mode, op_false);
22725 x = gen_rtx_NOT (mode, cmp);
22726 x = gen_rtx_AND (mode, x, op_false);
22727 emit_insn (gen_rtx_SET (dest, x));
22729 else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode)
22732 op_false = force_reg (mode, op_false);
22733 x = gen_rtx_IOR (mode, cmp, op_false);
22734 emit_insn (gen_rtx_SET (dest, x));
22736 else if (TARGET_XOP
/* XOP vpcmov: a true 3-operand vector select.  */
22739 op_true = force_reg (mode, op_true);
22741 if (!nonimmediate_operand (op_false, mode))
22742 op_false = force_reg (mode, op_false);
22744 emit_insn (gen_rtx_SET (dest, gen_rtx_IF_THEN_ELSE (mode, cmp,
22750 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
22753 if (!nonimmediate_operand (op_true, mode))
22754 op_true = force_reg (mode, op_true);
22756 op_false = force_reg (mode, op_false);
22762 gen = gen_sse4_1_blendvps;
22766 gen = gen_sse4_1_blendvpd;
22774 gen = gen_sse4_1_pblendvb;
/* pblendvb only exists for V16QI; punt other 128-bit int modes
   through V16QImode lowparts.  */
22775 if (mode != V16QImode)
22776 d = gen_reg_rtx (V16QImode);
22777 op_false = gen_lowpart (V16QImode, op_false);
22778 op_true = gen_lowpart (V16QImode, op_true);
22779 cmp = gen_lowpart (V16QImode, cmp);
22784 gen = gen_avx_blendvps256;
22788 gen = gen_avx_blendvpd256;
22796 gen = gen_avx2_pblendvb;
22797 if (mode != V32QImode)
22798 d = gen_reg_rtx (V32QImode);
22799 op_false = gen_lowpart (V32QImode, op_false);
22800 op_true = gen_lowpart (V32QImode, op_true);
22801 cmp = gen_lowpart (V32QImode, cmp);
22806 gen = gen_avx512bw_blendmv64qi;
22809 gen = gen_avx512bw_blendmv32hi;
22812 gen = gen_avx512f_blendmv16si;
22815 gen = gen_avx512f_blendmv8di;
22818 gen = gen_avx512f_blendmv8df;
22821 gen = gen_avx512f_blendmv16sf;
/* Note argument order: blendv/blendm select op_true where the mask
   is set, so false comes first.  */
22830 emit_insn (gen (d, op_false, op_true, cmp));
22832 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
22836 op_true = force_reg (mode, op_true);
22838 t2 = gen_reg_rtx (mode);
22840 t3 = gen_reg_rtx (mode);
22844 x = gen_rtx_AND (mode, op_true, cmp);
22845 emit_insn (gen_rtx_SET (t2, x));
22847 x = gen_rtx_NOT (mode, cmp);
22848 x = gen_rtx_AND (mode, x, op_false);
22849 emit_insn (gen_rtx_SET (t3, x));
22851 x = gen_rtx_IOR (mode, t3, t2);
22852 emit_insn (gen_rtx_SET (dest, x));
22857 /* Expand a floating-point conditional move. Return true if successful. */
/* SSE-math path: rewrite as mask + logical select (no cmov for SSE regs);
   otherwise use fcmov, routing signed-integer conditions through a setcc
   since fcmov only handles unordered/unsigned flag combinations.  */
22860 ix86_expand_fp_movcc (rtx operands[])
22862 machine_mode mode = GET_MODE (operands[0]);
22863 enum rtx_code code = GET_CODE (operands[1]);
22864 rtx tmp, compare_op;
22865 rtx op0 = XEXP (operands[1], 0);
22866 rtx op1 = XEXP (operands[1], 1);
22868 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
22870 machine_mode cmode;
22872 /* Since we've no cmove for sse registers, don't force bad register
22873 allocation just to gain access to it. Deny movcc when the
22874 comparison mode doesn't match the move mode. */
22875 cmode = GET_MODE (op0);
22876 if (cmode == VOIDmode)
22877 cmode = GET_MODE (op1);
22881 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
22882 if (code == UNKNOWN)
22885 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
22886 operands[2], operands[3]))
22889 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
22890 operands[2], operands[3]);
22891 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
22895 if (GET_MODE (op0) == TImode
22896 || (GET_MODE (op0) == DImode
22900 /* The floating point conditional move instructions don't directly
22901 support conditions resulting from a signed integer comparison. */
22903 compare_op = ix86_expand_compare (code, op0, op1);
22904 if (!fcmov_comparison_operator (compare_op, VOIDmode))
22906 tmp = gen_reg_rtx (QImode);
22907 ix86_expand_setcc (tmp, code, op0, op1);
22909 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
22912 emit_insn (gen_rtx_SET (operands[0],
22913 gen_rtx_IF_THEN_ELSE (mode, compare_op,
22914 operands[2], operands[3])));
22919 /* Helper for ix86_cmp_code_to_pcmp_immediate for int modes. */
/* NOTE(review): the switch body mapping rtx codes to VPCMP immediates is
   elided from this excerpt; only the unreachable default is visible.  */
22922 ix86_int_cmp_code_to_pcmp_immediate (enum rtx_code code)
22943 gcc_unreachable ();
22947 /* Helper for ix86_cmp_code_to_pcmp_immediate for fp modes. */
/* NOTE(review): the switch body mapping FP rtx codes to VCMP immediates is
   elided from this excerpt; only the unreachable default is visible.  */
22950 ix86_fp_cmp_code_to_pcmp_immediate (enum rtx_code code)
22967 gcc_unreachable ();
22971 /* Return immediate value to be used in UNSPEC_PCMP
22972 for comparison CODE in MODE. */
/* Thin dispatcher over the int/fp helpers above, keyed on MODE's class.  */
22975 ix86_cmp_code_to_pcmp_immediate (enum rtx_code code, machine_mode mode)
22977 if (FLOAT_MODE_P (mode))
22978 return ix86_fp_cmp_code_to_pcmp_immediate (code);
22979 return ix86_int_cmp_code_to_pcmp_immediate (code);
22982 /* Expand AVX-512 vector comparison. */
/* operands[0] (a mask mode) = operands[2] <operands[1]> operands[3],
   emitted as an UNSPEC_PCMP / UNSPEC_UNSIGNED_PCMP with the immediate
   computed from the comparison code and operand mode.  */
22985 ix86_expand_mask_vec_cmp (rtx operands[])
22987 machine_mode mask_mode = GET_MODE (operands[0]);
22988 machine_mode cmp_mode = GET_MODE (operands[2]);
22989 enum rtx_code code = GET_CODE (operands[1]);
22990 rtx imm = GEN_INT (ix86_cmp_code_to_pcmp_immediate (code, cmp_mode));
23000 unspec_code = UNSPEC_UNSIGNED_PCMP;
23004 unspec_code = UNSPEC_PCMP;
23007 unspec = gen_rtx_UNSPEC (mask_mode, gen_rtvec (3, operands[2],
23010 emit_insn (gen_rtx_SET (operands[0], unspec));
23015 /* Expand fp vector comparison. */
/* LTGT and UNEQ have no single SSE compare; they are built from two
   compares (ORDERED&NE, UNORDERED|EQ respectively) combined with a binop.
   All other codes go straight to ix86_expand_sse_cmp after argument
   canonicalization.  NOTE(review): interior statements are elided in this
   excerpt.  */
23018 ix86_expand_fp_vec_cmp (rtx operands[])
23020 enum rtx_code code = GET_CODE (operands[1]);
23023 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
23024 &operands[2], &operands[3]);
23025 if (code == UNKNOWN)
23028 switch (GET_CODE (operands[1]))
23031 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[2],
23032 operands[3], NULL, NULL);
23033 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[2],
23034 operands[3], NULL, NULL);
23038 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[2],
23039 operands[3], NULL, NULL);
23040 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[2],
23041 operands[3], NULL, NULL);
23045 gcc_unreachable ();
23047 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
23051 cmp = ix86_expand_sse_cmp (operands[0], code, operands[2], operands[3],
23052 operands[1], operands[2]);
23054 if (operands[0] != cmp)
23055 emit_move_insn (operands[0], cmp);
/* Expand an integer vector comparison COP0 <CODE> COP1 into DEST.
   Hardware only provides EQ/GT (signed); everything else is canonicalized
   to those, with *NEGATE set when the caller must invert the mask and
   op_true/op_false swapped accordingly.  Unsigned compares are handled by
   sign-bit bias (subtract 0x80... from both operands) or by unsigned
   saturating subtraction against zero.  NOTE(review): interior statements
   are elided in this excerpt.  */
23061 ix86_expand_int_sse_cmp (rtx dest, enum rtx_code code, rtx cop0, rtx cop1,
23062 rtx op_true, rtx op_false, bool *negate)
23064 machine_mode data_mode = GET_MODE (dest);
23065 machine_mode mode = GET_MODE (cop0);
23070 /* XOP supports all of the comparisons on all 128-bit vector int types. */
23072 && (mode == V16QImode || mode == V8HImode
23073 || mode == V4SImode || mode == V2DImode))
23077 /* Canonicalize the comparison to EQ, GT, GTU. */
23088 code = reverse_condition (code);
23094 code = reverse_condition (code);
23100 std::swap (cop0, cop1);
23101 code = swap_condition (code);
23105 gcc_unreachable ();
23108 /* Only SSE4.1/SSE4.2 supports V2DImode. */
23109 if (mode == V2DImode)
23114 /* SSE4.1 supports EQ. */
23115 if (!TARGET_SSE4_1)
23121 /* SSE4.2 supports GT/GTU. */
23122 if (!TARGET_SSE4_2)
23127 gcc_unreachable ();
23131 /* Unsigned parallel compare is not supported by the hardware.
23132 Play some tricks to turn this into a signed comparison
23136 cop0 = force_reg (mode, cop0);
23148 rtx (*gen_sub3) (rtx, rtx, rtx);
23152 case V16SImode: gen_sub3 = gen_subv16si3; break;
23153 case V8DImode: gen_sub3 = gen_subv8di3; break;
23154 case V8SImode: gen_sub3 = gen_subv8si3; break;
23155 case V4DImode: gen_sub3 = gen_subv4di3; break;
23156 case V4SImode: gen_sub3 = gen_subv4si3; break;
23157 case V2DImode: gen_sub3 = gen_subv2di3; break;
23159 gcc_unreachable ();
23161 /* Subtract (-(INT MAX) - 1) from both operands to make
23163 mask = ix86_build_signbit_mask (mode, true, false);
23164 t1 = gen_reg_rtx (mode);
23165 emit_insn (gen_sub3 (t1, cop0, mask));
23167 t2 = gen_reg_rtx (mode);
23168 emit_insn (gen_sub3 (t2, cop1, mask));
23182 /* Perform a parallel unsigned saturating subtraction. */
23183 x = gen_reg_rtx (mode);
23184 emit_insn (gen_rtx_SET (x, gen_rtx_US_MINUS (mode, cop0,
/* a GTU b  <=>  us_minus (a, b) != 0; compare the result against zero
   and flip the negate flag.  */
23188 cop1 = CONST0_RTX (mode);
23190 *negate = !*negate;
23194 gcc_unreachable ();
23200 std::swap (op_true, op_false);
23202 /* Allow the comparison to be done in one mode, but the movcc to
23203 happen in another mode. */
23204 if (data_mode == mode)
23206 x = ix86_expand_sse_cmp (dest, code, cop0, cop1,
23207 op_true, op_false);
23211 gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
23212 x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1,
23213 op_true, op_false);
23214 if (GET_MODE (x) == mode)
23215 x = gen_lowpart (data_mode, x);
/* NOTE(review): elided chunk — interior lines are missing.  Front end for
   integer vector compares: delegates to ix86_expand_int_sse_cmp and, when
   that helper reports the mask must be inverted (NEGATE), re-compares the
   result against zero with EQ to flip it.  */
23221 /* Expand integer vector comparison.  */
23224 ix86_expand_int_vec_cmp (rtx operands[])
23226 rtx_code code = GET_CODE (operands[1]);
23227 bool negate = false;
23228 rtx cmp = ix86_expand_int_sse_cmp (operands[0], code, operands[2],
23229 operands[3], NULL, NULL, &negate);
/* Invert the mask by comparing it for equality with zero.  */
23235 cmp = ix86_expand_int_sse_cmp (operands[0], EQ, cmp,
23236 CONST0_RTX (GET_MODE (cmp)),
23237 NULL, NULL, &negate);
/* EQ against zero can always be done directly, so no second inversion.  */
23239 gcc_assert (!negate);
23241 if (operands[0] != cmp)
23242 emit_move_insn (operands[0], cmp);
/* NOTE(review): elided chunk — interior lines are missing.  Expands a
   floating-point vcond: operands[0] = operands[1] if (operands[4] <op3>
   operands[5]) else operands[2].  Tries a min/max idiom first; LTGT/UNEQ
   are synthesized from two compares as in ix86_expand_fp_vec_cmp.  */
23247 /* Expand a floating-point vector conditional move; a vcond operation
23248 rather than a movcc operation.  */
23251 ix86_expand_fp_vcond (rtx operands[])
23253 enum rtx_code code = GET_CODE (operands[3]);
23256 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
23257 &operands[4], &operands[5]);
23258 if (code == UNKNOWN)
23261 switch (GET_CODE (operands[3]))
/* Presumably LTGT: ORDERED mask combined with the NE selection.  */
23264 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
23265 operands[5], operands[0], operands[0]);
23266 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
23267 operands[5], operands[1], operands[2]);
/* Presumably UNEQ: UNORDERED mask combined with the EQ selection.  */
23271 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
23272 operands[5], operands[0], operands[0]);
23273 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
23274 operands[5], operands[1], operands[2]);
23278 gcc_unreachable ();
23280 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
23282 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
/* Fast path: recognize min/max patterns before emitting compare+blend.  */
23286 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
23287 operands[5], operands[1], operands[2]))
23290 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
23291 operands[1], operands[2]);
23292 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
/* NOTE(review): elided chunk — interior lines are missing.  Expands an
   integer vcond: operands[0] = operands[1] if (operands[4] <op3>
   operands[5]) else operands[2].  A compare-against-zero selecting 0/1 or
   0/-1 is strength-reduced to a single shift; otherwise the compare is
   expanded via ix86_expand_int_sse_cmp followed by a movcc blend.  */
23296 /* Expand a signed/unsigned integral vector conditional move.  */
23299 ix86_expand_int_vcond (rtx operands[])
23301 machine_mode data_mode = GET_MODE (operands[0]);
23302 machine_mode mode = GET_MODE (operands[4]);
23303 enum rtx_code code = GET_CODE (operands[3]);
23304 bool negate = false;
23307 cop0 = operands[4];
23308 cop1 = operands[5];
23310 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
23311 and x < 0 ? 1 : 0 into (unsigned) x >> 31.  */
23312 if ((code == LT || code == GE)
23313 && data_mode == mode
23314 && cop1 == CONST0_RTX (mode)
23315 && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
23316 && GET_MODE_UNIT_SIZE (data_mode) > 1
23317 && GET_MODE_UNIT_SIZE (data_mode) <= 8
23318 && (GET_MODE_SIZE (data_mode) == 16
23319 || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
23321 rtx negop = operands[2 - (code == LT)];
23322 int shift = GET_MODE_UNIT_BITSIZE (data_mode) - 1;
/* x < 0 ? 1 : 0 — logical shift of the sign bit to bit 0.  */
23323 if (negop == CONST1_RTX (data_mode))
23325 rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
23326 operands[0], 1, OPTAB_DIRECT);
23327 if (res != operands[0])
23328 emit_move_insn (operands[0], res);
/* x < 0 ? -1 : 0 — arithmetic shift smears the sign bit (no DImode
   arithmetic shift available, hence the exclusion).  */
23331 else if (GET_MODE_INNER (data_mode) != DImode
23332 && vector_all_ones_operand (negop, data_mode))
23334 rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
23335 operands[0], 0, OPTAB_DIRECT);
23336 if (res != operands[0])
23337 emit_move_insn (operands[0], res);
/* General case: legitimize operands, then compare + blend.  */
23342 if (!nonimmediate_operand (cop1, mode))
23343 cop1 = force_reg (mode, cop1);
23344 if (!general_operand (operands[1], data_mode))
23345 operands[1] = force_reg (data_mode, operands[1]);
23346 if (!general_operand (operands[2], data_mode))
23347 operands[2] = force_reg (data_mode, operands[2]);
23349 x = ix86_expand_int_sse_cmp (operands[0], code, cop0, cop1,
23350 operands[1], operands[2], &negate);
/* If the helper inverted the mask, swap the two arms instead.  */
23355 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
23356 operands[2-negate]);
23360 /* AVX512F does support 64-byte integer vector operations,
23361 thus the longest vector we are faced with is V64QImode.  */
23362 #define MAX_VECT_LEN 64
/* Descriptor for a (possibly constant) vector permutation being expanded:
   destination, the two source operands, the element-index permutation,
   the vector mode, the element count, and whether both sources are the
   same operand.  (NOTE(review): struct braces are elided in this chunk.)  */
23364 struct expand_vec_perm_d
23366 rtx target, op0, op1;
/* Element indices of the permutation, one per result element.  */
23367 unsigned char perm[MAX_VECT_LEN];
23368 machine_mode vmode;
23369 unsigned char nelt;
/* True when op0 == op1 (single-input shuffle).  */
23370 bool one_operand_p;
/* NOTE(review): elided chunk — the switch's case labels and several other
   interior lines are missing; comments describe only the visible code.
   Try to expand a two-source variable permutation with a single AVX-512
   VPERMI2 instruction.  Arguments come either packed in D (constant
   expander) or as TARGET/OP0/MASK/OP1 (non-const expander).  Selects the
   mode-specific generator gated on the available AVX-512 subsets; for
   float modes the mask uses the same-size integer mode.  Presumably
   returns false when no suitable instruction exists — confirm against the
   elided lines.  */
23375 ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1,
23376 struct expand_vec_perm_d *d)
23378 /* ix86_expand_vec_perm_vpermi2 is called from both const and non-const
23379 expander, so args are either in d, or in op0, op1 etc.  */
23380 machine_mode mode = GET_MODE (d ? d->op0 : op0);
23381 machine_mode maskmode = mode;
23382 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
/* 128/256-bit sub-word modes require the VL (and BW for 16-bit) subsets.  */
23387 if (TARGET_AVX512VL && TARGET_AVX512BW)
23388 gen = gen_avx512vl_vpermi2varv8hi3;
23391 if (TARGET_AVX512VL && TARGET_AVX512BW)
23392 gen = gen_avx512vl_vpermi2varv16hi3;
23395 if (TARGET_AVX512VBMI)
23396 gen = gen_avx512bw_vpermi2varv64qi3;
23399 if (TARGET_AVX512BW)
23400 gen = gen_avx512bw_vpermi2varv32hi3;
23403 if (TARGET_AVX512VL)
23404 gen = gen_avx512vl_vpermi2varv4si3;
23407 if (TARGET_AVX512VL)
23408 gen = gen_avx512vl_vpermi2varv8si3;
23411 if (TARGET_AVX512F)
23412 gen = gen_avx512f_vpermi2varv16si3;
/* Float modes: the permutation mask is an integer vector of equal size.  */
23415 if (TARGET_AVX512VL)
23417 gen = gen_avx512vl_vpermi2varv4sf3;
23418 maskmode = V4SImode;
23422 if (TARGET_AVX512VL)
23424 gen = gen_avx512vl_vpermi2varv8sf3;
23425 maskmode = V8SImode;
23429 if (TARGET_AVX512F)
23431 gen = gen_avx512f_vpermi2varv16sf3;
23432 maskmode = V16SImode;
23436 if (TARGET_AVX512VL)
23437 gen = gen_avx512vl_vpermi2varv2di3;
23440 if (TARGET_AVX512VL)
23441 gen = gen_avx512vl_vpermi2varv4di3;
23444 if (TARGET_AVX512F)
23445 gen = gen_avx512f_vpermi2varv8di3;
23448 if (TARGET_AVX512VL)
23450 gen = gen_avx512vl_vpermi2varv2df3;
23451 maskmode = V2DImode;
23455 if (TARGET_AVX512VL)
23457 gen = gen_avx512vl_vpermi2varv4df3;
23458 maskmode = V4DImode;
23462 if (TARGET_AVX512F)
23464 gen = gen_avx512f_vpermi2varv8df3;
23465 maskmode = V8DImode;
23475 /* ix86_expand_vec_perm_vpermi2 is called from both const and non-const
23476 expander, so args are either in d, or in op0, op1 etc.  */
23480 target = d->target;
/* Constant expander: materialize the mask from d->perm.  */
23483 for (int i = 0; i < d->nelt; ++i)
23484 vec[i] = GEN_INT (d->perm[i]);
23485 mask = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (d->nelt, vec));
23488 emit_insn (gen (target, op0, force_reg (maskmode, mask), op1));
/* NOTE(review): elided chunk — many interior lines (branch headers, case
   labels, braces) are missing; the added comments describe only what the
   visible statements show.  Expands a variable (run-time mask) vector
   permutation of operands[1]/operands[2] into operands[0] with control
   operands[3].  Strategy, in order: VPERMI2 (AVX-512); AVX2 VPERMD/VPERMPS
   with mask rewriting for V4DI/V4DF/V16HI; AVX2 cross-lane VPSHUFB
   synthesis for 256-bit byte shuffles; XOP VPPERM or SSSE3 PSHUFB (with a
   word→byte mask conversion) for 128-bit modes.  */
23492 /* Expand a variable vector permutation.  */
23495 ix86_expand_vec_perm (rtx operands[])
23497 rtx target = operands[0];
23498 rtx op0 = operands[1];
23499 rtx op1 = operands[2];
23500 rtx mask = operands[3];
23501 rtx t1, t2, t3, t4, t5, t6, t7, t8, vt, vt2, vec[32];
23502 machine_mode mode = GET_MODE (op0);
23503 machine_mode maskmode = GET_MODE (mask);
23505 bool one_operand_shuffle = rtx_equal_p (op0, op1);
23507 /* Number of elements in the vector.  */
23508 w = GET_MODE_NUNITS (mode);
23509 e = GET_MODE_UNIT_SIZE (mode);
23510 gcc_assert (w <= 64);
/* First choice: a single VPERMI2 instruction handles everything.  */
23512 if (ix86_expand_vec_perm_vpermi2 (target, op0, mask, op1, NULL))
23517 if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
23519 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
23520 an constant shuffle operand. With a tiny bit of effort we can
23521 use VPERMD instead. A re-interpretation stall for V4DFmode is
23522 unfortunate but there's no avoiding it.
23523 Similarly for V16HImode we don't have instructions for variable
23524 shuffling, while for V32QImode we can use after preparing suitable
23525 masks vpshufb; vpshufb; vpermq; vpor.  */
23527 if (mode == V16HImode)
23529 maskmode = mode = V32QImode;
23535 maskmode = mode = V8SImode;
23539 t1 = gen_reg_rtx (maskmode);
23541 /* Replicate the low bits of the V4DImode mask into V8SImode:
23543 t1 = { A A B B C C D D }.  */
23544 for (i = 0; i < w / 2; ++i)
23545 vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
23546 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
23547 vt = force_reg (maskmode, vt);
23548 mask = gen_lowpart (maskmode, mask);
23549 if (maskmode == V8SImode)
23550 emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
23552 emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));
23554 /* Multiply the shuffle indicies by two.  */
23555 t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
23558 /* Add one to the odd shuffle indicies:
23559 t1 = { A*2, A*2+1, B*2, B*2+1, ... }.  */
23560 for (i = 0; i < w / 2; ++i)
23562 vec[i * 2] = const0_rtx;
23563 vec[i * 2 + 1] = const1_rtx;
23565 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
23566 vt = validize_mem (force_const_mem (maskmode, vt));
23567 t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
23570 /* Continue as if V8SImode (resp. V32QImode) was used initially.  */
23571 operands[3] = mask = t1;
23572 target = gen_reg_rtx (mode);
23573 op0 = gen_lowpart (mode, op0);
23574 op1 = gen_lowpart (mode, op1);
23580 /* The VPERMD and VPERMPS instructions already properly ignore
23581 the high bits of the shuffle elements. No need for us to
23582 perform an AND ourselves.  */
23583 if (one_operand_shuffle)
23585 emit_insn (gen_avx2_permvarv8si (target, op0, mask));
23586 if (target != operands[0])
23587 emit_move_insn (operands[0],
23588 gen_lowpart (GET_MODE (operands[0]), target));
/* Two-operand V8SI: permute each input then merge (merge code elided).  */
23592 t1 = gen_reg_rtx (V8SImode);
23593 t2 = gen_reg_rtx (V8SImode);
23594 emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
23595 emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
/* V8SF variant of the same scheme.  */
23601 mask = gen_lowpart (V8SImode, mask);
23602 if (one_operand_shuffle)
23603 emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
23606 t1 = gen_reg_rtx (V8SFmode);
23607 t2 = gen_reg_rtx (V8SFmode);
23608 emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
23609 emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
23615 /* By combining the two 128-bit input vectors into one 256-bit
23616 input vector, we can use VPERMD and VPERMPS for the full
23617 two-operand shuffle.  */
23618 t1 = gen_reg_rtx (V8SImode);
23619 t2 = gen_reg_rtx (V8SImode);
23620 emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
23621 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
23622 emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
23623 emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
23627 t1 = gen_reg_rtx (V8SFmode);
23628 t2 = gen_reg_rtx (V8SImode);
23629 mask = gen_lowpart (V4SImode, mask);
23630 emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
23631 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
23632 emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
23633 emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
/* V32QI cross-lane shuffle via paired VPSHUFBs and lane swaps.  */
23637 t1 = gen_reg_rtx (V32QImode);
23638 t2 = gen_reg_rtx (V32QImode);
23639 t3 = gen_reg_rtx (V32QImode);
23640 vt2 = GEN_INT (-128);
23641 for (i = 0; i < 32; i++)
23643 vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
23644 vt = force_reg (V32QImode, vt);
23645 for (i = 0; i < 32; i++)
23646 vec[i] = i < 16 ? vt2 : const0_rtx;
23647 vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
23648 vt2 = force_reg (V32QImode, vt2);
23649 /* From mask create two adjusted masks, which contain the same
23650 bits as mask in the low 7 bits of each vector element.
23651 The first mask will have the most significant bit clear
23652 if it requests element from the same 128-bit lane
23653 and MSB set if it requests element from the other 128-bit lane.
23654 The second mask will have the opposite values of the MSB,
23655 and additionally will have its 128-bit lanes swapped.
23656 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
23657 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
23658 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
23659 stands for other 12 bytes.  */
23660 /* The bit whether element is from the same lane or the other
23661 lane is bit 4, so shift it up by 3 to the MSB position.  */
23662 t5 = gen_reg_rtx (V4DImode);
23663 emit_insn (gen_ashlv4di3 (t5, gen_lowpart (V4DImode, mask),
23665 /* Clear MSB bits from the mask just in case it had them set.  */
23666 emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
23667 /* After this t1 will have MSB set for elements from other lane.  */
23668 emit_insn (gen_xorv32qi3 (t1, gen_lowpart (V32QImode, t5), vt2));
23669 /* Clear bits other than MSB.  */
23670 emit_insn (gen_andv32qi3 (t1, t1, vt));
23671 /* Or in the lower bits from mask into t3.  */
23672 emit_insn (gen_iorv32qi3 (t3, t1, t2));
23673 /* And invert MSB bits in t1, so MSB is set for elements from the same
23675 emit_insn (gen_xorv32qi3 (t1, t1, vt));
23676 /* Swap 128-bit lanes in t3.  */
23677 t6 = gen_reg_rtx (V4DImode);
23678 emit_insn (gen_avx2_permv4di_1 (t6, gen_lowpart (V4DImode, t3),
23679 const2_rtx, GEN_INT (3),
23680 const0_rtx, const1_rtx));
23681 /* And or in the lower bits from mask into t1.  */
23682 emit_insn (gen_iorv32qi3 (t1, t1, t2));
23683 if (one_operand_shuffle)
23685 /* Each of these shuffles will put 0s in places where
23686 element from the other 128-bit lane is needed, otherwise
23687 will shuffle in the requested value.  */
23688 emit_insn (gen_avx2_pshufbv32qi3 (t3, op0,
23689 gen_lowpart (V32QImode, t6)));
23690 emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
23691 /* For t3 the 128-bit lanes are swapped again.  */
23692 t7 = gen_reg_rtx (V4DImode);
23693 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t3),
23694 const2_rtx, GEN_INT (3),
23695 const0_rtx, const1_rtx));
23696 /* And oring both together leads to the result.  */
23697 emit_insn (gen_iorv32qi3 (target, t1,
23698 gen_lowpart (V32QImode, t7)));
23699 if (target != operands[0])
23700 emit_move_insn (operands[0],
23701 gen_lowpart (GET_MODE (operands[0]), target));
23705 t4 = gen_reg_rtx (V32QImode);
23706 /* Similarly to the above one_operand_shuffle code,
23707 just for repeated twice for each operand. merge_two:
23708 code will merge the two results together.  */
23709 emit_insn (gen_avx2_pshufbv32qi3 (t4, op0,
23710 gen_lowpart (V32QImode, t6)));
23711 emit_insn (gen_avx2_pshufbv32qi3 (t3, op1,
23712 gen_lowpart (V32QImode, t6)));
23713 emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
23714 emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
23715 t7 = gen_reg_rtx (V4DImode);
23716 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t4),
23717 const2_rtx, GEN_INT (3),
23718 const0_rtx, const1_rtx));
23719 t8 = gen_reg_rtx (V4DImode);
23720 emit_insn (gen_avx2_permv4di_1 (t8, gen_lowpart (V4DImode, t3),
23721 const2_rtx, GEN_INT (3),
23722 const0_rtx, const1_rtx));
23723 emit_insn (gen_iorv32qi3 (t4, t2, gen_lowpart (V32QImode, t7)));
23724 emit_insn (gen_iorv32qi3 (t3, t1, gen_lowpart (V32QImode, t8)));
/* From here on only 128-bit (or smaller) modes are handled.  */
23730 gcc_assert (GET_MODE_SIZE (mode) <= 16);
23737 /* The XOP VPPERM insn supports three inputs. By ignoring the
23738 one_operand_shuffle special case, we avoid creating another
23739 set of constant vectors in memory.  */
23740 one_operand_shuffle = false;
23742 /* mask = mask & {2*w-1, ...} */
23743 vt = GEN_INT (2*w - 1);
23747 /* mask = mask & {w-1, ...} */
23748 vt = GEN_INT (w - 1);
23751 for (i = 0; i < w; i++)
23753 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
23754 mask = expand_simple_binop (maskmode, AND, mask, vt,
23755 NULL_RTX, 0, OPTAB_DIRECT);
23757 /* For non-QImode operations, convert the word permutation control
23758 into a byte permutation control.  */
23759 if (mode != V16QImode)
23761 mask = expand_simple_binop (maskmode, ASHIFT, mask,
23762 GEN_INT (exact_log2 (e)),
23763 NULL_RTX, 0, OPTAB_DIRECT);
23765 /* Convert mask to vector of chars.  */
23766 mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));
23768 /* Replicate each of the input bytes into byte positions:
23769 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
23770 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
23771 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}.  */
23772 for (i = 0; i < 16; ++i)
23773 vec[i] = GEN_INT (i/e * e);
23774 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
23775 vt = validize_mem (force_const_mem (V16QImode, vt));
23777 emit_insn (gen_xop_pperm (mask, mask, mask, vt));
23779 emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));
23781 /* Convert it into the byte positions by doing
23782 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */
23783 for (i = 0; i < 16; ++i)
23784 vec[i] = GEN_INT (i % e);
23785 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
23786 vt = validize_mem (force_const_mem (V16QImode, vt));
23787 emit_insn (gen_addv16qi3 (mask, mask, vt));
23790 /* The actual shuffle operations all operate on V16QImode.  */
23791 op0 = gen_lowpart (V16QImode, op0);
23792 op1 = gen_lowpart (V16QImode, op1);
/* XOP path: one VPPERM covers both sources.  */
23796 if (GET_MODE (target) != V16QImode)
23797 target = gen_reg_rtx (V16QImode);
23798 emit_insn (gen_xop_pperm (target, op0, op1, mask));
23799 if (target != operands[0])
23800 emit_move_insn (operands[0],
23801 gen_lowpart (GET_MODE (operands[0]), target));
23803 else if (one_operand_shuffle)
23805 if (GET_MODE (target) != V16QImode)
23806 target = gen_reg_rtx (V16QImode);
23807 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
23808 if (target != operands[0])
23809 emit_move_insn (operands[0],
23810 gen_lowpart (GET_MODE (operands[0]), target));
23817 /* Shuffle the two input vectors independently.  */
23818 t1 = gen_reg_rtx (V16QImode);
23819 t2 = gen_reg_rtx (V16QImode);
23820 emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
23821 emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));
23824 /* Then merge them together. The key is whether any given control
23825 element contained a bit set that indicates the second word.  */
23826 mask = operands[3];
23828 if (maskmode == V2DImode && !TARGET_SSE4_1)
23830 /* Without SSE4.1, we don't have V2DImode EQ. Perform one
23831 more shuffle to convert the V2DI input mask into a V4SI
23832 input mask. At which point the masking that expand_int_vcond
23833 will work as desired.  */
23834 rtx t3 = gen_reg_rtx (V4SImode);
23835 emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
23836 const0_rtx, const0_rtx,
23837 const2_rtx, const2_rtx));
23839 maskmode = V4SImode;
/* Isolate the "which source?" bit, then select via a vcond blend.  */
23843 for (i = 0; i < w; i++)
23845 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
23846 vt = force_reg (maskmode, vt);
23847 mask = expand_simple_binop (maskmode, AND, mask, vt,
23848 NULL_RTX, 0, OPTAB_DIRECT);
23850 if (GET_MODE (target) != mode)
23851 target = gen_reg_rtx (mode);
23853 xops[1] = gen_lowpart (mode, t2);
23854 xops[2] = gen_lowpart (mode, t1);
23855 xops[3] = gen_rtx_EQ (maskmode, mask, vt);
23858 ok = ix86_expand_int_vcond (xops);
23860 if (target != operands[0])
23861 emit_move_insn (operands[0],
23862 gen_lowpart (GET_MODE (operands[0]), target));
/* NOTE(review): elided chunk — the switch's case labels and branch
   headers are missing; comments describe only the visible assignments.
   With SSE4.1+ this uses the pmovzx/pmovsx extend instructions (for
   >=32-byte inputs first extracting the requested half, for the high
   half of 16-byte inputs first shifting it down); the fallback path
   (presumably pre-SSE4.1 — confirm against elided condition) uses
   interleave (punpck) with zero or a sign mask as the second operand.  */
23866 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
23867 true if we should do zero extension, else sign extension. HIGH_P is
23868 true if we want the N/2 high elements, else the low elements.  */
23871 ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
23873 machine_mode imode = GET_MODE (src);
23878 rtx (*unpack)(rtx, rtx);
23879 rtx (*extract)(rtx, rtx) = NULL;
23880 machine_mode halfmode = BLKmode;
/* Per-mode extend generator, plus the half-vector extract used for
   32/64-byte inputs.  */
23886 unpack = gen_avx512bw_zero_extendv32qiv32hi2;
23888 unpack = gen_avx512bw_sign_extendv32qiv32hi2;
23889 halfmode = V32QImode;
23891 = high_p ? gen_vec_extract_hi_v64qi : gen_vec_extract_lo_v64qi;
23895 unpack = gen_avx2_zero_extendv16qiv16hi2;
23897 unpack = gen_avx2_sign_extendv16qiv16hi2;
23898 halfmode = V16QImode;
23900 = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
23904 unpack = gen_avx512f_zero_extendv16hiv16si2;
23906 unpack = gen_avx512f_sign_extendv16hiv16si2;
23907 halfmode = V16HImode;
23909 = high_p ? gen_vec_extract_hi_v32hi : gen_vec_extract_lo_v32hi;
23913 unpack = gen_avx2_zero_extendv8hiv8si2;
23915 unpack = gen_avx2_sign_extendv8hiv8si2;
23916 halfmode = V8HImode;
23918 = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
23922 unpack = gen_avx512f_zero_extendv8siv8di2;
23924 unpack = gen_avx512f_sign_extendv8siv8di2;
23925 halfmode = V8SImode;
23927 = high_p ? gen_vec_extract_hi_v16si : gen_vec_extract_lo_v16si;
23931 unpack = gen_avx2_zero_extendv4siv4di2;
23933 unpack = gen_avx2_sign_extendv4siv4di2;
23934 halfmode = V4SImode;
23936 = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
23940 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
23942 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
23946 unpack = gen_sse4_1_zero_extendv4hiv4si2;
23948 unpack = gen_sse4_1_sign_extendv4hiv4si2;
23952 unpack = gen_sse4_1_zero_extendv2siv2di2;
23954 unpack = gen_sse4_1_sign_extendv2siv2di2;
23957 gcc_unreachable ();
/* Wide inputs: extract the requested half first.  */
23960 if (GET_MODE_SIZE (imode) >= 32)
23962 tmp = gen_reg_rtx (halfmode);
23963 emit_insn (extract (tmp, src));
23967 /* Shift higher 8 bytes to lower 8 bytes.  */
23968 tmp = gen_reg_rtx (V1TImode);
23969 emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, src),
23971 tmp = gen_lowpart (imode, tmp);
23976 emit_insn (unpack (dest, tmp));
/* Fallback: interleave-based unpack.  */
23980 rtx (*unpack)(rtx, rtx, rtx);
23986 unpack = gen_vec_interleave_highv16qi;
23988 unpack = gen_vec_interleave_lowv16qi;
23992 unpack = gen_vec_interleave_highv8hi;
23994 unpack = gen_vec_interleave_lowv8hi;
23998 unpack = gen_vec_interleave_highv4si;
24000 unpack = gen_vec_interleave_lowv4si;
24003 gcc_unreachable ();
/* Zero-extend interleaves with zero; sign-extend interleaves with a
   0 > src mask replicating the sign bits.  */
24007 tmp = force_reg (imode, CONST0_RTX (imode));
24009 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
24010 src, pc_rtx, pc_rtx);
24012 rtx tmp2 = gen_reg_rtx (imode);
24013 emit_insn (unpack (tmp2, src, tmp));
24014 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), tmp2));
/* NOTE(review): elided chunk — interior lines (switch/case labels, early
   returns) are missing.  Expands operands[0] = operands[2] +/- 1 under
   the comparison in operands[1] as a single adc/sbb: the compare is
   turned into a carry-flag test, then a mode-sized add/sub-with-carry of
   zero is emitted.  Only +/-1 adjustments (operands[3]) are handled.  */
24018 /* Expand conditional increment or decrement using adb/sbb instructions.
24019 The default case using setcc followed by the conditional move can be
24020 done by generic code.  */
24022 ix86_expand_int_addcc (rtx operands[])
24024 enum rtx_code code = GET_CODE (operands[1]);
24026 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
24028 rtx val = const0_rtx;
24029 bool fpcmp = false;
24031 rtx op0 = XEXP (operands[1], 0);
24032 rtx op1 = XEXP (operands[1], 1);
/* Only increment/decrement by one can use the carry trick.  */
24034 if (operands[3] != const1_rtx
24035 && operands[3] != constm1_rtx)
/* Bail out unless the compare can be expressed as a carry-flag test.  */
24037 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
24039 code = GET_CODE (compare_op);
24041 flags = XEXP (compare_op, 0);
24043 if (GET_MODE (flags) == CCFPmode
24044 || GET_MODE (flags) == CCFPUmode)
24047 code = ix86_fp_compare_code_to_integer (code);
/* FP compares must use the unordered-aware reversal.  */
24054 PUT_CODE (compare_op,
24055 reverse_condition_maybe_unordered
24056 (GET_CODE (compare_op)));
24058 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
24061 mode = GET_MODE (operands[0]);
24063 /* Construct either adc or sbb insn.  */
24064 if ((code == LTU) == (operands[3] == constm1_rtx))
24069 insn = gen_subqi3_carry;
24072 insn = gen_subhi3_carry;
24075 insn = gen_subsi3_carry;
24078 insn = gen_subdi3_carry;
24081 gcc_unreachable ();
24089 insn = gen_addqi3_carry;
24092 insn = gen_addhi3_carry;
24095 insn = gen_addsi3_carry;
24098 insn = gen_adddi3_carry;
24101 gcc_unreachable ();
/* dest = src +/- 0 + carry, i.e. the conditional inc/dec.  */
24104 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
/* NOTE(review): elided chunk — branch headers, braces and some case
   labels are missing; comments describe only the visible statements.
   Splits OPERAND of MODE into word-sized pieces stored in PARTS[],
   handling registers (after reload), offsettable memory, pushes, and
   floating-point constants (via real_to_target).  Two code regions are
   visible: one emitting SImode parts (presumably !TARGET_64BIT) and one
   emitting DImode parts (presumably TARGET_64BIT) — confirm against the
   elided conditions.  Returns the part count per the head comment.  */
24110 /* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
24111 but works for floating pointer parameters and nonoffsetable memories.
24112 For pushes, it returns just stack offsets; the values will be saved
24113 in the right order. Maximally three parts are generated.  */
24116 ix86_split_to_parts (rtx operand, rtx *parts, machine_mode mode)
/* Part count: 32-bit target counts 4-byte words (XFmode = 3), 64-bit
   target counts 8-byte words.  */
24121 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
24123 size = (GET_MODE_SIZE (mode) + 4) / 8;
24125 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
24126 gcc_assert (size >= 2 && size <= 4);
24128 /* Optimize constant pool reference to immediates. This is used by fp
24129 moves, that force all constants to memory to allow combining.  */
24130 if (MEM_P (operand) && MEM_READONLY_P (operand))
24132 rtx tmp = maybe_get_pool_constant (operand);
24137 if (MEM_P (operand) && !offsettable_memref_p (operand))
24139 /* The only non-offsetable memories we handle are pushes.  */
24140 int ok = push_operand (operand, VOIDmode);
/* For pushes each part is the same word-mode stack reference.  */
24144 operand = copy_rtx (operand);
24145 PUT_MODE (operand, word_mode);
24146 parts[0] = parts[1] = parts[2] = parts[3] = operand;
24150 if (GET_CODE (operand) == CONST_VECTOR)
24152 machine_mode imode = int_mode_for_mode (mode);
24153 /* Caution: if we looked through a constant pool memory above,
24154 the operand may actually have a different mode now. That's
24155 ok, since we want to pun this all the way back to an integer.  */
24156 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
24157 gcc_assert (operand != NULL);
/* SImode-parts region (presumably the !TARGET_64BIT side).  */
24163 if (mode == DImode)
24164 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
24169 if (REG_P (operand))
24171 gcc_assert (reload_completed);
24172 for (i = 0; i < size; i++)
24173 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
24175 else if (offsettable_memref_p (operand))
24177 operand = adjust_address (operand, SImode, 0);
24178 parts[0] = operand;
24179 for (i = 1; i < size; i++)
24180 parts[i] = adjust_address (operand, SImode, 4 * i);
24182 else if (CONST_DOUBLE_P (operand))
24184 const REAL_VALUE_TYPE *r;
24187 r = CONST_DOUBLE_REAL_VALUE (operand);
/* Decompose the FP constant into 32-bit target-format words.  */
24191 real_to_target (l, r, mode);
24192 parts[3] = gen_int_mode (l[3], SImode);
24193 parts[2] = gen_int_mode (l[2], SImode);
24196 /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
24197 long double may not be 80-bit.  */
24198 real_to_target (l, r, mode);
24199 parts[2] = gen_int_mode (l[2], SImode);
24202 REAL_VALUE_TO_TARGET_DOUBLE (*r, l);
24205 gcc_unreachable ();
24207 parts[1] = gen_int_mode (l[1], SImode);
24208 parts[0] = gen_int_mode (l[0], SImode);
24211 gcc_unreachable ();
/* DImode-parts region (presumably the TARGET_64BIT side).  */
24216 if (mode == TImode)
24217 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
24218 if (mode == XFmode || mode == TFmode)
24220 machine_mode upper_mode = mode==XFmode ? SImode : DImode;
24221 if (REG_P (operand))
24223 gcc_assert (reload_completed);
24224 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
24225 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
24227 else if (offsettable_memref_p (operand))
24229 operand = adjust_address (operand, DImode, 0);
24230 parts[0] = operand;
24231 parts[1] = adjust_address (operand, upper_mode, 8);
24233 else if (CONST_DOUBLE_P (operand))
24237 real_to_target (l, CONST_DOUBLE_REAL_VALUE (operand), mode);
24239 /* real_to_target puts 32-bit pieces in each long.  */
/* Glue pairs of 32-bit pieces back into 64-bit immediates.  */
24242 ((l[0] & (HOST_WIDE_INT) 0xffffffff)
24243 | ((l[1] & (HOST_WIDE_INT) 0xffffffff) << 32),
24246 if (upper_mode == SImode)
24247 parts[1] = gen_int_mode (l[2], SImode);
24251 ((l[2] & (HOST_WIDE_INT) 0xffffffff)
24252 | ((l[3] & (HOST_WIDE_INT) 0xffffffff) << 32),
24256 gcc_unreachable ();
24263 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
24264 Return false when normal moves are needed; true when all required
24265 insns have been emitted. Operands 2-4 contain the input values
24266 int the correct order; operands 5-7 contain the output values. */
24269 ix86_split_long_move (rtx operands[])
24274 int collisions = 0;
24275 machine_mode mode = GET_MODE (operands[0]);
24276 bool collisionparts[4];
24278 /* The DFmode expanders may ask us to move double.
24279 For 64bit target this is single move. By hiding the fact
24280 here we simplify i386.md splitters. */
24281 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
24283 /* Optimize constant pool reference to immediates. This is used by
24284 fp moves, that force all constants to memory to allow combining. */
24286 if (MEM_P (operands[1])
24287 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
24288 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
24289 operands[1] = get_pool_constant (XEXP (operands[1], 0));
24290 if (push_operand (operands[0], VOIDmode))
24292 operands[0] = copy_rtx (operands[0]);
24293 PUT_MODE (operands[0], word_mode);
24296 operands[0] = gen_lowpart (DImode, operands[0]);
24297 operands[1] = gen_lowpart (DImode, operands[1]);
24298 emit_move_insn (operands[0], operands[1]);
24302 /* The only non-offsettable memory we handle is push. */
24303 if (push_operand (operands[0], VOIDmode))
24306 gcc_assert (!MEM_P (operands[0])
24307 || offsettable_memref_p (operands[0]));
24309 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
24310 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
24312 /* When emitting push, take care for source operands on the stack. */
24313 if (push && MEM_P (operands[1])
24314 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
24316 rtx src_base = XEXP (part[1][nparts - 1], 0);
24318 /* Compensate for the stack decrement by 4. */
24319 if (!TARGET_64BIT && nparts == 3
24320 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
24321 src_base = plus_constant (Pmode, src_base, 4);
24323 /* src_base refers to the stack pointer and is
24324 automatically decreased by emitted push. */
24325 for (i = 0; i < nparts; i++)
24326 part[1][i] = change_address (part[1][i],
24327 GET_MODE (part[1][i]), src_base);
24330 /* We need to do copy in the right order in case an address register
24331 of the source overlaps the destination. */
24332 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
24336 for (i = 0; i < nparts; i++)
24339 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
24340 if (collisionparts[i])
24344 /* Collision in the middle part can be handled by reordering. */
24345 if (collisions == 1 && nparts == 3 && collisionparts [1])
24347 std::swap (part[0][1], part[0][2]);
24348 std::swap (part[1][1], part[1][2]);
24350 else if (collisions == 1
24352 && (collisionparts [1] || collisionparts [2]))
24354 if (collisionparts [1])
24356 std::swap (part[0][1], part[0][2]);
24357 std::swap (part[1][1], part[1][2]);
24361 std::swap (part[0][2], part[0][3]);
24362 std::swap (part[1][2], part[1][3]);
24366 /* If there are more collisions, we can't handle it by reordering.
24367 Do an lea to the last part and use only one colliding move. */
24368 else if (collisions > 1)
24370 rtx base, addr, tls_base = NULL_RTX;
24374 base = part[0][nparts - 1];
24376 /* Handle the case when the last part isn't valid for lea.
24377 Happens in 64-bit mode storing the 12-byte XFmode. */
24378 if (GET_MODE (base) != Pmode)
24379 base = gen_rtx_REG (Pmode, REGNO (base));
24381 addr = XEXP (part[1][0], 0);
24382 if (TARGET_TLS_DIRECT_SEG_REFS)
24384 struct ix86_address parts;
24385 int ok = ix86_decompose_address (addr, &parts);
24387 if (parts.seg == DEFAULT_TLS_SEG_REG)
24389 /* It is not valid to use %gs: or %fs: in
24390 lea though, so we need to remove it from the
24391 address used for lea and add it to each individual
24392 memory loads instead. */
24393 addr = copy_rtx (addr);
24395 while (GET_CODE (*x) == PLUS)
24397 for (i = 0; i < 2; i++)
24399 rtx u = XEXP (*x, i);
24400 if (GET_CODE (u) == ZERO_EXTEND)
24402 if (GET_CODE (u) == UNSPEC
24403 && XINT (u, 1) == UNSPEC_TP)
24405 tls_base = XEXP (*x, i);
24406 *x = XEXP (*x, 1 - i);
24414 gcc_assert (tls_base);
24417 emit_insn (gen_rtx_SET (base, addr));
24419 base = gen_rtx_PLUS (GET_MODE (base), base, tls_base);
24420 part[1][0] = replace_equiv_address (part[1][0], base);
24421 for (i = 1; i < nparts; i++)
24424 base = copy_rtx (base);
24425 tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i);
24426 part[1][i] = replace_equiv_address (part[1][i], tmp);
24437 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
24438 emit_insn (ix86_gen_add3 (stack_pointer_rtx,
24439 stack_pointer_rtx, GEN_INT (-4)));
24440 emit_move_insn (part[0][2], part[1][2]);
24442 else if (nparts == 4)
24444 emit_move_insn (part[0][3], part[1][3]);
24445 emit_move_insn (part[0][2], part[1][2]);
24450 /* In 64bit mode we don't have 32bit push available. In case this is
24451 register, it is OK - we will just use larger counterpart. We also
24452 retype memory - these comes from attempt to avoid REX prefix on
24453 moving of second half of TFmode value. */
24454 if (GET_MODE (part[1][1]) == SImode)
24456 switch (GET_CODE (part[1][1]))
24459 part[1][1] = adjust_address (part[1][1], DImode, 0);
24463 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
24467 gcc_unreachable ();
24470 if (GET_MODE (part[1][0]) == SImode)
24471 part[1][0] = part[1][1];
24474 emit_move_insn (part[0][1], part[1][1]);
24475 emit_move_insn (part[0][0], part[1][0]);
24479 /* Choose correct order to not overwrite the source before it is copied. */
24480 if ((REG_P (part[0][0])
24481 && REG_P (part[1][1])
24482 && (REGNO (part[0][0]) == REGNO (part[1][1])
24484 && REGNO (part[0][0]) == REGNO (part[1][2]))
24486 && REGNO (part[0][0]) == REGNO (part[1][3]))))
24488 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
24490 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
24492 operands[2 + i] = part[0][j];
24493 operands[6 + i] = part[1][j];
24498 for (i = 0; i < nparts; i++)
24500 operands[2 + i] = part[0][i];
24501 operands[6 + i] = part[1][i];
24505 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
24506 if (optimize_insn_for_size_p ())
24508 for (j = 0; j < nparts - 1; j++)
24509 if (CONST_INT_P (operands[6 + j])
24510 && operands[6 + j] != const0_rtx
24511 && REG_P (operands[2 + j]))
24512 for (i = j; i < nparts - 1; i++)
24513 if (CONST_INT_P (operands[7 + i])
24514 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
24515 operands[7 + i] = operands[2 + j];
24518 for (i = 0; i < nparts; i++)
24519 emit_move_insn (operands[2 + i], operands[6 + i]);
24524 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
24525 left shift by a constant, either using a single shift or
24526 a sequence of add instructions. */
24529 ix86_expand_ashl_const (rtx operand, int count, machine_mode mode)
24531 rtx (*insn)(rtx, rtx, rtx);
24534 || (count * ix86_cost->add <= ix86_cost->shift_const
24535 && !optimize_insn_for_size_p ()))
24537 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
24538 while (count-- > 0)
24539 emit_insn (insn (operand, operand, operand));
24543 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
24544 emit_insn (insn (operand, operand, GEN_INT (count)));
/* Split a double-word left shift OPERANDS[0] = OPERANDS[1] << OPERANDS[2]
   into word-sized operations.  MODE is the double-word mode being split;
   SCRATCH, if non-NULL, is a spare register usable on cmov targets.  */
24549 ix86_split_ashl (rtx *operands, rtx scratch, machine_mode mode)
24551 rtx (*gen_ashl3)(rtx, rtx, rtx);
24552 rtx (*gen_shld)(rtx, rtx, rtx);
24553 int half_width = GET_MODE_BITSIZE (mode) >> 1;
24555 rtx low[2], high[2];
/* Case 1: the shift count is a compile-time constant.  */
24558 if (CONST_INT_P (operands[2]))
24560 split_double_mode (mode, operands, 2, low, high);
/* Hardware shifts reduce the count modulo the operand's bit width.  */
24561 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
/* Count of at least a half word: the low half becomes zero and the old
   low half, shifted left by the excess, becomes the high half.  */
24563 if (count >= half_width)
24565 emit_move_insn (high[0], low[1]);
24566 emit_move_insn (low[0], const0_rtx);
24568 if (count > half_width)
24569 ix86_expand_ashl_const (high[0], count - half_width, mode);
/* Small constant count: shld fills the high half from the low half,
   then the low half is shifted on its own.  */
24573 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
24575 if (!rtx_equal_p (operands[0], operands[1]))
24576 emit_move_insn (operands[0], operands[1]);
24578 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
24579 ix86_expand_ashl_const (low[0], count, mode);
/* Case 2: variable shift count.  */
24584 split_double_mode (mode, operands, 1, low, high);
/* The halves are single-word, hence the SImode generator for a DImode
   split (and DImode generator for a TImode split).  */
24586 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
/* 1 << N: produce the single set bit with flag-based byte sets or a
   bit-extract, avoiding shld entirely.  */
24588 if (operands[1] == const1_rtx)
24590 /* Assuming we've chosen a QImode capable registers, then 1 << N
24591 can be done with two 32/64-bit shifts, no branches, no cmoves. */
24592 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
24594 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
24596 ix86_expand_clear (low[0]);
24597 ix86_expand_clear (high[0]);
/* Test the count bit that selects the high half.  */
24598 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
/* low = (count & half_width) == 0; stored via a strict-low-part byte
   set so only the low byte is written.  */
24600 d = gen_lowpart (QImode, low[0]);
24601 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
24602 s = gen_rtx_EQ (QImode, flags, const0_rtx);
24603 emit_insn (gen_rtx_SET (d, s));
/* high = (count & half_width) != 0, same technique.  */
24605 d = gen_lowpart (QImode, high[0]);
24606 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
24607 s = gen_rtx_NE (QImode, flags, const0_rtx);
24608 emit_insn (gen_rtx_SET (d, s));
24611 /* Otherwise, we can get the same results by manually performing
24612 a bit extract operation on bit 5/6, and then performing the two
24613 shifts. The two methods of getting 0/1 into low/high are exactly
24614 the same size. Avoiding the shift in the bit extract case helps
24615 pentium4 a bit; no one else seems to care much either way. */
24618 machine_mode half_mode;
24619 rtx (*gen_lshr3)(rtx, rtx, rtx);
24620 rtx (*gen_and3)(rtx, rtx, rtx);
24621 rtx (*gen_xor3)(rtx, rtx, rtx);
24622 HOST_WIDE_INT bits;
/* Select single-word generators matching the half-word mode.  */
24625 if (mode == DImode)
24627 half_mode = SImode;
24628 gen_lshr3 = gen_lshrsi3;
24629 gen_and3 = gen_andsi3;
24630 gen_xor3 = gen_xorsi3;
24635 half_mode = DImode;
24636 gen_lshr3 = gen_lshrdi3;
24637 gen_and3 = gen_anddi3;
24638 gen_xor3 = gen_xordi3;
/* Extract one bit of the count into HIGH[0] (shift right by BITS, mask
   with 1), then derive LOW[0] as its complement via xor.  */
24642 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
24643 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
24645 x = gen_lowpart (half_mode, operands[2]);
24646 emit_insn (gen_rtx_SET (high[0], x));
24648 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
24649 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
24650 emit_move_insn (low[0], high[0]);
24651 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
/* Shift the 0/1 pair into its final position.  */
24654 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
24655 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
/* -1 << N: seed both halves with -1 so no shld is needed.  */
24659 if (operands[1] == constm1_rtx)
24661 /* For -1 << N, we can avoid the shld instruction, because we
24662 know that we're shifting 0...31/63 ones into a -1. */
24663 emit_move_insn (low[0], constm1_rtx);
24664 if (optimize_insn_for_size_p ())
24665 emit_move_insn (high[0], low[0]);
24667 emit_move_insn (high[0], constm1_rtx);
/* General case: copy source to destination, shld the high half, shift
   the low half.  */
24671 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
24673 if (!rtx_equal_p (operands[0], operands[1]))
24674 emit_move_insn (operands[0], operands[1]);
24676 split_double_mode (mode, operands, 1, low, high);
24677 emit_insn (gen_shld (high[0], low[0], operands[2]));
24680 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
/* Fix up the count >= half_width case: with cmov use the adj_1
   pattern and a cleared scratch, otherwise the branchy adj_2.  */
24682 if (TARGET_CMOVE && scratch)
24684 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
24685 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
24687 ix86_expand_clear (scratch);
24688 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
24692 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
24693 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
24695 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
/* Split a double-word arithmetic right shift
   OPERANDS[0] = OPERANDS[1] >> OPERANDS[2] into word-sized operations.
   MODE is the double-word mode being split; SCRATCH, if non-NULL, is a
   spare register usable on cmov targets.  */
24700 ix86_split_ashr (rtx *operands, rtx scratch, machine_mode mode)
24702 rtx (*gen_ashr3)(rtx, rtx, rtx)
24703 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
24704 rtx (*gen_shrd)(rtx, rtx, rtx);
24705 int half_width = GET_MODE_BITSIZE (mode) >> 1;
24707 rtx low[2], high[2];
/* Case 1: the shift count is a compile-time constant.  */
24710 if (CONST_INT_P (operands[2]))
24712 split_double_mode (mode, operands, 2, low, high);
24713 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
/* Shift by all-but-one bits: both halves become the sign mask.  */
24715 if (count == GET_MODE_BITSIZE (mode) - 1)
24717 emit_move_insn (high[0], high[1]);
24718 emit_insn (gen_ashr3 (high[0], high[0],
24719 GEN_INT (half_width - 1)));
24720 emit_move_insn (low[0], high[0]);
/* Count of at least a half word: low half gets the old high half
   (further shifted by the excess); high half becomes pure sign bits.  */
24723 else if (count >= half_width)
24725 emit_move_insn (low[0], high[1]);
24726 emit_move_insn (high[0], low[0]);
24727 emit_insn (gen_ashr3 (high[0], high[0],
24728 GEN_INT (half_width - 1)));
24730 if (count > half_width)
24731 emit_insn (gen_ashr3 (low[0], low[0],
24732 GEN_INT (count - half_width)));
/* Small constant count: shrd the low half, then arithmetic-shift the
   high half.  */
24736 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
24738 if (!rtx_equal_p (operands[0], operands[1]))
24739 emit_move_insn (operands[0], operands[1]);
24741 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
24742 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
/* Case 2: variable shift count.  */
24747 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
24749 if (!rtx_equal_p (operands[0], operands[1]))
24750 emit_move_insn (operands[0], operands[1]);
24752 split_double_mode (mode, operands, 1, low, high);
24754 emit_insn (gen_shrd (low[0], high[0], operands[2]));
24755 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
/* Fix up the count >= half_width case: with cmov, SCRATCH holds the
   sign extension of the high half for the adj_1 pattern; otherwise the
   adj_3 pattern handles it.  */
24757 if (TARGET_CMOVE && scratch)
24759 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
24760 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
24762 emit_move_insn (scratch, high[0]);
24763 emit_insn (gen_ashr3 (scratch, scratch,
24764 GEN_INT (half_width - 1)));
24765 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
24770 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
24771 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
24773 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
/* Split a double-word logical right shift
   OPERANDS[0] = OPERANDS[1] >> OPERANDS[2] into word-sized operations.
   MODE is the double-word mode being split; SCRATCH, if non-NULL, is a
   spare register usable on cmov targets.  */
24779 ix86_split_lshr (rtx *operands, rtx scratch, machine_mode mode)
24781 rtx (*gen_lshr3)(rtx, rtx, rtx)
24782 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
24783 rtx (*gen_shrd)(rtx, rtx, rtx);
24784 int half_width = GET_MODE_BITSIZE (mode) >> 1;
24786 rtx low[2], high[2];
/* Case 1: the shift count is a compile-time constant.  */
24789 if (CONST_INT_P (operands[2]))
24791 split_double_mode (mode, operands, 2, low, high);
24792 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
/* Count of at least a half word: low half gets the old high half
   (further shifted by the excess); high half becomes zero.  */
24794 if (count >= half_width)
24796 emit_move_insn (low[0], high[1]);
24797 ix86_expand_clear (high[0]);
24799 if (count > half_width)
24800 emit_insn (gen_lshr3 (low[0], low[0],
24801 GEN_INT (count - half_width)));
/* Small constant count: shrd the low half, then logical-shift the
   high half.  */
24805 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
24807 if (!rtx_equal_p (operands[0], operands[1]))
24808 emit_move_insn (operands[0], operands[1]);
24810 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
24811 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
/* Case 2: variable shift count.  */
24816 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
24818 if (!rtx_equal_p (operands[0], operands[1]))
24819 emit_move_insn (operands[0], operands[1]);
24821 split_double_mode (mode, operands, 1, low, high);
24823 emit_insn (gen_shrd (low[0], high[0], operands[2]));
24824 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
/* Fix up the count >= half_width case: with cmov use the adj_1 pattern
   and a cleared scratch (zero fills the high half), otherwise the
   branchy adj_2 pattern.  */
24826 if (TARGET_CMOVE && scratch)
24828 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
24829 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
24831 ix86_expand_clear (scratch);
24832 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
24837 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
24838 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
24840 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
24845 /* Predict just emitted jump instruction to be taken with probability PROB. */
24847 predict_jump (int prob)
24849 rtx insn = get_last_insn ();
24850 gcc_assert (JUMP_P (insn));
24851 add_int_reg_note (insn, REG_BR_PROB, prob);
24854 /* Helper function for the string operations below. Dest VARIABLE whether
24855 it is aligned to VALUE bytes. If true, jump to the label. */
24856 static rtx_code_label *
24857 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
24859 rtx_code_label *label = gen_label_rtx ();
24860 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
24861 if (GET_MODE (variable) == DImode)
24862 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
24864 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
24865 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
24868 predict_jump (REG_BR_PROB_BASE * 50 / 100);
24870 predict_jump (REG_BR_PROB_BASE * 90 / 100);
24874 /* Adjust COUNTER by the VALUE. */
24876 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
24878 rtx (*gen_add)(rtx, rtx, rtx)
24879 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
24881 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
24884 /* Zero extend possibly SImode EXP to Pmode register. */
24886 ix86_zero_extend_to_Pmode (rtx exp)
24888 return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
24891 /* Divide COUNTREG by SCALE. */
24893 scale_counter (rtx countreg, int scale)
24899 if (CONST_INT_P (countreg))
24900 return GEN_INT (INTVAL (countreg) / scale);
24901 gcc_assert (REG_P (countreg));
24903 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
24904 GEN_INT (exact_log2 (scale)),
24905 NULL, 1, OPTAB_DIRECT);
24909 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
24910 DImode for constant loop counts. */
24912 static machine_mode
24913 counter_mode (rtx count_exp)
24915 if (GET_MODE (count_exp) != VOIDmode)
24916 return GET_MODE (count_exp);
24917 if (!CONST_INT_P (count_exp))
24919 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
24924 /* Copy the address to a Pmode register. This is used for x32 to
24925 truncate DImode TLS address to a SImode register. */
24928 ix86_copy_addr_to_reg (rtx addr)
24931 if (GET_MODE (addr) == Pmode || GET_MODE (addr) == VOIDmode)
24933 reg = copy_addr_to_reg (addr);
24934 REG_POINTER (reg) = 1;
24939 gcc_assert (GET_MODE (addr) == DImode && Pmode == SImode);
24940 reg = copy_to_mode_reg (DImode, addr);
24941 REG_POINTER (reg) = 1;
24942 return gen_rtx_SUBREG (SImode, reg, 0);
24946 /* When ISSETMEM is FALSE, output simple loop to move memory pointer to SRCPTR
24947 to DESTPTR via chunks of MODE unrolled UNROLL times, overall size is COUNT
24948 specified in bytes. When ISSETMEM is TRUE, output the equivalent loop to set
24949 memory by VALUE (supposed to be in MODE).
24951 The size is rounded down to whole number of chunk size moved at once.
24952 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
24956 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
24957 rtx destptr, rtx srcptr, rtx value,
24958 rtx count, machine_mode mode, int unroll,
24959 int expected_size, bool issetmem)
24961 rtx_code_label *out_label, *top_label;
24963 machine_mode iter_mode = counter_mode (count);
/* Bytes handled per loop iteration.  */
24964 int piece_size_n = GET_MODE_SIZE (mode) * unroll;
24965 rtx piece_size = GEN_INT (piece_size_n);
24966 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
24970 top_label = gen_label_rtx ();
24971 out_label = gen_label_rtx ();
24972 iter = gen_reg_rtx (iter_mode);
/* SIZE = COUNT rounded down to a whole multiple of the chunk size.  */
24974 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
24975 NULL, 1, OPTAB_DIRECT);
24976 /* Those two should combine. */
24977 if (piece_size == const1_rtx)
/* Skip the loop entirely when not even one chunk fits.  */
24979 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
24981 predict_jump (REG_BR_PROB_BASE * 10 / 100);
24983 emit_move_insn (iter, const0_rtx);
24985 emit_label (top_label);
/* Address the current chunk as base address + ITER.  */
24987 tmp = convert_modes (Pmode, iter_mode, iter, true);
24989 /* This assert could be relaxed - in this case we'll need to compute
24990 smallest power of two, containing in PIECE_SIZE_N and pass it to
24992 gcc_assert ((piece_size_n & (piece_size_n - 1)) == 0);
24993 destmem = offset_address (destmem, tmp, piece_size_n);
24994 destmem = adjust_address (destmem, mode, 0);
24998 srcmem = offset_address (srcmem, copy_rtx (tmp), piece_size_n);
24999 srcmem = adjust_address (srcmem, mode, 0);
25001 /* When unrolling for chips that reorder memory reads and writes,
25002 we can save registers by using single temporary.
25003 Also using 4 temporaries is overkill in 32bit mode. */
/* NOTE: the "&& 0" disables this single-temporary variant.  */
25004 if (!TARGET_64BIT && 0)
25006 for (i = 0; i < unroll; i++)
25011 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
25013 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
25015 emit_move_insn (destmem, srcmem);
/* Enabled variant: load every chunk into its own temporary first, then
   store them all, so loads and stores are not interleaved.  */
25021 gcc_assert (unroll <= 4);
25022 for (i = 0; i < unroll; i++)
25024 tmpreg[i] = gen_reg_rtx (mode);
25028 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
25030 emit_move_insn (tmpreg[i], srcmem);
25032 for (i = 0; i < unroll; i++)
25037 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
25039 emit_move_insn (destmem, tmpreg[i]);
/* Setmem side: store VALUE into each chunk.  */
25044 for (i = 0; i < unroll; i++)
25048 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
25049 emit_move_insn (destmem, value);
/* ITER += chunk size; loop back while ITER < SIZE.  */
25052 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
25053 true, OPTAB_LIB_WIDEN);
25055 emit_move_insn (iter, tmp);
25057 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
/* Derive the backward-branch probability from the expected trip count
   (in chunks); fall back to 80% when no estimate is available.  */
25059 if (expected_size != -1)
25061 expected_size /= GET_MODE_SIZE (mode) * unroll;
25062 if (expected_size == 0)
25064 else if (expected_size > REG_BR_PROB_BASE)
25065 predict_jump (REG_BR_PROB_BASE - 1);
25067 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
25070 predict_jump (REG_BR_PROB_BASE * 80 / 100);
/* Finally advance the pointer registers past the processed bytes.  */
25071 iter = ix86_zero_extend_to_Pmode (iter);
25072 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
25073 true, OPTAB_LIB_WIDEN);
25074 if (tmp != destptr)
25075 emit_move_insn (destptr, tmp);
25078 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
25079 true, OPTAB_LIB_WIDEN);
25081 emit_move_insn (srcptr, tmp);
25083 emit_label (out_label);
25086 /* Output "rep; mov" or "rep; stos" instruction depending on ISSETMEM argument.
25087 When ISSETMEM is true, arguments SRCMEM and SRCPTR are ignored.
25088 When ISSETMEM is false, arguments VALUE and ORIG_VALUE are ignored.
25089 For setmem case, VALUE is a promoted to a wider size ORIG_VALUE.
25090 ORIG_VALUE is the original value passed to memset to fill the memory with.
25091 Other arguments have same meaning as for previous function. */
25094 expand_set_or_movmem_via_rep (rtx destmem, rtx srcmem,
25095 rtx destptr, rtx srcptr, rtx value, rtx orig_value,
25097 machine_mode mode, bool issetmem)
25102 HOST_WIDE_INT rounded_count;
25104 /* If possible, it is shorter to use rep movs.
25105 TODO: Maybe it is better to move this logic to decide_alg. */
/* A byte count that is a multiple of 4 (with a zero fill value for
   setmem) qualifies for the wider-chunk rep form.  */
25106 if (mode == QImode && CONST_INT_P (count) && !(INTVAL (count) & 3)
25107 && (!issetmem || orig_value == const0_rtx))
/* Canonicalize DESTMEM into a BLKmode reference based on DESTPTR.  */
25110 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
25111 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
/* The rep count is expressed in MODE-sized chunks, zero-extended to
   Pmode.  */
25113 countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
25114 GET_MODE_SIZE (mode)))
25115 if (mode != QImode)
/* DESTEXP describes the final destination pointer value:
   DESTPTR + COUNTREG << log2 (chunk size).  */
25117 destexp = gen_rtx_ASHIFT (Pmode, countreg,
25118 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
25119 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
25122 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
/* Keep the known MEM size as precise as possible for alias analysis;
   drop it when the exact size cannot be proved.  */
25123 if ((!issetmem || orig_value == const0_rtx) && CONST_INT_P (count))
25126 = ROUND_DOWN (INTVAL (count), (HOST_WIDE_INT) GET_MODE_SIZE (mode));
25127 destmem = shallow_copy_rtx (destmem);
25128 set_mem_size (destmem, rounded_count);
25130 else if (MEM_SIZE_KNOWN_P (destmem))
25131 clear_mem_size (destmem);
/* Setmem: force the (narrowed) fill value into a register and emit
   rep stos.  */
25135 value = force_reg (mode, gen_lowpart (mode, value));
25136 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
/* Movmem: mirror the same canonicalization and size bookkeeping for
   the source, then emit rep mov.  */
25140 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
25141 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
25142 if (mode != QImode)
25144 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
25145 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
25146 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
25149 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
25150 if (CONST_INT_P (count))
25153 = ROUND_DOWN (INTVAL (count), (HOST_WIDE_INT) GET_MODE_SIZE (mode));
25154 srcmem = shallow_copy_rtx (srcmem);
25155 set_mem_size (srcmem, rounded_count);
25159 if (MEM_SIZE_KNOWN_P (srcmem))
25160 clear_mem_size (srcmem);
25162 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
25167 /* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to
25169 SRC is passed by pointer to be updated on return.
25170 Return value is updated DST. */
25172 emit_memmov (rtx destmem, rtx *srcmem, rtx destptr, rtx srcptr,
25173 HOST_WIDE_INT size_to_move)
25175 rtx dst = destmem, src = *srcmem, adjust, tempreg;
25176 enum insn_code code;
25177 machine_mode move_mode;
25180 /* Find the widest mode in which we could perform moves.
25181 Start with the biggest power of 2 less than SIZE_TO_MOVE and half
25182 it until move of such size is supported. */
25183 piece_size = 1 << floor_log2 (size_to_move);
25184 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
25185 code = optab_handler (mov_optab, move_mode);
25186 while (code == CODE_FOR_nothing && piece_size > 1)
25189 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
25190 code = optab_handler (mov_optab, move_mode);
25193 /* Find the corresponding vector mode with the same size as MOVE_MODE.
25194 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
25195 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
25197 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
25198 move_mode = mode_for_vector (word_mode, nunits);
25199 code = optab_handler (mov_optab, move_mode);
/* No vector move available: fall back to plain word moves.  */
25200 if (code == CODE_FOR_nothing)
25202 move_mode = word_mode;
25203 piece_size = GET_MODE_SIZE (move_mode);
25204 code = optab_handler (mov_optab, move_mode);
25207 gcc_assert (code != CODE_FOR_nothing);
/* Reinterpret both MEMs in the chosen move mode at offset 0.  */
25209 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
25210 src = adjust_automodify_address_nv (src, move_mode, srcptr, 0);
25212 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZES moves. */
25213 gcc_assert (size_to_move % piece_size == 0);
25214 adjust = GEN_INT (piece_size);
25215 for (i = 0; i < size_to_move; i += piece_size)
25217 /* We move from memory to memory, so we'll need to do it via
25218 a temporary register. */
25219 tempreg = gen_reg_rtx (move_mode);
25220 emit_insn (GEN_FCN (code) (tempreg, src));
25221 emit_insn (GEN_FCN (code) (dst, tempreg));
/* Bump both pointer registers past the piece just copied.  */
25223 emit_move_insn (destptr,
25224 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
25225 emit_move_insn (srcptr,
25226 gen_rtx_PLUS (Pmode, copy_rtx (srcptr), adjust));
25228 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
25230 src = adjust_automodify_address_nv (src, move_mode, srcptr,
25234 /* Update DST and SRC rtx. */
25239 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
25241 expand_movmem_epilogue (rtx destmem, rtx srcmem,
25242 rtx destptr, rtx srcptr, rtx count, int max_size)
/* Constant count: the remainder is known, emit one unconditional move
   per set power-of-two bit in it.  */
25245 if (CONST_INT_P (count))
25247 HOST_WIDE_INT countval = INTVAL (count);
25248 HOST_WIDE_INT epilogue_size = countval % max_size;
25251 /* For now MAX_SIZE should be a power of 2. This assert could be
25252 relaxed, but it'll require a bit more complicated epilogue
25254 gcc_assert ((max_size & (max_size - 1)) == 0);
25255 for (i = max_size; i >= 1; i >>= 1)
25257 if (epilogue_size & i)
25258 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
/* Variable count with a large MAX_SIZE: mask the count and run a byte
   copy loop.  */
25264 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
25265 count, 1, OPTAB_DIRECT);
25266 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
25267 count, QImode, 1, 4, false);
25271 /* When there are stringops, we can cheaply increase dest and src pointers.
25272 Otherwise we save code size by maintaining offset (zero is readily
25273 available from preceding rep operation) and using x86 addressing modes.
25275 if (TARGET_SINGLE_STRINGOP)
/* Stringop variant: test each remaining size bit (4/2/1 bytes) and
   emit one movs per bit; movs advances both pointers itself.  */
25279 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
25280 src = change_address (srcmem, SImode, srcptr);
25281 dest = change_address (destmem, SImode, destptr);
25282 emit_insn (gen_strmov (destptr, dest, srcptr, src));
25283 emit_label (label);
25284 LABEL_NUSES (label) = 1;
25288 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
25289 src = change_address (srcmem, HImode, srcptr);
25290 dest = change_address (destmem, HImode, destptr);
25291 emit_insn (gen_strmov (destptr, dest, srcptr, src));
25292 emit_label (label);
25293 LABEL_NUSES (label) = 1;
25297 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
25298 src = change_address (srcmem, QImode, srcptr);
25299 dest = change_address (destmem, QImode, destptr);
25300 emit_insn (gen_strmov (destptr, dest, srcptr, src));
25301 emit_label (label);
25302 LABEL_NUSES (label) = 1;
/* No single stringop: keep a running OFFSET register and address the
   pieces as pointer + OFFSET instead of adjusting the pointers.  */
25307 rtx offset = force_reg (Pmode, const0_rtx);
25312 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
25313 src = change_address (srcmem, SImode, srcptr);
25314 dest = change_address (destmem, SImode, destptr);
25315 emit_move_insn (dest, src);
25316 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
25317 true, OPTAB_LIB_WIDEN);
25319 emit_move_insn (offset, tmp);
25320 emit_label (label);
25321 LABEL_NUSES (label) = 1;
25325 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
25326 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
25327 src = change_address (srcmem, HImode, tmp);
25328 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
25329 dest = change_address (destmem, HImode, tmp);
25330 emit_move_insn (dest, src);
25331 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
25332 true, OPTAB_LIB_WIDEN);
25334 emit_move_insn (offset, tmp);
25335 emit_label (label);
25336 LABEL_NUSES (label) = 1;
/* Last possible byte: no further OFFSET update needed.  */
25340 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
25341 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
25342 src = change_address (srcmem, QImode, tmp);
25343 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
25344 dest = change_address (destmem, QImode, tmp);
25345 emit_move_insn (dest, src);
25346 emit_label (label);
25347 LABEL_NUSES (label) = 1;
25352 /* This function emits moves to fill SIZE_TO_MOVE bytes starting from DESTMEM
25353 with value PROMOTED_VAL.
25354 SRC is passed by pointer to be updated on return.
25355 Return value is updated DST. */
25357 emit_memset (rtx destmem, rtx destptr, rtx promoted_val,
25358 HOST_WIDE_INT size_to_move)
25360 rtx dst = destmem, adjust;
25361 enum insn_code code;
25362 machine_mode move_mode;
25365 /* Find the widest mode in which we could perform moves.
25366 Start with the biggest power of 2 less than SIZE_TO_MOVE and half
25367 it until move of such size is supported. */
25368 move_mode = GET_MODE (promoted_val);
/* A VOIDmode (constant) fill value is stored bytewise.  */
25369 if (move_mode == VOIDmode)
25370 move_mode = QImode;
/* Narrow the value when fewer bytes than its mode holds are wanted.  */
25371 if (size_to_move < GET_MODE_SIZE (move_mode))
25373 move_mode = mode_for_size (size_to_move * BITS_PER_UNIT, MODE_INT, 0);
25374 promoted_val = gen_lowpart (move_mode, promoted_val);
25376 piece_size = GET_MODE_SIZE (move_mode);
25377 code = optab_handler (mov_optab, move_mode);
25378 gcc_assert (code != CODE_FOR_nothing && promoted_val != NULL_RTX);
25380 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
25382 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZES moves. */
25383 gcc_assert (size_to_move % piece_size == 0);
25384 adjust = GEN_INT (piece_size);
25385 for (i = 0; i < size_to_move; i += piece_size)
/* Word-sized or smaller pieces use the strset pattern, which also
   advances DESTPTR itself.  */
25387 if (piece_size <= GET_MODE_SIZE (word_mode))
25389 emit_insn (gen_strset (destptr, dst, promoted_val));
25390 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
/* Wider (e.g. vector) pieces: plain store, then bump DESTPTR
   explicitly.  */
25395 emit_insn (GEN_FCN (code) (dst, promoted_val));
25397 emit_move_insn (destptr,
25398 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
25400 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
25404 /* Update DST rtx. */
25407 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
25409 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
25410 rtx count, int max_size)
25413 expand_simple_binop (counter_mode (count), AND, count,
25414 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
25415 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
25416 gen_lowpart (QImode, value), count, QImode,
25417 1, max_size / 2, true);
25420 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
25422 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value,
25423 rtx count, int max_size)
/* Constant count: store each power-of-two-sized remainder piece,
   preferring VEC_VALUE for pieces wider than VALUE's mode.  */
25427 if (CONST_INT_P (count))
25429 HOST_WIDE_INT countval = INTVAL (count);
25430 HOST_WIDE_INT epilogue_size = countval % max_size;
25433 /* For now MAX_SIZE should be a power of 2. This assert could be
25434 relaxed, but it'll require a bit more complicated epilogue
25436 gcc_assert ((max_size & (max_size - 1)) == 0);
25437 for (i = max_size; i >= 1; i >>= 1)
25439 if (epilogue_size & i)
25441 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
25442 destmem = emit_memset (destmem, destptr, vec_value, i);
25444 destmem = emit_memset (destmem, destptr, value, i);
/* Variable count with a large MAX_SIZE: fall back to a byte loop.  */
25451 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
/* Otherwise test each size bit of COUNT and store directly.
   16-byte piece, done as two DImode stores.  */
25456 rtx_code_label *label = ix86_expand_aligntest (count, 16, true);
25459 dest = change_address (destmem, DImode, destptr);
25460 emit_insn (gen_strset (destptr, dest, value));
25461 dest = adjust_automodify_address_nv (dest, DImode, destptr, 8);
25462 emit_insn (gen_strset (destptr, dest, value));
/* 32-bit fallback: four SImode stores for the 16-byte piece.  */
25466 dest = change_address (destmem, SImode, destptr);
25467 emit_insn (gen_strset (destptr, dest, value));
25468 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
25469 emit_insn (gen_strset (destptr, dest, value));
25470 dest = adjust_automodify_address_nv (dest, SImode, destptr, 8);
25471 emit_insn (gen_strset (destptr, dest, value));
25472 dest = adjust_automodify_address_nv (dest, SImode, destptr, 12);
25473 emit_insn (gen_strset (destptr, dest, value));
25475 emit_label (label);
25476 LABEL_NUSES (label) = 1;
/* 8-byte piece: one DImode store, or two SImode stores.  */
25480 rtx_code_label *label = ix86_expand_aligntest (count, 8, true);
25483 dest = change_address (destmem, DImode, destptr);
25484 emit_insn (gen_strset (destptr, dest, value));
25488 dest = change_address (destmem, SImode, destptr);
25489 emit_insn (gen_strset (destptr, dest, value));
25490 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
25491 emit_insn (gen_strset (destptr, dest, value));
25493 emit_label (label);
25494 LABEL_NUSES (label) = 1;
/* 4-byte piece.  */
25498 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
25499 dest = change_address (destmem, SImode, destptr);
25500 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
25501 emit_label (label);
25502 LABEL_NUSES (label) = 1;
/* 2-byte piece.  */
25506 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
25507 dest = change_address (destmem, HImode, destptr);
25508 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
25509 emit_label (label);
25510 LABEL_NUSES (label) = 1;
/* Final single byte.  */
25514 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
25515 dest = change_address (destmem, QImode, destptr);
25516 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
25517 emit_label (label);
25518 LABEL_NUSES (label) = 1;
25522 /* Depending on ISSETMEM, copy enough from SRCMEM to DESTMEM or set enough to
25523 DESTMEM to align it to DESIRED_ALIGNMENT. Original alignment is ALIGN.
25524 Depending on ISSETMEM, either arguments SRCMEM/SRCPTR or VALUE/VEC_VALUE are
25526 Return value is updated DESTMEM. */
25528 expand_set_or_movmem_prologue (rtx destmem, rtx srcmem,
25529 rtx destptr, rtx srcptr, rtx value,
25530 rtx vec_value, rtx count, int align,
25531 int desired_alignment, bool issetmem)
/* Walk the alignment bits below DESIRED_ALIGNMENT; each step
   conditionally handles I bytes so the destination pointer becomes
   aligned one bit further.  */
25534 for (i = 1; i < desired_alignment; i <<= 1)
/* Skip the I-byte piece when DESTPTR is already I-aligned.  */
25538 rtx_code_label *label = ix86_expand_aligntest (destptr, i, false);
/* Setmem: prefer the vector fill value for pieces wider than VALUE.  */
25541 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
25542 destmem = emit_memset (destmem, destptr, vec_value, i);
25544 destmem = emit_memset (destmem, destptr, value, i);
25547 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
/* Bytes handled here no longer count toward the main loop.  */
25548 ix86_adjust_counter (count, i);
25549 emit_label (label);
25550 LABEL_NUSES (label) = 1;
/* Record the alignment now guaranteed on DESTMEM.  */
25551 set_mem_align (destmem, i * 2 * BITS_PER_UNIT);
25557 /* Test if COUNT&SIZE is nonzero and if so, expand movmem
25558 or setmem sequence that is valid for SIZE..2*SIZE-1 bytes
25559 and jump to DONE_LABEL. */
25561 expand_small_movmem_or_setmem (rtx destmem, rtx srcmem,
25562 rtx destptr, rtx srcptr,
25563 rtx value, rtx vec_value,
25564 rtx count, int size,
25565 rtx done_label, bool issetmem)
25567 rtx_code_label *label = ix86_expand_aligntest (count, size, false);
25568 machine_mode mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 1);
25572 /* If we do not have vector value to copy, we must reduce size. */
25577 if (GET_MODE (value) == VOIDmode && size > 8)
25579 else if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (GET_MODE (value)))
25580 mode = GET_MODE (value);
25583 mode = GET_MODE (vec_value), value = vec_value;
25587 /* Choose appropriate vector mode. */
25589 mode = TARGET_AVX ? V32QImode : TARGET_SSE ? V16QImode : DImode;
25590 else if (size >= 16)
25591 mode = TARGET_SSE ? V16QImode : DImode;
25592 srcmem = change_address (srcmem, mode, srcptr);
25594 destmem = change_address (destmem, mode, destptr);
25595 modesize = GEN_INT (GET_MODE_SIZE (mode));
25596 gcc_assert (GET_MODE_SIZE (mode) <= size);
25597 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
25600 emit_move_insn (destmem, gen_lowpart (mode, value));
25603 emit_move_insn (destmem, srcmem);
25604 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
25606 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
25609 destmem = offset_address (destmem, count, 1);
25610 destmem = offset_address (destmem, GEN_INT (-2 * size),
25611 GET_MODE_SIZE (mode));
25614 srcmem = offset_address (srcmem, count, 1);
25615 srcmem = offset_address (srcmem, GEN_INT (-2 * size),
25616 GET_MODE_SIZE (mode));
25618 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
25621 emit_move_insn (destmem, gen_lowpart (mode, value));
25624 emit_move_insn (destmem, srcmem);
25625 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
25627 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
25629 emit_jump_insn (gen_jump (done_label));
25632 emit_label (label);
25633 LABEL_NUSES (label) = 1;
25636 /* Handle small memcpy (up to SIZE that is supposed to be small power of 2)
25637 and get ready for the main memcpy loop by copying initial DESIRED_ALIGN-ALIGN
25638 bytes and last SIZE bytes adjusting DESTPTR/SRCPTR/COUNT in a way we can
25639 proceed with a loop copying SIZE bytes at once. Do moves in MODE.
25640 DONE_LABEL is a label after the whole copying sequence. The label is created
25641 on demand if *DONE_LABEL is NULL.
25642 MIN_SIZE is minimal size of block copied. This value gets adjusted for new
25643 bounds after the initial copies.
25645 DESTMEM/SRCMEM are memory expressions pointing to the copied block,
25646 DESTPTR/SRCPTR are pointers to the block. DYNAMIC_CHECK indicate whether
25647 we will dispatch to a library call for large blocks.
25649 In pseudocode we do:
25653 Assume that SIZE is 4. Bigger sizes are handled analogously
25656 copy 4 bytes from SRCPTR to DESTPTR
25657 copy 4 bytes from SRCPTR + COUNT - 4 to DESTPTR + COUNT - 4
25662 copy 1 byte from SRCPTR to DESTPTR
25665 copy 2 bytes from SRCPTR to DESTPTR
25666 copy 2 bytes from SRCPTR + COUNT - 2 to DESTPTR + COUNT - 2
25671 copy at least DESIRED_ALIGN-ALIGN bytes from SRCPTR to DESTPTR
25672 copy SIZE bytes from SRCPTR + COUNT - SIZE to DESTPTR + COUNT -SIZE
25674 OLD_DESTPTR = DESTPTR;
25675 Align DESTPTR up to DESIRED_ALIGN
25676 SRCPTR += DESTPTR - OLD_DESTPTR
25677 COUNT -= DESTPTR - OLD_DESTPTR
25679 Round COUNT down to multiple of SIZE
25680 << optional caller supplied zero size guard is here >>
25681 << optional caller supplied dynamic check is here >>
25682 << caller supplied main copy loop is here >>
25687 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves (rtx destmem, rtx srcmem,
25688 rtx *destptr, rtx *srcptr,
25690 rtx value, rtx vec_value,
25692 rtx_code_label **done_label,
25696 unsigned HOST_WIDE_INT *min_size,
25697 bool dynamic_check,
25700 rtx_code_label *loop_label = NULL, *label;
25703 int prolog_size = 0;
25706 /* Choose proper value to copy. */
25707 if (issetmem && VECTOR_MODE_P (mode))
25708 mode_value = vec_value;
25710 mode_value = value;
25711 gcc_assert (GET_MODE_SIZE (mode) <= size);
25713 /* See if block is big or small, handle small blocks. */
25714 if (!CONST_INT_P (*count) && *min_size < (unsigned HOST_WIDE_INT)size)
25717 loop_label = gen_label_rtx ();
25720 *done_label = gen_label_rtx ();
25722 emit_cmp_and_jump_insns (*count, GEN_INT (size2), GE, 0, GET_MODE (*count),
25726 /* Handle sizes > 3. */
25727 for (;size2 > 2; size2 >>= 1)
25728 expand_small_movmem_or_setmem (destmem, srcmem,
25732 size2, *done_label, issetmem);
25733 /* Nothing to copy? Jump to DONE_LABEL if so */
25734 emit_cmp_and_jump_insns (*count, const0_rtx, EQ, 0, GET_MODE (*count),
25737 /* Do a byte copy. */
25738 destmem = change_address (destmem, QImode, *destptr);
25740 emit_move_insn (destmem, gen_lowpart (QImode, value));
25743 srcmem = change_address (srcmem, QImode, *srcptr);
25744 emit_move_insn (destmem, srcmem);
25747 /* Handle sizes 2 and 3. */
25748 label = ix86_expand_aligntest (*count, 2, false);
25749 destmem = change_address (destmem, HImode, *destptr);
25750 destmem = offset_address (destmem, *count, 1);
25751 destmem = offset_address (destmem, GEN_INT (-2), 2);
25753 emit_move_insn (destmem, gen_lowpart (HImode, value));
25756 srcmem = change_address (srcmem, HImode, *srcptr);
25757 srcmem = offset_address (srcmem, *count, 1);
25758 srcmem = offset_address (srcmem, GEN_INT (-2), 2);
25759 emit_move_insn (destmem, srcmem);
25762 emit_label (label);
25763 LABEL_NUSES (label) = 1;
25764 emit_jump_insn (gen_jump (*done_label));
25768 gcc_assert (*min_size >= (unsigned HOST_WIDE_INT)size
25769 || UINTVAL (*count) >= (unsigned HOST_WIDE_INT)size);
25771 /* Start memcpy for COUNT >= SIZE. */
25774 emit_label (loop_label);
25775 LABEL_NUSES (loop_label) = 1;
25778 /* Copy first desired_align bytes. */
25780 srcmem = change_address (srcmem, mode, *srcptr);
25781 destmem = change_address (destmem, mode, *destptr);
25782 modesize = GEN_INT (GET_MODE_SIZE (mode));
25783 for (n = 0; prolog_size < desired_align - align; n++)
25786 emit_move_insn (destmem, mode_value);
25789 emit_move_insn (destmem, srcmem);
25790 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
25792 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
25793 prolog_size += GET_MODE_SIZE (mode);
25797 /* Copy last SIZE bytes. */
25798 destmem = offset_address (destmem, *count, 1);
25799 destmem = offset_address (destmem,
25800 GEN_INT (-size - prolog_size),
25803 emit_move_insn (destmem, mode_value);
25806 srcmem = offset_address (srcmem, *count, 1);
25807 srcmem = offset_address (srcmem,
25808 GEN_INT (-size - prolog_size),
25810 emit_move_insn (destmem, srcmem);
25812 for (n = 1; n * GET_MODE_SIZE (mode) < size; n++)
25814 destmem = offset_address (destmem, modesize, 1);
25816 emit_move_insn (destmem, mode_value);
25819 srcmem = offset_address (srcmem, modesize, 1);
25820 emit_move_insn (destmem, srcmem);
25824 /* Align destination. */
25825 if (desired_align > 1 && desired_align > align)
25827 rtx saveddest = *destptr;
25829 gcc_assert (desired_align <= size);
25830 /* Align destptr up, place it to new register. */
25831 *destptr = expand_simple_binop (GET_MODE (*destptr), PLUS, *destptr,
25832 GEN_INT (prolog_size),
25833 NULL_RTX, 1, OPTAB_DIRECT);
25834 if (REG_P (*destptr) && REG_P (saveddest) && REG_POINTER (saveddest))
25835 REG_POINTER (*destptr) = 1;
25836 *destptr = expand_simple_binop (GET_MODE (*destptr), AND, *destptr,
25837 GEN_INT (-desired_align),
25838 *destptr, 1, OPTAB_DIRECT);
25839 /* See how many bytes we skipped. */
25840 saveddest = expand_simple_binop (GET_MODE (*destptr), MINUS, saveddest,
25842 saveddest, 1, OPTAB_DIRECT);
25843 /* Adjust srcptr and count. */
25845 *srcptr = expand_simple_binop (GET_MODE (*srcptr), MINUS, *srcptr,
25846 saveddest, *srcptr, 1, OPTAB_DIRECT);
25847 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
25848 saveddest, *count, 1, OPTAB_DIRECT);
25849 /* We copied at most size + prolog_size. */
25850 if (*min_size > (unsigned HOST_WIDE_INT)(size + prolog_size))
25852 = ROUND_DOWN (*min_size - size, (unsigned HOST_WIDE_INT)size);
25856 /* Our loops always round down the block size, but for dispatch to library
25857 we need precise value. */
25859 *count = expand_simple_binop (GET_MODE (*count), AND, *count,
25860 GEN_INT (-size), *count, 1, OPTAB_DIRECT);
25864 gcc_assert (prolog_size == 0);
25865 /* Decrease count, so we won't end up copying last word twice. */
25866 if (!CONST_INT_P (*count))
25867 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
25868 constm1_rtx, *count, 1, OPTAB_DIRECT);
25870 *count = GEN_INT (ROUND_DOWN (UINTVAL (*count) - 1,
25871 (unsigned HOST_WIDE_INT)size));
25873 *min_size = ROUND_DOWN (*min_size - 1, (unsigned HOST_WIDE_INT)size);
25878 /* This function is like the previous one, except here we know how many bytes
25879 need to be copied. That allows us to update alignment not only of DST, which
25880 is returned, but also of SRC, which is passed as a pointer for that
25883 expand_set_or_movmem_constant_prologue (rtx dst, rtx *srcp, rtx destreg,
25884 rtx srcreg, rtx value, rtx vec_value,
25885 int desired_align, int align_bytes,
25889 rtx orig_dst = dst;
25890 rtx orig_src = NULL;
25891 int piece_size = 1;
25892 int copied_bytes = 0;
25896 gcc_assert (srcp != NULL);
25901 for (piece_size = 1;
25902 piece_size <= desired_align && copied_bytes < align_bytes;
25905 if (align_bytes & piece_size)
25909 if (vec_value && piece_size > GET_MODE_SIZE (GET_MODE (value)))
25910 dst = emit_memset (dst, destreg, vec_value, piece_size);
25912 dst = emit_memset (dst, destreg, value, piece_size);
25915 dst = emit_memmov (dst, &src, destreg, srcreg, piece_size);
25916 copied_bytes += piece_size;
25919 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
25920 set_mem_align (dst, desired_align * BITS_PER_UNIT);
25921 if (MEM_SIZE_KNOWN_P (orig_dst))
25922 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
25926 int src_align_bytes = get_mem_align_offset (src, desired_align
25928 if (src_align_bytes >= 0)
25929 src_align_bytes = desired_align - src_align_bytes;
25930 if (src_align_bytes >= 0)
25932 unsigned int src_align;
25933 for (src_align = desired_align; src_align >= 2; src_align >>= 1)
25935 if ((src_align_bytes & (src_align - 1))
25936 == (align_bytes & (src_align - 1)))
25939 if (src_align > (unsigned int) desired_align)
25940 src_align = desired_align;
25941 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
25942 set_mem_align (src, src_align * BITS_PER_UNIT);
25944 if (MEM_SIZE_KNOWN_P (orig_src))
25945 set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
25952 /* Return true if ALG can be used in current context.
25953 Assume we expand memset if MEMSET is true. */
25955 alg_usable_p (enum stringop_alg alg, bool memset, bool have_as)
25957 if (alg == no_stringop)
25959 if (alg == vector_loop)
25960 return TARGET_SSE || TARGET_AVX;
25961 /* Algorithms using the rep prefix want at least edi and ecx;
25962 additionally, memset wants eax and memcpy wants esi. Don't
25963 consider such algorithms if the user has appropriated those
25964 registers for their own purposes, or if we have a non-default
25965 address space, since some string insns cannot override the segment. */
25966 if (alg == rep_prefix_1_byte
25967 || alg == rep_prefix_4_byte
25968 || alg == rep_prefix_8_byte)
25972 if (fixed_regs[CX_REG]
25973 || fixed_regs[DI_REG]
25974 || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG]))
25980 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
25981 static enum stringop_alg
25982 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
25983 unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size,
25984 bool memset, bool zero_memset, bool have_as,
25985 int *dynamic_check, bool *noalign)
25987 const struct stringop_algs * algs;
25988 bool optimize_for_speed;
25990 const struct processor_costs *cost;
25992 bool any_alg_usable_p = false;
25995 *dynamic_check = -1;
25997 /* Even if the string operation call is cold, we still might spend a lot
25998 of time processing large blocks. */
25999 if (optimize_function_for_size_p (cfun)
26000 || (optimize_insn_for_size_p ()
26002 || (expected_size != -1 && expected_size < 256))))
26003 optimize_for_speed = false;
26005 optimize_for_speed = true;
26007 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
26009 algs = &cost->memset[TARGET_64BIT != 0];
26011 algs = &cost->memcpy[TARGET_64BIT != 0];
26013 /* See maximal size for user defined algorithm. */
26014 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
26016 enum stringop_alg candidate = algs->size[i].alg;
26017 bool usable = alg_usable_p (candidate, memset, have_as);
26018 any_alg_usable_p |= usable;
26020 if (candidate != libcall && candidate && usable)
26021 max = algs->size[i].max;
26024 /* If expected size is not known but max size is small enough
26025 so inline version is a win, set expected size into
26027 if (((max > 1 && (unsigned HOST_WIDE_INT) max >= max_size) || max == -1)
26028 && expected_size == -1)
26029 expected_size = min_size / 2 + max_size / 2;
26031 /* If user specified the algorithm, honor it if possible. */
26032 if (ix86_stringop_alg != no_stringop
26033 && alg_usable_p (ix86_stringop_alg, memset, have_as))
26034 return ix86_stringop_alg;
26035 /* rep; movq or rep; movl is the smallest variant. */
26036 else if (!optimize_for_speed)
26039 if (!count || (count & 3) || (memset && !zero_memset))
26040 return alg_usable_p (rep_prefix_1_byte, memset, have_as)
26041 ? rep_prefix_1_byte : loop_1_byte;
26043 return alg_usable_p (rep_prefix_4_byte, memset, have_as)
26044 ? rep_prefix_4_byte : loop;
26046 /* Very tiny blocks are best handled via the loop, REP is expensive to
26048 else if (expected_size != -1 && expected_size < 4)
26049 return loop_1_byte;
26050 else if (expected_size != -1)
26052 enum stringop_alg alg = libcall;
26053 bool alg_noalign = false;
26054 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
26056 /* We get here if the algorithms that were not libcall-based
26057 were rep-prefix based and we are unable to use rep prefixes
26058 based on global register usage. Break out of the loop and
26059 use the heuristic below. */
26060 if (algs->size[i].max == 0)
26062 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
26064 enum stringop_alg candidate = algs->size[i].alg;
26066 if (candidate != libcall
26067 && alg_usable_p (candidate, memset, have_as))
26070 alg_noalign = algs->size[i].noalign;
26072 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
26073 last non-libcall inline algorithm. */
26074 if (TARGET_INLINE_ALL_STRINGOPS)
26076 /* When the current size is best to be copied by a libcall,
26077 but we are still forced to inline, run the heuristic below
26078 that will pick code for medium sized blocks. */
26079 if (alg != libcall)
26081 *noalign = alg_noalign;
26084 else if (!any_alg_usable_p)
26087 else if (alg_usable_p (candidate, memset, have_as))
26089 *noalign = algs->size[i].noalign;
26095 /* When asked to inline the call anyway, try to pick meaningful choice.
26096 We look for maximal size of block that is faster to copy by hand and
26097 take blocks of at most of that size guessing that average size will
26098 be roughly half of the block.
26100 If this turns out to be bad, we might simply specify the preferred
26101 choice in ix86_costs. */
26102 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
26103 && (algs->unknown_size == libcall
26104 || !alg_usable_p (algs->unknown_size, memset, have_as)))
26106 enum stringop_alg alg;
26108 /* If there aren't any usable algorithms, then recursing on
26109 smaller sizes isn't going to find anything. Just return the
26110 simple byte-at-a-time copy loop. */
26111 if (!any_alg_usable_p)
26113 /* Pick something reasonable. */
26114 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
26115 *dynamic_check = 128;
26116 return loop_1_byte;
26120 alg = decide_alg (count, max / 2, min_size, max_size, memset,
26121 zero_memset, have_as, dynamic_check, noalign);
26122 gcc_assert (*dynamic_check == -1);
26123 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
26124 *dynamic_check = max;
26126 gcc_assert (alg != libcall);
26129 return (alg_usable_p (algs->unknown_size, memset, have_as)
26130 ? algs->unknown_size : libcall);
26133 /* Decide on alignment. We know that the operand is already aligned to ALIGN
26134 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
26136 decide_alignment (int align,
26137 enum stringop_alg alg,
26139 machine_mode move_mode)
26141 int desired_align = 0;
26143 gcc_assert (alg != no_stringop);
26145 if (alg == libcall)
26147 if (move_mode == VOIDmode)
26150 desired_align = GET_MODE_SIZE (move_mode);
26151 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
26152 copying whole cacheline at once. */
26153 if (TARGET_PENTIUMPRO
26154 && (alg == rep_prefix_4_byte || alg == rep_prefix_1_byte))
26159 if (desired_align < align)
26160 desired_align = align;
26161 if (expected_size != -1 && expected_size < 4)
26162 desired_align = align;
26164 return desired_align;
26168 /* Helper function for memcpy. For QImode value 0xXY produce
26169 0xXYXYXYXY of wide specified by MODE. This is essentially
26170 a * 0x10101010, but we can do slightly better than
26171 synth_mult by unwinding the sequence by hand on CPUs with
26174 promote_duplicated_reg (machine_mode mode, rtx val)
26176 machine_mode valmode = GET_MODE (val);
26178 int nops = mode == DImode ? 3 : 2;
26180 gcc_assert (mode == SImode || mode == DImode || val == const0_rtx);
26181 if (val == const0_rtx)
26182 return copy_to_mode_reg (mode, CONST0_RTX (mode));
26183 if (CONST_INT_P (val))
26185 HOST_WIDE_INT v = INTVAL (val) & 255;
26189 if (mode == DImode)
26190 v |= (v << 16) << 16;
26191 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
26194 if (valmode == VOIDmode)
26196 if (valmode != QImode)
26197 val = gen_lowpart (QImode, val);
26198 if (mode == QImode)
26200 if (!TARGET_PARTIAL_REG_STALL)
26202 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
26203 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
26204 <= (ix86_cost->shift_const + ix86_cost->add) * nops
26205 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
26207 rtx reg = convert_modes (mode, QImode, val, true);
26208 tmp = promote_duplicated_reg (mode, const1_rtx);
26209 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
26214 rtx reg = convert_modes (mode, QImode, val, true);
26216 if (!TARGET_PARTIAL_REG_STALL)
26217 if (mode == SImode)
26218 emit_insn (gen_insvsi_1 (reg, reg));
26220 emit_insn (gen_insvdi_1 (reg, reg));
26223 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
26224 NULL, 1, OPTAB_DIRECT);
26226 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
26228 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
26229 NULL, 1, OPTAB_DIRECT);
26230 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
26231 if (mode == SImode)
26233 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
26234 NULL, 1, OPTAB_DIRECT);
26235 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
26240 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
26241 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
26242 alignment from ALIGN to DESIRED_ALIGN. */
26244 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
26250 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
26251 promoted_val = promote_duplicated_reg (DImode, val);
26252 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
26253 promoted_val = promote_duplicated_reg (SImode, val);
26254 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
26255 promoted_val = promote_duplicated_reg (HImode, val);
26257 promoted_val = val;
26259 return promoted_val;
26262 /* Expand string move (memcpy) or store (memset) operation. Use i386 string
26263 operations when profitable. The code depends upon architecture, block size
26264 and alignment, but always has one of the following overall structures:
26266 Aligned move sequence:
26268 1) Prologue guard: Conditional that jumps up to epilogues for small
26269 blocks that can be handled by epilogue alone. This is faster
26270 but also needed for correctness, since prologue assume the block
26271 is larger than the desired alignment.
26273 Optional dynamic check for size and libcall for large
26274 blocks is emitted here too, with -minline-stringops-dynamically.
26276 2) Prologue: copy first few bytes in order to get destination
26277 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
26278 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
26279 copied. We emit either a jump tree on power of two sized
26280 blocks, or a byte loop.
26282 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
26283 with specified algorithm.
26285 4) Epilogue: code copying tail of the block that is too small to be
26286 handled by main body (or up to size guarded by prologue guard).
26288 Misaligned move sequence
26290 1) misaligned move prologue/epilogue containing:
26291 a) Prologue handling small memory blocks and jumping to done_label
26292 (skipped if blocks are known to be large enough)
26293 b) Single move copying first DESIRED_ALIGN-ALIGN bytes if alignment is
26294 needed by single possibly misaligned move
26295 (skipped if alignment is not needed)
26296 c) Copy of last SIZE_NEEDED bytes by possibly misaligned moves
26298 2) Zero size guard dispatching to done_label, if needed
26300 3) dispatch to library call, if needed,
26302 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
26303 with specified algorithm. */
26305 ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
26306 rtx align_exp, rtx expected_align_exp,
26307 rtx expected_size_exp, rtx min_size_exp,
26308 rtx max_size_exp, rtx probable_max_size_exp,
26313 rtx_code_label *label = NULL;
26315 rtx_code_label *jump_around_label = NULL;
26316 HOST_WIDE_INT align = 1;
26317 unsigned HOST_WIDE_INT count = 0;
26318 HOST_WIDE_INT expected_size = -1;
26319 int size_needed = 0, epilogue_size_needed;
26320 int desired_align = 0, align_bytes = 0;
26321 enum stringop_alg alg;
26322 rtx promoted_val = NULL;
26323 rtx vec_promoted_val = NULL;
26324 bool force_loopy_epilogue = false;
26326 bool need_zero_guard = false;
26328 machine_mode move_mode = VOIDmode;
26329 int unroll_factor = 1;
26330 /* TODO: Once value ranges are available, fill in proper data. */
26331 unsigned HOST_WIDE_INT min_size = 0;
26332 unsigned HOST_WIDE_INT max_size = -1;
26333 unsigned HOST_WIDE_INT probable_max_size = -1;
26334 bool misaligned_prologue_used = false;
26337 if (CONST_INT_P (align_exp))
26338 align = INTVAL (align_exp);
26339 /* i386 can do misaligned access on reasonably increased cost. */
26340 if (CONST_INT_P (expected_align_exp)
26341 && INTVAL (expected_align_exp) > align)
26342 align = INTVAL (expected_align_exp);
26343 /* ALIGN is the minimum of destination and source alignment, but we care here
26344 just about destination alignment. */
26346 && MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
26347 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
26349 if (CONST_INT_P (count_exp))
26351 min_size = max_size = probable_max_size = count = expected_size
26352 = INTVAL (count_exp);
26353 /* When COUNT is 0, there is nothing to do. */
26360 min_size = INTVAL (min_size_exp);
26362 max_size = INTVAL (max_size_exp);
26363 if (probable_max_size_exp)
26364 probable_max_size = INTVAL (probable_max_size_exp);
26365 if (CONST_INT_P (expected_size_exp))
26366 expected_size = INTVAL (expected_size_exp);
26369 /* Make sure we don't need to care about overflow later on. */
26370 if (count > (HOST_WIDE_INT_1U << 30))
26373 have_as = !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (dst));
26375 have_as |= !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (src));
26377 /* Step 0: Decide on preferred algorithm, desired alignment and
26378 size of chunks to be copied by main loop. */
26379 alg = decide_alg (count, expected_size, min_size, probable_max_size,
26381 issetmem && val_exp == const0_rtx, have_as,
26382 &dynamic_check, &noalign);
26383 if (alg == libcall)
26385 gcc_assert (alg != no_stringop);
26387 /* For now vector-version of memset is generated only for memory zeroing, as
26388 creating of promoted vector value is very cheap in this case. */
26389 if (issetmem && alg == vector_loop && val_exp != const0_rtx)
26390 alg = unrolled_loop;
26393 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
26394 destreg = ix86_copy_addr_to_reg (XEXP (dst, 0));
26396 srcreg = ix86_copy_addr_to_reg (XEXP (src, 0));
26399 move_mode = word_mode;
26405 gcc_unreachable ();
26407 need_zero_guard = true;
26408 move_mode = QImode;
26411 need_zero_guard = true;
26413 case unrolled_loop:
26414 need_zero_guard = true;
26415 unroll_factor = (TARGET_64BIT ? 4 : 2);
26418 need_zero_guard = true;
26420 /* Find the widest supported mode. */
26421 move_mode = word_mode;
26422 while (optab_handler (mov_optab, GET_MODE_WIDER_MODE (move_mode))
26423 != CODE_FOR_nothing)
26424 move_mode = GET_MODE_WIDER_MODE (move_mode);
26426 /* Find the corresponding vector mode with the same size as MOVE_MODE.
26427 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
26428 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
26430 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
26431 move_mode = mode_for_vector (word_mode, nunits);
26432 if (optab_handler (mov_optab, move_mode) == CODE_FOR_nothing)
26433 move_mode = word_mode;
26435 gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing);
26437 case rep_prefix_8_byte:
26438 move_mode = DImode;
26440 case rep_prefix_4_byte:
26441 move_mode = SImode;
26443 case rep_prefix_1_byte:
26444 move_mode = QImode;
26447 size_needed = GET_MODE_SIZE (move_mode) * unroll_factor;
26448 epilogue_size_needed = size_needed;
26450 desired_align = decide_alignment (align, alg, expected_size, move_mode);
26451 if (!TARGET_ALIGN_STRINGOPS || noalign)
26452 align = desired_align;
26454 /* Step 1: Prologue guard. */
26456 /* Alignment code needs count to be in register. */
26457 if (CONST_INT_P (count_exp) && desired_align > align)
26459 if (INTVAL (count_exp) > desired_align
26460 && INTVAL (count_exp) > size_needed)
26463 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
26464 if (align_bytes <= 0)
26467 align_bytes = desired_align - align_bytes;
26469 if (align_bytes == 0)
26470 count_exp = force_reg (counter_mode (count_exp), count_exp);
26472 gcc_assert (desired_align >= 1 && align >= 1);
26474 /* Misaligned move sequences handle both prologue and epilogue at once.
26475 Default code generation results in a smaller code for large alignments
26476 and also avoids redundant job when sizes are known precisely. */
26477 misaligned_prologue_used
26478 = (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES
26479 && MAX (desired_align, epilogue_size_needed) <= 32
26480 && desired_align <= epilogue_size_needed
26481 && ((desired_align > align && !align_bytes)
26482 || (!count && epilogue_size_needed > 1)));
26484 /* Do the cheap promotion to allow better CSE across the
26485 main loop and epilogue (ie one load of the big constant in the
26487 For now the misaligned move sequences do not have fast path
26488 without broadcasting. */
26489 if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used)))
26491 if (alg == vector_loop)
26493 gcc_assert (val_exp == const0_rtx);
26494 vec_promoted_val = promote_duplicated_reg (move_mode, val_exp);
26495 promoted_val = promote_duplicated_reg_to_size (val_exp,
26496 GET_MODE_SIZE (word_mode),
26497 desired_align, align);
26501 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
26502 desired_align, align);
26505 /* Misaligned move sequences handles both prologues and epilogues at once.
26506 Default code generation results in smaller code for large alignments and
26507 also avoids redundant job when sizes are known precisely. */
26508 if (misaligned_prologue_used)
26510 /* Misaligned move prologue handled small blocks by itself. */
26511 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves
26512 (dst, src, &destreg, &srcreg,
26513 move_mode, promoted_val, vec_promoted_val,
26515 &jump_around_label,
26516 desired_align < align
26517 ? MAX (desired_align, epilogue_size_needed) : epilogue_size_needed,
26518 desired_align, align, &min_size, dynamic_check, issetmem);
26520 src = change_address (src, BLKmode, srcreg);
26521 dst = change_address (dst, BLKmode, destreg);
26522 set_mem_align (dst, desired_align * BITS_PER_UNIT);
26523 epilogue_size_needed = 0;
26524 if (need_zero_guard
26525 && min_size < (unsigned HOST_WIDE_INT) size_needed)
26527 /* It is possible that we copied enough so the main loop will not
26529 gcc_assert (size_needed > 1);
26530 if (jump_around_label == NULL_RTX)
26531 jump_around_label = gen_label_rtx ();
26532 emit_cmp_and_jump_insns (count_exp,
26533 GEN_INT (size_needed),
26534 LTU, 0, counter_mode (count_exp), 1, jump_around_label);
26535 if (expected_size == -1
26536 || expected_size < (desired_align - align) / 2 + size_needed)
26537 predict_jump (REG_BR_PROB_BASE * 20 / 100);
26539 predict_jump (REG_BR_PROB_BASE * 60 / 100);
26542 /* Ensure that alignment prologue won't copy past end of block. */
26543 else if (size_needed > 1 || (desired_align > 1 && desired_align > align))
26545 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
26546 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
26547 Make sure it is power of 2. */
26548 epilogue_size_needed = 1 << (floor_log2 (epilogue_size_needed) + 1);
26550 /* To improve performance of small blocks, we jump around the VAL
26551 promoting mode. This mean that if the promoted VAL is not constant,
26552 we might not use it in the epilogue and have to use byte
26554 if (issetmem && epilogue_size_needed > 2 && !promoted_val)
26555 force_loopy_epilogue = true;
26556 if ((count && count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
26557 || max_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
26559 /* If main algorithm works on QImode, no epilogue is needed.
26560 For small sizes just don't align anything. */
26561 if (size_needed == 1)
26562 desired_align = align;
26567 && min_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
26569 label = gen_label_rtx ();
26570 emit_cmp_and_jump_insns (count_exp,
26571 GEN_INT (epilogue_size_needed),
26572 LTU, 0, counter_mode (count_exp), 1, label);
26573 if (expected_size == -1 || expected_size < epilogue_size_needed)
26574 predict_jump (REG_BR_PROB_BASE * 60 / 100);
26576 predict_jump (REG_BR_PROB_BASE * 20 / 100);
26580 /* Emit code to decide on runtime whether library call or inline should be
26582 if (dynamic_check != -1)
26584 if (!issetmem && CONST_INT_P (count_exp))
26586 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
26588 emit_block_move_via_libcall (dst, src, count_exp, false);
26589 count_exp = const0_rtx;
26595 rtx_code_label *hot_label = gen_label_rtx ();
26596 if (jump_around_label == NULL_RTX)
26597 jump_around_label = gen_label_rtx ();
26598 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
26599 LEU, 0, counter_mode (count_exp),
26601 predict_jump (REG_BR_PROB_BASE * 90 / 100);
26603 set_storage_via_libcall (dst, count_exp, val_exp, false);
26605 emit_block_move_via_libcall (dst, src, count_exp, false);
26606 emit_jump (jump_around_label);
26607 emit_label (hot_label);
26611 /* Step 2: Alignment prologue. */
26612 /* Do the expensive promotion once we branched off the small blocks. */
26613 if (issetmem && !promoted_val)
26614 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
26615 desired_align, align);
26617 if (desired_align > align && !misaligned_prologue_used)
26619 if (align_bytes == 0)
26621 /* Except for the first move in prologue, we no longer know
26622 constant offset in aliasing info. It don't seems to worth
26623 the pain to maintain it for the first move, so throw away
26625 dst = change_address (dst, BLKmode, destreg);
26627 src = change_address (src, BLKmode, srcreg);
26628 dst = expand_set_or_movmem_prologue (dst, src, destreg, srcreg,
26629 promoted_val, vec_promoted_val,
26630 count_exp, align, desired_align,
26632 /* At most desired_align - align bytes are copied. */
26633 if (min_size < (unsigned)(desired_align - align))
26636 min_size -= desired_align - align;
26640 /* If we know how many bytes need to be stored before dst is
26641 sufficiently aligned, maintain aliasing info accurately. */
26642 dst = expand_set_or_movmem_constant_prologue (dst, &src, destreg,
26650 count_exp = plus_constant (counter_mode (count_exp),
26651 count_exp, -align_bytes);
26652 count -= align_bytes;
26653 min_size -= align_bytes;
26654 max_size -= align_bytes;
26656 if (need_zero_guard
26657 && min_size < (unsigned HOST_WIDE_INT) size_needed
26658 && (count < (unsigned HOST_WIDE_INT) size_needed
26659 || (align_bytes == 0
26660 && count < ((unsigned HOST_WIDE_INT) size_needed
26661 + desired_align - align))))
26663 /* It is possible that we copied enough so the main loop will not
26665 gcc_assert (size_needed > 1);
26666 if (label == NULL_RTX)
26667 label = gen_label_rtx ();
26668 emit_cmp_and_jump_insns (count_exp,
26669 GEN_INT (size_needed),
26670 LTU, 0, counter_mode (count_exp), 1, label);
26671 if (expected_size == -1
26672 || expected_size < (desired_align - align) / 2 + size_needed)
26673 predict_jump (REG_BR_PROB_BASE * 20 / 100);
26675 predict_jump (REG_BR_PROB_BASE * 60 / 100);
26678 if (label && size_needed == 1)
26680 emit_label (label);
26681 LABEL_NUSES (label) = 1;
26683 epilogue_size_needed = 1;
26685 promoted_val = val_exp;
26687 else if (label == NULL_RTX && !misaligned_prologue_used)
26688 epilogue_size_needed = size_needed;
26690 /* Step 3: Main loop. */
26697 gcc_unreachable ();
26700 case unrolled_loop:
26701 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, promoted_val,
26702 count_exp, move_mode, unroll_factor,
26703 expected_size, issetmem);
26706 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg,
26707 vec_promoted_val, count_exp, move_mode,
26708 unroll_factor, expected_size, issetmem);
26710 case rep_prefix_8_byte:
26711 case rep_prefix_4_byte:
26712 case rep_prefix_1_byte:
26713 expand_set_or_movmem_via_rep (dst, src, destreg, srcreg, promoted_val,
26714 val_exp, count_exp, move_mode, issetmem);
26717 /* Adjust properly the offset of src and dest memory for aliasing. */
26718 if (CONST_INT_P (count_exp))
26721 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
26722 (count / size_needed) * size_needed);
26723 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
26724 (count / size_needed) * size_needed);
26729 src = change_address (src, BLKmode, srcreg);
26730 dst = change_address (dst, BLKmode, destreg);
26733 /* Step 4: Epilogue to copy the remaining bytes. */
26737 /* When the main loop is done, COUNT_EXP might hold original count,
26738 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
26739 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
26740 bytes. Compensate if needed. */
26742 if (size_needed < epilogue_size_needed)
26745 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
26746 GEN_INT (size_needed - 1), count_exp, 1,
26748 if (tmp != count_exp)
26749 emit_move_insn (count_exp, tmp);
26751 emit_label (label);
26752 LABEL_NUSES (label) = 1;
26755 if (count_exp != const0_rtx && epilogue_size_needed > 1)
26757 if (force_loopy_epilogue)
26758 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
26759 epilogue_size_needed);
26763 expand_setmem_epilogue (dst, destreg, promoted_val,
26764 vec_promoted_val, count_exp,
26765 epilogue_size_needed);
26767 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
26768 epilogue_size_needed);
26771 if (jump_around_label)
26772 emit_label (jump_around_label);
26777 /* Expand the appropriate insns for doing strlen if not just doing
26780 out = result, initialized with the start address
26781 align_rtx = alignment of the address.
26782 scratch = scratch register, initialized with the start address when
26783 not aligned, otherwise undefined
26785 This is just the body. It needs the initializations mentioned above and
26786 some address computing at the end. These things are done in i386.md. */
26789 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
26793 rtx_code_label *align_2_label = NULL;
26794 rtx_code_label *align_3_label = NULL;
26795 rtx_code_label *align_4_label = gen_label_rtx ();
26796 rtx_code_label *end_0_label = gen_label_rtx ();
26798 rtx tmpreg = gen_reg_rtx (SImode);
26799 rtx scratch = gen_reg_rtx (SImode);
/* Alignment is only known when ALIGN_RTX is a compile-time constant.  */
26803 if (CONST_INT_P (align_rtx))
26804 align = INTVAL (align_rtx);
26806 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
26808 /* Is there a known alignment and is it less than 4? */
26811 rtx scratch1 = gen_reg_rtx (Pmode);
26812 emit_move_insn (scratch1, out);
26813 /* Is there a known alignment and is it not 2? */
26816 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
26817 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
26819 /* Leave just the 3 lower bits. */
26820 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
26821 NULL_RTX, 0, OPTAB_WIDEN);
/* Dispatch on the low address bits: 0 -> word loop, 2 -> one pair of
   byte checks, 3 (GTU const2) -> single byte check.  */
26823 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
26824 Pmode, 1, align_4_label);
26825 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
26826 Pmode, 1, align_2_label);
26827 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
26828 Pmode, 1, align_3_label);
26832 /* Since the alignment is 2, we have to check 2 or 0 bytes;
26833 check if is aligned to 4 - byte. */
26835 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
26836 NULL_RTX, 0, OPTAB_WIDEN);
26838 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
26839 Pmode, 1, align_4_label);
26842 mem = change_address (src, QImode, out);
26844 /* Now compare the bytes. */
26846 /* Compare the first n unaligned byte on a byte per byte basis. */
26847 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
26848 QImode, 1, end_0_label);
26850 /* Increment the address. */
26851 emit_insn (ix86_gen_add3 (out, out, const1_rtx))
26853 /* Not needed with an alignment of 2 */
26856 emit_label (align_2_label);
26858 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
26861 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
26863 emit_label (align_3_label);
26866 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
26869 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
26872 /* Generate loop to check 4 bytes at a time. It is not a good idea to
26873 align this loop. It gives only huge programs, but does not help to
26875 emit_label (align_4_label);
26877 mem = change_address (src, SImode, out);
26878 emit_move_insn (scratch, mem);
26879 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
26881 /* This formula yields a nonzero result iff one of the bytes is zero.
26882 This saves three branches inside loop and many cycles.
   The test computed below is (x - 0x01010101) & ~x & 0x80808080,
   the classic SWAR zero-byte detector.  */
26884 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
26885 emit_insn (gen_one_cmplsi2 (scratch, scratch));
26886 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
26887 emit_insn (gen_andsi3 (tmpreg, tmpreg,
26888 gen_int_mode (0x80808080, SImode)));
26889 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
/* A zero byte was found in the word just loaded; locate which byte it
   is and back OUT up accordingly.  This arm uses flag-preserving
   conditional moves instead of a branch.  */
26894 rtx reg = gen_reg_rtx (SImode);
26895 rtx reg2 = gen_reg_rtx (Pmode);
26896 emit_move_insn (reg, tmpreg);
26897 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
26899 /* If zero is not in the first two bytes, move two bytes forward. */
26900 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
26901 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
26902 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
26903 emit_insn (gen_rtx_SET (tmpreg,
26904 gen_rtx_IF_THEN_ELSE (SImode, tmp,
26907 /* Emit lea manually to avoid clobbering of flags. */
26908 emit_insn (gen_rtx_SET (reg2, gen_rtx_PLUS (Pmode, out, const2_rtx)));
26910 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
26911 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
26912 emit_insn (gen_rtx_SET (out,
26913 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
/* Branchy variant for targets without profitable cmov.  */
26919 rtx_code_label *end_2_label = gen_label_rtx ();
26920 /* Is zero in the first two bytes? */
26922 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
26923 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
26924 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
26925 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
26926 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
26928 tmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
26929 JUMP_LABEL (tmp) = end_2_label;
26931 /* Not in the first two. Move two bytes forward. */
26932 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
26933 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
26935 emit_label (end_2_label);
26939 /* Avoid branch in fixing the byte.  Subtract-with-carry turns the
26940 "was the zero in the low byte" flag into a 3-or-4 byte adjustment. */
26940 tmpreg = gen_lowpart (QImode, tmpreg);
26941 emit_insn (gen_addqi3_cconly_overflow (tmpreg, tmpreg));
26942 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
26943 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
26944 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
26946 emit_label (end_0_label);
26949 /* Expand strlen.  Returns false when expansion is left to the generic
   code; otherwise emits either the unrolled SImode scanning loop
   (ix86_expand_strlensi_unroll_1) or a repne-scasb sequence. */
26952 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
26954 rtx addr, scratch1, scratch2, scratch3, scratch4;
26956 /* The generic case of strlen expander is long. Avoid its
26957 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
26959 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
26960 && !TARGET_INLINE_ALL_STRINGOPS
26961 && !optimize_insn_for_size_p ()
26962 && (!CONST_INT_P (align) || INTVAL (align) < 4))
26965 addr = force_reg (Pmode, XEXP (src, 0));
26966 scratch1 = gen_reg_rtx (Pmode);
26968 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
26969 && !optimize_insn_for_size_p ())
26971 /* Well it seems that some optimizer does not combine a call like
26972 foo(strlen(bar), strlen(bar));
26973 when the move and the subtraction is done here. It does calculate
26974 the length just once when these instructions are done inside of
26975 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
26976 often used and I use one fewer register for the lifetime of
26977 output_strlen_unroll() this is better. */
26979 emit_move_insn (out, addr);
26981 ix86_expand_strlensi_unroll_1 (out, src, align);
26983 /* strlensi_unroll_1 returns the address of the zero at the end of
26984 the string, like memchr(), so compute the length by subtracting
26985 the start address. */
26986 emit_insn (ix86_gen_sub3 (out, out, addr));
/* repne-scasb fallback: hard-codes AX (the EOS char), CX (count) and
   DI (pointer), so bail out when any of those is fixed.  */
26992 /* Can't use this if the user has appropriated eax, ecx, or edi. */
26993 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
26995 /* Can't use this for non-default address spaces. */
26996 if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (src)))
26999 scratch2 = gen_reg_rtx (Pmode);
27000 scratch3 = gen_reg_rtx (Pmode);
27001 scratch4 = force_reg (Pmode, constm1_rtx);
27003 emit_move_insn (scratch3, addr);
27004 eoschar = force_reg (QImode, eoschar);
27006 src = replace_equiv_address_nv (src, scratch3);
27008 /* If .md starts supporting :P, this can be done in .md. */
27009 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
27010 scratch4), UNSPEC_SCAS);
27011 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
/* scasb leaves the pointer one past the terminator with the count
   pre-decremented from -1; ~x - 1 recovers the length. */
27012 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
27013 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
27018 /* For given symbol (function) construct code to compute address of its PLT
27019 entry in large x86-64 PIC model.
   Emits TMP = @PLTOFF(symbol) + PIC register and returns the pseudo
   holding the PLT entry address.  Only valid for CM_LARGE_PIC, where a
   direct 32-bit PC-relative call cannot reach the PLT.  */
27021 construct_plt_address (rtx symbol)
27025 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
27026 gcc_assert (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF);
27027 gcc_assert (Pmode == DImode);
27029 tmp = gen_reg_rtx (Pmode);
27030 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
27032 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
27033 emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx));
/* Expand a call.  RETVAL is the value returned (or NULL), FNADDR the
   MEM holding the callee address, CALLARG1/CALLARG2 the argument rtxes
   from the call expander (CALLARG2 carries the SSE-register count for
   64-bit varargs), POP the number of bytes the callee pops, SIBCALL
   whether this is a tail call.  Builds the CALL rtx (possibly wrapped
   in a PARALLEL with the stack-pop SET) and emits the call insn.  */
27038 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
27040 rtx pop, bool sibcall)
27043 rtx use = NULL, call;
27044 unsigned int vec_len = 0;
27046 if (pop == const0_rtx)
27048 gcc_assert (!TARGET_64BIT || !pop);
27050 if (TARGET_MACHO && !TARGET_64BIT)
27053 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
27054 fnaddr = machopic_indirect_call_target (fnaddr);
27059 /* Static functions and indirect calls don't need the pic register. Also,
27060 check if PLT was explicitly avoided via no-plt or "noplt" attribute, making
27061 it an indirect call. */
27062 rtx addr = XEXP (fnaddr, 0);
27064 && GET_CODE (addr) == SYMBOL_REF
27065 && !SYMBOL_REF_LOCAL_P (addr))
27068 && (SYMBOL_REF_DECL (addr) == NULL_TREE
27069 || !lookup_attribute ("noplt",
27070 DECL_ATTRIBUTES (SYMBOL_REF_DECL (addr)))))
27073 || (ix86_cmodel == CM_LARGE_PIC
27074 && DEFAULT_ABI != MS_ABI))
/* PLT call through the GOT register: record the PIC register as used
   so the call keeps it live, and materialize it if using a pseudo. */
27076 use_reg (&use, gen_rtx_REG (Pmode,
27077 REAL_PIC_OFFSET_TABLE_REGNUM));
27078 if (ix86_use_pseudo_pic_reg ())
27079 emit_move_insn (gen_rtx_REG (Pmode,
27080 REAL_PIC_OFFSET_TABLE_REGNUM),
27081 pic_offset_table_rtx);
27084 else if (!TARGET_PECOFF && !TARGET_MACHO)
/* no-plt path: load the callee address from its GOT slot instead. */
27088 fnaddr = gen_rtx_UNSPEC (Pmode,
27089 gen_rtvec (1, addr),
27091 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
27095 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
27097 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
27098 fnaddr = gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
27101 fnaddr = gen_const_mem (Pmode, fnaddr);
27102 /* Pmode may not be the same as word_mode for x32, which
27103 doesn't support indirect branch via 32-bit memory slot.
27104 Since x32 GOT slot is 64 bit with zero upper 32 bits,
27105 indirect branch via x32 GOT slot is OK. */
27106 if (GET_MODE (fnaddr) != word_mode)
27107 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
27108 fnaddr = gen_rtx_MEM (QImode, fnaddr);
27113 /* Skip setting up RAX register for -mskip-rax-setup when there are no
27114 parameters passed in vector registers. */
27116 && (INTVAL (callarg2) > 0
27117 || (INTVAL (callarg2) == 0
27118 && (TARGET_SSE || !flag_skip_rax_setup))))
27120 rtx al = gen_rtx_REG (QImode, AX_REG);
27121 emit_move_insn (al, callarg2);
27122 use_reg (&use, al);
27125 if (ix86_cmodel == CM_LARGE_PIC
27128 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
27129 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
27130 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
27131 /* Since x32 GOT slot is 64 bit with zero upper 32 bits, indirect
27132 branch via x32 GOT slot is OK. */
27133 else if (!(TARGET_X32
27135 && GET_CODE (XEXP (fnaddr, 0)) == ZERO_EXTEND
27136 && GOT_memory_operand (XEXP (XEXP (fnaddr, 0), 0), Pmode))
27138 ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
27139 : !call_insn_operand (XEXP (fnaddr, 0), word_mode)))
/* Address form not directly callable: force it into a register. */
27141 fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
27142 fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
27145 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
27149 /* We should add bounds as destination register in case
27150 pointer with bounds may be returned (MPX pointer checker). */
27151 if (TARGET_MPX && SCALAR_INT_MODE_P (GET_MODE (retval)))
27153 rtx b0 = gen_rtx_REG (BND64mode, FIRST_BND_REG);
27154 rtx b1 = gen_rtx_REG (BND64mode, FIRST_BND_REG + 1);
27155 if (GET_CODE (retval) == PARALLEL)
27157 b0 = gen_rtx_EXPR_LIST (VOIDmode, b0, const0_rtx);
27158 b1 = gen_rtx_EXPR_LIST (VOIDmode, b1, const0_rtx);
27159 rtx par = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, b0, b1));
27160 retval = chkp_join_splitted_slot (retval, par);
27164 retval = gen_rtx_PARALLEL (VOIDmode,
27165 gen_rtvec (3, retval, b0, b1));
27166 chkp_put_regs_to_expr_list (retval);
27170 call = gen_rtx_SET (retval, call);
27172 vec[vec_len++] = call;
27176 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
27177 pop = gen_rtx_SET (stack_pointer_rtx, pop);
27178 vec[vec_len++] = pop;
/* MS ABI: clobber the extra call-clobbered registers, unless CALLARG2
   == -2 marks a call that preserves them.  */
27181 if (TARGET_64BIT_MS_ABI
27182 && (!callarg2 || INTVAL (callarg2) != -2))
27184 int const cregs_size
27185 = ARRAY_SIZE (x86_64_ms_sysv_extra_clobbered_registers);
27188 for (i = 0; i < cregs_size; i++)
27190 int regno = x86_64_ms_sysv_extra_clobbered_registers[i];
27191 machine_mode mode = SSE_REGNO_P (regno) ? TImode : DImode;
27193 clobber_reg (&use, gen_rtx_REG (mode, regno));
27198 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
27199 call = emit_call_insn (call);
27201 CALL_INSN_FUNCTION_USAGE (call) = use;
27206 /* Return true if the function being called was marked with attribute "noplt"
27207 or using -fno-plt and we are compiling for non-PIC and x86_64. We need to
27208 handle the non-PIC case in the backend because there is no easy interface
27209 for the front-end to force non-PLT calls to use the GOT. This is currently
27210 used only with 64-bit ELF targets to call the function marked "noplt"
   CALL_OP is the SYMBOL_REF of the callee.  */
27214 ix86_nopic_noplt_attribute_p (rtx call_op)
/* Local symbols, PIC code, the large code model and non-ELF targets
   never take the GOT-indirect no-plt path.  */
27216 if (flag_pic || ix86_cmodel == CM_LARGE
27217 || !TARGET_64BIT || TARGET_MACHO || TARGET_SEH || TARGET_PECOFF
27218 || SYMBOL_REF_LOCAL_P (call_op))
27221 tree symbol_decl = SYMBOL_REF_DECL (call_op);
27224 || (symbol_decl != NULL_TREE
27225 && lookup_attribute ("noplt", DECL_ATTRIBUTES (symbol_decl))))
27231 /* Output the assembly for a call instruction.  Chooses between direct
   and indirect, sibcall jmp vs. call, the GOTPCREL no-plt form, and
   inserts the SEH nop after the call when required. */
27234 ix86_output_call_insn (rtx_insn *insn, rtx call_op)
27236 bool direct_p = constant_call_address_operand (call_op, VOIDmode);
27237 bool seh_nop_p = false;
27240 if (SIBLING_CALL_P (insn))
27242 if (direct_p && ix86_nopic_noplt_attribute_p (call_op))
27243 xasm = "%!jmp\t*%p0@GOTPCREL(%%rip)";
27245 xasm = "%!jmp\t%P0";
27246 /* SEH epilogue detection requires the indirect branch case
27247 to include REX.W. */
27248 else if (TARGET_SEH)
27249 xasm = "%!rex.W jmp %A0";
27251 xasm = "%!jmp\t%A0";
27253 output_asm_insn (xasm, &call_op);
27257 /* SEH unwinding can require an extra nop to be emitted in several
27258 circumstances. Determine if we have one of those. */
27263 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
27265 /* If we get to another real insn, we don't need the nop. */
27269 /* If we get to the epilogue note, prevent a catch region from
27270 being adjacent to the standard epilogue sequence. If non-
27271 call-exceptions, we'll have done this during epilogue emission. */
27272 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
27273 && !flag_non_call_exceptions
27274 && !can_throw_internal (insn))
27281 /* If we didn't find a real insn following the call, prevent the
27282 unwinder from looking into the next function. */
27287 if (direct_p && ix86_nopic_noplt_attribute_p (call_op))
27288 xasm = "%!call\t*%p0@GOTPCREL(%%rip)";
27290 xasm = "%!call\t%P0";
27292 xasm = "%!call\t%A0";
27294 output_asm_insn (xasm, &call_op);
27302 /* Clear stack slot assignments remembered from previous functions.
27303 This is called from INIT_EXPANDERS once before RTL is emitted for each
   function.  Allocates a zeroed per-function machine_function in GC
   memory; -1 marks use_fast_prologue_epilogue_nregs as not yet
   computed.  */
27306 static struct machine_function *
27307 ix86_init_machine_status (void)
27309 struct machine_function *f;
27311 f = ggc_cleared_alloc<machine_function> ();
27312 f->use_fast_prologue_epilogue_nregs = -1;
27313 f->call_abi = ix86_abi;
27318 /* Return a MEM corresponding to a stack slot with mode MODE.
27319 Allocate a new slot if necessary.
27321 The RTL for a function can have several slots available: N is
27322 which slot to use.  Slots are cached per (mode, n) in
   ix86_stack_locals and shared by later requests. */
27325 assign_386_stack_local (machine_mode mode, enum ix86_stack_slot n)
27327 struct stack_local_entry *s;
27329 gcc_assert (n < MAX_386_STACK_LOCALS);
/* Reuse an already-allocated slot for this (mode, n) pair; copy_rtx
   so each caller gets an independent MEM.  */
27331 for (s = ix86_stack_locals; s; s = s->next)
27332 if (s->mode == mode && s->n == n)
27333 return validize_mem (copy_rtx (s->rtl));
27335 s = ggc_alloc<stack_local_entry> ();
27338 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
27340 s->next = ix86_stack_locals;
27341 ix86_stack_locals = s;
27342 return validize_mem (copy_rtx (s->rtl));
/* Instantiate virtual registers in the RTL of every cached stack
   slot (TARGET_INSTANTIATE_DECLS hook).  */
27346 ix86_instantiate_decls (void)
27348 struct stack_local_entry *s;
27350 for (s = ix86_stack_locals; s; s = s->next)
27351 if (s->rtl != NULL_RTX)
27352 instantiate_decl_rtl (s->rtl);
27355 /* Return the number used for encoding REG, in the range 0..7.
   Maps a hard register to its 3-bit field in the ModRM/SIB encoding by
   subtracting the first regno of its register file. */
27358 reg_encoded_number (rtx reg)
27360 unsigned regno = REGNO (reg);
27382 if (IN_RANGE (regno, FIRST_STACK_REG, LAST_STACK_REG))
27383 return regno - FIRST_STACK_REG;
27384 if (IN_RANGE (regno, FIRST_SSE_REG, LAST_SSE_REG))
27385 return regno - FIRST_SSE_REG;
27386 if (IN_RANGE (regno, FIRST_MMX_REG, LAST_MMX_REG))
27387 return regno - FIRST_MMX_REG;
27388 if (IN_RANGE (regno, FIRST_REX_SSE_REG, LAST_REX_SSE_REG))
27389 return regno - FIRST_REX_SSE_REG;
27390 if (IN_RANGE (regno, FIRST_REX_INT_REG, LAST_REX_INT_REG))
27391 return regno - FIRST_REX_INT_REG;
27392 if (IN_RANGE (regno, FIRST_MASK_REG, LAST_MASK_REG))
27393 return regno - FIRST_MASK_REG;
27394 if (IN_RANGE (regno, FIRST_BND_REG, LAST_BND_REG))
27395 return regno - FIRST_BND_REG;
27399 /* Given an insn INSN with NOPERANDS OPERANDS, return the modrm byte used
27400 in its encoding if it could be relevant for ROP mitigation, otherwise
27401 return -1. If POPNO0 and POPNO1 are nonnull, store the operand numbers
27402 used for calculating it into them. */
27405 ix86_get_modrm_for_rop (rtx_insn *insn, rtx *operands, int noperands,
27406 int *popno0 = 0, int *popno1 = 0)
/* Inline asm has no modrm attribute information.  */
27408 if (asm_noperands (PATTERN (insn)) >= 0)
27410 int has_modrm = get_attr_modrm (insn);
/* The modrm_class attribute tells which operand pair feeds the
   reg and r/m fields of the ModRM byte.  */
27413 enum attr_modrm_class cls = get_attr_modrm_class (insn);
27417 case MODRM_CLASS_OP02:
27418 gcc_assert (noperands >= 3);
27427 case MODRM_CLASS_OP01:
27428 gcc_assert (noperands >= 2);
/* mod=11 (register-direct) form: 0xC0 | reg<<3 | rm.  */
27440 if (REG_P (op0) && REG_P (op1))
27442 int enc0 = reg_encoded_number (op0);
27443 int enc1 = reg_encoded_number (op1);
27444 return 0xc0 + (enc1 << 3) + enc0;
27449 /* Check whether x86 address PARTS is a pc-relative address.  True only
   for a bare displacement (no base, no index) that is a label, a
   non-TLS symbol, or a GOTPCREL/PCREL/GOTNTPOFF unspec. */
27452 rip_relative_addr_p (struct ix86_address *parts)
27454 rtx base, index, disp;
27456 base = parts->base;
27457 index = parts->index;
27458 disp = parts->disp;
27460 if (disp && !base && !index)
/* Strip a CONST wrapper and a constant addend to reach the symbol. */
27466 if (GET_CODE (disp) == CONST)
27467 symbol = XEXP (disp, 0);
27468 if (GET_CODE (symbol) == PLUS
27469 && CONST_INT_P (XEXP (symbol, 1)))
27470 symbol = XEXP (symbol, 0);
27472 if (GET_CODE (symbol) == LABEL_REF
27473 || (GET_CODE (symbol) == SYMBOL_REF
27474 && SYMBOL_REF_TLS_MODEL (symbol) == 0)
27475 || (GET_CODE (symbol) == UNSPEC
27476 && (XINT (symbol, 1) == UNSPEC_GOTPCREL
27477 || XINT (symbol, 1) == UNSPEC_PCREL
27478 || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
27485 /* Calculate the length of the memory address in the instruction encoding.
27486 Includes addr32 prefix, does not include the one-byte modrm, opcode,
27487 or other prefixes. We never generate addr32 prefix for LEA insn. */
27490 memory_address_length (rtx addr, bool lea)
27492 struct ix86_address parts;
27493 rtx base, index, disp;
/* Auto-inc/dec forms encode no extra address bytes.  */
27497 if (GET_CODE (addr) == PRE_DEC
27498 || GET_CODE (addr) == POST_INC
27499 || GET_CODE (addr) == PRE_MODIFY
27500 || GET_CODE (addr) == POST_MODIFY)
27503 ok = ix86_decompose_address (addr, &parts);
/* One byte for a segment-override prefix on non-generic address
   spaces.  */
27506 len = (parts.seg == ADDR_SPACE_GENERIC) ? 0 : 1;
27508 /* If this is not LEA instruction, add the length of addr32 prefix. */
27509 if (TARGET_64BIT && !lea
27510 && (SImode_address_operand (addr, VOIDmode)
27511 || (parts.base && GET_MODE (parts.base) == SImode)
27512 || (parts.index && GET_MODE (parts.index) == SImode)))
27516 index = parts.index;
27519 if (base && SUBREG_P (base))
27520 base = SUBREG_REG (base);
27521 if (index && SUBREG_P (index))
27522 index = SUBREG_REG (index);
27524 gcc_assert (base == NULL_RTX || REG_P (base));
27525 gcc_assert (index == NULL_RTX || REG_P (index));
/* Encoding quirks of the x86 ModRM/SIB scheme:
27528 - esp as the base always wants an index,
27529 - ebp as the base always wants a displacement,
27530 - r12 as the base always wants an index,
27531 - r13 as the base always wants a displacement. */
27533 /* Register Indirect. */
27534 if (base && !index && !disp)
27536 /* esp (for its index) and ebp (for its displacement) need
27537 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
27539 if (base == arg_pointer_rtx
27540 || base == frame_pointer_rtx
27541 || REGNO (base) == SP_REG
27542 || REGNO (base) == BP_REG
27543 || REGNO (base) == R12_REG
27544 || REGNO (base) == R13_REG)
27548 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
27549 is not disp32, but disp32(%rip), so for disp32
27550 SIB byte is needed, unless print_operand_address
27551 optimizes it into disp32(%rip) or (%rip) is implied
27553 else if (disp && !base && !index)
27556 if (rip_relative_addr_p (&parts))
27561 /* Find the length of the displacement constant. */
/* Constraint K is a signed 8-bit immediate: disp8 vs disp32.  */
27564 if (base && satisfies_constraint_K (disp))
27569 /* ebp always wants a displacement. Similarly r13. */
27570 else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
27573 /* An index requires the two-byte modrm form.... */
27575 /* ...like esp (or r12), which always wants an index. */
27576 || base == arg_pointer_rtx
27577 || base == frame_pointer_rtx
27578 || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
27585 /* Compute default value for "length_immediate" attribute. When SHORTFORM
27586 is set, expect that insn have 8bit immediate alternative.
   Returns the byte count of the immediate operand of INSN. */
27588 ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform)
27592 extract_insn_cached (insn);
/* Scan operands for the (at most one counted) constant.  */
27593 for (i = recog_data.n_operands - 1; i >= 0; --i)
27594 if (CONSTANT_P (recog_data.operand[i]))
27596 enum attr_mode mode = get_attr_mode (insn);
27599 if (shortform && CONST_INT_P (recog_data.operand[i]))
27601 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
/* Truncate to the operand mode before the signed-8-bit test, since
   e.g. 0xffff in HImode is really -1.  */
27608 ival = trunc_int_for_mode (ival, HImode);
27611 ival = trunc_int_for_mode (ival, SImode);
27616 if (IN_RANGE (ival, -128, 127))
27633 /* Immediates for DImode instructions are encoded
27634 as 32bit sign extended values. */
27639 fatal_insn ("unknown insn mode", insn);
27645 /* Compute default value for "length_address" attribute.  Returns the
   number of bytes the memory operand of INSN (or the LEA source
   address) contributes to the encoding. */
27647 ix86_attr_length_address_default (rtx_insn *insn)
/* LEA's "memory" operand is the SET_SRC address expression itself.  */
27651 if (get_attr_type (insn) == TYPE_LEA)
27653 rtx set = PATTERN (insn), addr;
27655 if (GET_CODE (set) == PARALLEL)
27656 set = XVECEXP (set, 0, 0);
27658 gcc_assert (GET_CODE (set) == SET);
27660 addr = SET_SRC (set);
27662 return memory_address_length (addr, true);
27665 extract_insn_cached (insn);
27666 for (i = recog_data.n_operands - 1; i >= 0; --i)
27668 rtx op = recog_data.operand[i];
27671 constrain_operands_cached (insn, reload_completed);
27672 if (which_alternative != -1)
27674 const char *constraints = recog_data.constraints[i];
27675 int alt = which_alternative;
/* Advance to the constraint string of the matched alternative.  */
27677 while (*constraints == '=' || *constraints == '+')
27680 while (*constraints++ != ',')
27682 /* Skip ignored operands. */
27683 if (*constraints == 'X')
27687 int len = memory_address_length (XEXP (op, 0), false);
27689 /* Account for segment prefix for non-default addr spaces. */
27690 if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (op)))
27699 /* Compute default value for "length_vex" attribute. It includes
27700 2 or 3 byte VEX prefix and 1 opcode byte.  HAS_0F_OPCODE is true
   for instructions in the 0F opcode map; HAS_VEX_W when the VEX.W
   bit is set — both force the 3-byte prefix form.  Returns 3 or 4. */
27703 ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode,
27708 /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3
27709 byte VEX prefix. */
27710 if (!has_0f_opcode || has_vex_w)
27713 /* We can always use 2 byte VEX prefix in 32bit. */
27717 extract_insn_cached (insn);
27719 for (i = recog_data.n_operands - 1; i >= 0; --i)
27720 if (REG_P (recog_data.operand[i]))
27722 /* REX.W bit uses 3 byte VEX prefix. */
27723 if (GET_MODE (recog_data.operand[i]) == DImode
27724 && GENERAL_REG_P (recog_data.operand[i]))
27729 /* REX.X or REX.B bits use 3 byte VEX prefix. */
27730 if (MEM_P (recog_data.operand[i])
27731 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
27738 /* Return the maximum number of instructions a cpu can issue per cycle
   (TARGET_SCHED_ISSUE_RATE hook), keyed on ix86_tune. */
27741 ix86_issue_rate (void)
27745 case PROCESSOR_PENTIUM:
27746 case PROCESSOR_LAKEMONT:
27747 case PROCESSOR_BONNELL:
27748 case PROCESSOR_SILVERMONT:
27749 case PROCESSOR_KNL:
27750 case PROCESSOR_INTEL:
27752 case PROCESSOR_BTVER2:
27753 case PROCESSOR_PENTIUM4:
27754 case PROCESSOR_NOCONA:
27757 case PROCESSOR_PENTIUMPRO:
27758 case PROCESSOR_ATHLON:
27760 case PROCESSOR_AMDFAM10:
27761 case PROCESSOR_GENERIC:
27762 case PROCESSOR_BTVER1:
27765 case PROCESSOR_BDVER1:
27766 case PROCESSOR_BDVER2:
27767 case PROCESSOR_BDVER3:
27768 case PROCESSOR_BDVER4:
27769 case PROCESSOR_ZNVER1:
27770 case PROCESSOR_CORE2:
27771 case PROCESSOR_NEHALEM:
27772 case PROCESSOR_SANDYBRIDGE:
27773 case PROCESSOR_HASWELL:
27781 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
27782 by DEP_INSN and nothing set by DEP_INSN.
   INSN_TYPE is INSN's "type" attribute; only flag consumers (setcc,
   integer/fp cmov, conditional branch) are interesting.  SET/SET2 hold
   the destination(s) written by DEP_INSN; the dependency is flags-only
   when the flags register is among them and no other written register
   appears in INSN.  */
27785 ix86_flags_dependent (rtx_insn *insn, rtx_insn *dep_insn, enum attr_type insn_type)
27789 /* Simplify the test for uninteresting insns. */
27790 if (insn_type != TYPE_SETCC
27791 && insn_type != TYPE_ICMOV
27792 && insn_type != TYPE_FCMOV
27793 && insn_type != TYPE_IBR)
27796 if ((set = single_set (dep_insn)) != 0)
27798 set = SET_DEST (set);
27801 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
27802 && XVECLEN (PATTERN (dep_insn), 0) == 2
27803 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
27804 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
27806 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
/* Fixed: take the SECOND set's destination (element 1).  The previous
   code read element 0 again, making SET2 a duplicate of SET and
   leaving the second destination of the PARALLEL unchecked.  */
27807 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
27812 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
27815 /* This test is true if the dependent insn reads the flags but
27816 not any other potentially set register. */
27817 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
27820 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
27826 /* Return true iff USE_INSN has a memory address with operands set by
   SET_INSN (an address-generation interlock hazard on Pentium-class
   cores).  */
27830 ix86_agi_dependent (rtx_insn *set_insn, rtx_insn *use_insn)
27833 extract_insn_cached (use_insn);
/* Only the first MEM operand found is checked — note the early
   return inside the loop body.  */
27834 for (i = recog_data.n_operands - 1; i >= 0; --i)
27835 if (MEM_P (recog_data.operand[i]))
27837 rtx addr = XEXP (recog_data.operand[i], 0);
27838 return modified_in_p (addr, set_insn) != 0;
27843 /* Helper function for exact_store_load_dependency.
27844 Return true if addr is found in insn.  Walks the rtx INSN
   recursively using the format string of each code, comparing every
   sub-rtx against ADDR with rtx_equal_p.  */
27846 exact_dependency_1 (rtx addr, rtx insn)
27848 enum rtx_code code;
27849 const char *format_ptr;
27852 code = GET_CODE (insn);
27856 if (rtx_equal_p (addr, insn))
27871 format_ptr = GET_RTX_FORMAT (code);
27872 for (i = 0; i < GET_RTX_LENGTH (code); i++)
27874 switch (*format_ptr++)
/* 'e': a sub-expression; recurse.  */
27877 if (exact_dependency_1 (addr, XEXP (insn, i)))
/* 'E': a vector of sub-expressions; recurse on each element.  */
27881 for (j = 0; j < XVECLEN (insn, i); j++)
27882 if (exact_dependency_1 (addr, XVECEXP (insn, i, j)))
27890 /* Return true if there exists exact dependency for store & load, i.e.
27891 the same memory address is used in them.  STORE must be a single_set
   writing a MEM; LOAD's source is searched for that exact MEM. */
27893 exact_store_load_dependency (rtx_insn *store, rtx_insn *load)
27897 set1 = single_set (store);
27900 if (!MEM_P (SET_DEST (set1)))
27902 set2 = single_set (load);
27905 if (exact_dependency_1 (SET_DEST (set1), SET_SRC (set2)))
/* TARGET_SCHED_ADJUST_COST hook: adjust the scheduler's latency COST of
   the dependence LINK between producer DEP_INSN and consumer INSN,
   applying per-processor pipeline quirks.  */
27911 ix86_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
27913 enum attr_type insn_type, dep_insn_type;
27914 enum attr_memory memory;
27916 int dep_insn_code_number;
27918 /* Anti and output dependencies have zero cost on all CPUs. */
27919 if (REG_NOTE_KIND (link) != 0)
27922 dep_insn_code_number = recog_memoized (dep_insn);
27924 /* If we can't recognize the insns, we can't really do anything. */
27925 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
27928 insn_type = get_attr_type (insn);
27929 dep_insn_type = get_attr_type (dep_insn);
/* Per-processor cost adjustments follow.  */
27933 case PROCESSOR_PENTIUM:
27934 case PROCESSOR_LAKEMONT:
27935 /* Address Generation Interlock adds a cycle of latency. */
27936 if (insn_type == TYPE_LEA)
27938 rtx addr = PATTERN (insn)
27940 if (GET_CODE (addr) == PARALLEL)
27941 addr = XVECEXP (addr, 0, 0);
27943 gcc_assert (GET_CODE (addr) == SET);
27945 addr = SET_SRC (addr);
27946 if (modified_in_p (addr, dep_insn))
27949 else if (ix86_agi_dependent (dep_insn, insn))
27952 /* ??? Compares pair with jump/setcc. */
27953 if (ix86_flags_dependent (insn, dep_insn, insn_type))
27956 /* Floating point stores require value to be ready one cycle earlier. */
27957 if (insn_type == TYPE_FMOV
27958 && get_attr_memory (insn) == MEMORY_STORE
27959 && !ix86_agi_dependent (dep_insn, insn))
27963 case PROCESSOR_PENTIUMPRO:
27964 /* INT->FP conversion is expensive. */
27965 if (get_attr_fp_int_src (dep_insn))
27968 /* There is one cycle extra latency between an FP op and a store. */
27969 if (insn_type == TYPE_FMOV
27970 && (set = single_set (dep_insn)) != NULL_RTX
27971 && (set2 = single_set (insn)) != NULL_RTX
27972 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
27973 && MEM_P (SET_DEST (set2)))
27976 memory = get_attr_memory (insn);
27978 /* Show ability of reorder buffer to hide latency of load by executing
27979 in parallel with previous instruction in case
27980 previous instruction is not needed to compute the address. */
27981 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
27982 && !ix86_agi_dependent (dep_insn, insn))
27984 /* Claim moves to take one cycle, as core can issue one load
27985 at time and the next load can start cycle later. */
27986 if (dep_insn_type == TYPE_IMOV
27987 || dep_insn_type == TYPE_FMOV)
27995 /* The esp dependency is resolved before
27996 the instruction is really finished. */
27997 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
27998 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
28001 /* INT->FP conversion is expensive. */
28002 if (get_attr_fp_int_src (dep_insn))
28005 memory = get_attr_memory (insn);
28007 /* Show ability of reorder buffer to hide latency of load by executing
28008 in parallel with previous instruction in case
28009 previous instruction is not needed to compute the address. */
28010 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
28011 && !ix86_agi_dependent (dep_insn, insn))
28013 /* Claim moves to take one cycle, as core can issue one load
28014 at time and the next load can start cycle later. */
28015 if (dep_insn_type == TYPE_IMOV
28016 || dep_insn_type == TYPE_FMOV)
28025 case PROCESSOR_AMDFAM10:
28026 case PROCESSOR_BDVER1:
28027 case PROCESSOR_BDVER2:
28028 case PROCESSOR_BDVER3:
28029 case PROCESSOR_BDVER4:
28030 case PROCESSOR_ZNVER1:
28031 case PROCESSOR_BTVER1:
28032 case PROCESSOR_BTVER2:
28033 case PROCESSOR_GENERIC:
28034 /* Stack engine allows to execute push&pop instructions in parall. */
28035 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
28036 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
28040 case PROCESSOR_ATHLON:
28042 memory = get_attr_memory (insn);
28044 /* Show ability of reorder buffer to hide latency of load by executing
28045 in parallel with previous instruction in case
28046 previous instruction is not needed to compute the address. */
28047 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
28048 && !ix86_agi_dependent (dep_insn, insn))
28050 enum attr_unit unit = get_attr_unit (insn);
28053 /* Because of the difference between the length of integer and
28054 floating unit pipeline preparation stages, the memory operands
28055 for floating point are cheaper.
28057 ??? For Athlon it the difference is most probably 2. */
28058 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
28061 loadcost = TARGET_ATHLON ? 2 : 0;
28063 if (cost >= loadcost)
28070 case PROCESSOR_CORE2:
28071 case PROCESSOR_NEHALEM:
28072 case PROCESSOR_SANDYBRIDGE:
28073 case PROCESSOR_HASWELL:
28074 /* Stack engine allows to execute push&pop instructions in parall. */
28075 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
28076 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
28079 memory = get_attr_memory (insn);
28081 /* Show ability of reorder buffer to hide latency of load by executing
28082 in parallel with previous instruction in case
28083 previous instruction is not needed to compute the address. */
28084 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
28085 && !ix86_agi_dependent (dep_insn, insn))
28094 case PROCESSOR_SILVERMONT:
28095 case PROCESSOR_KNL:
28096 case PROCESSOR_INTEL:
28097 if (!reload_completed)
28100 /* Increase cost of integer loads. */
28101 memory = get_attr_memory (dep_insn);
28102 if (memory == MEMORY_LOAD || memory == MEMORY_BOTH)
28104 enum attr_unit unit = get_attr_unit (dep_insn);
28105 if (unit == UNIT_INTEGER && cost == 1)
28107 if (memory == MEMORY_LOAD)
28111 /* Increase cost of ld/st for short int types only
28112 because of store forwarding issue. */
28113 rtx set = single_set (dep_insn);
28114 if (set && (GET_MODE (SET_DEST (set)) == QImode
28115 || GET_MODE (SET_DEST (set)) == HImode))
28117 /* Increase cost of store/load insn if exact
28118 dependence exists and it is load insn. */
28119 enum attr_memory insn_memory = get_attr_memory (insn);
28120 if (insn_memory == MEMORY_LOAD
28121 && exact_store_load_dependency (dep_insn, insn))
28135 /* How many alternative schedules to try. This should be as wide as the
28136 scheduling freedom in the DFA, but no wider. Making this value too
28137 large results extra work for the scheduler. */
28140 ia32_multipass_dfa_lookahead (void)
/* Lookahead depth is chosen per tuned processor.  */
28144 case PROCESSOR_PENTIUM:
28145 case PROCESSOR_LAKEMONT:
28148 case PROCESSOR_PENTIUMPRO:
28152 case PROCESSOR_BDVER1:
28153 case PROCESSOR_BDVER2:
28154 case PROCESSOR_BDVER3:
28155 case PROCESSOR_BDVER4:
28156 /* We use lookahead value 4 for BD both before and after reload
28157 schedules. Plan is to have value 8 included for O3. */
28160 case PROCESSOR_CORE2:
28161 case PROCESSOR_NEHALEM:
28162 case PROCESSOR_SANDYBRIDGE:
28163 case PROCESSOR_HASWELL:
28164 case PROCESSOR_BONNELL:
28165 case PROCESSOR_SILVERMONT:
28166 case PROCESSOR_KNL:
28167 case PROCESSOR_INTEL:
28168 /* Generally, we want haifa-sched:max_issue() to look ahead as far
28169 as many instructions can be executed on a cycle, i.e.,
28170 issue_rate. I wonder why tuning for many CPUs does not do this. */
28171 if (reload_completed)
28172 return ix86_issue_rate ();
28173 /* Don't use lookahead for pre-reload schedule to save compile time. */
28181 /* Return true if target platform supports macro-fusion. */
28184 ix86_macro_fusion_p ()
/* Fusing a compare/test with the following branch is a tuning flag.  */
28186 return TARGET_FUSE_CMP_AND_BRANCH;
28189 /* Check whether current microarchitecture support macro fusion
28190 for insn pair "CONDGEN + CONDJMP". Refer to
28191 "Intel Architectures Optimization Reference Manual". */
28194 ix86_macro_fusion_pair_p (rtx_insn *condgen, rtx_insn *condjmp)
28197 enum rtx_code ccode;
28198 rtx compare_set = NULL_RTX, test_if, cond;
28199 rtx alu_set = NULL_RTX, addr = NULL_RTX;
/* The second insn of the pair must be a conditional jump.  */
28201 if (!any_condjump_p (condjmp))
/* Only test/cmp/inc/dec/alu insns can generate fusible flags.  */
28204 if (get_attr_type (condgen) != TYPE_TEST
28205 && get_attr_type (condgen) != TYPE_ICMP
28206 && get_attr_type (condgen) != TYPE_INCDEC
28207 && get_attr_type (condgen) != TYPE_ALU)
28210 compare_set = single_set (condgen);
28211 if (compare_set == NULL_RTX
28212 && !TARGET_FUSE_ALU_AND_BRANCH)
/* For a PARALLEL (an alu op that also sets the flags), locate the
   COMPARE set and the alu set separately.  */
28215 if (compare_set == NULL_RTX)
28218 rtx pat = PATTERN (condgen);
28219 for (i = 0; i < XVECLEN (pat, 0); i++)
28220 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
28222 rtx set_src = SET_SRC (XVECEXP (pat, 0, i));
28223 if (GET_CODE (set_src) == COMPARE)
28224 compare_set = XVECEXP (pat, 0, i);
28226 alu_set = XVECEXP (pat, 0, i);
28229 if (compare_set == NULL_RTX)
28231 src = SET_SRC (compare_set);
28232 if (GET_CODE (src) != COMPARE)
28235 /* Macro-fusion for cmp/test MEM-IMM + conditional jmp is not
   supported.  */
28237 if ((MEM_P (XEXP (src, 0))
28238 && CONST_INT_P (XEXP (src, 1)))
28239 || (MEM_P (XEXP (src, 1))
28240 && CONST_INT_P (XEXP (src, 0))))
28243 /* No fusion for RIP-relative address. */
28244 if (MEM_P (XEXP (src, 0)))
28245 addr = XEXP (XEXP (src, 0), 0);
28246 else if (MEM_P (XEXP (src, 1)))
28247 addr = XEXP (XEXP (src, 1), 0);
28250 ix86_address parts;
28251 int ok = ix86_decompose_address (addr, &parts);
28254 if (rip_relative_addr_p (&parts))
28258 test_if = SET_SRC (pc_set (condjmp));
28259 cond = XEXP (test_if, 0);
28260 ccode = GET_CODE (cond);
28261 /* Check whether conditional jump use Sign or Overflow Flags. */
28262 if (!TARGET_FUSE_CMP_AND_BRANCH_SOFLAGS
28269 /* Return true for TYPE_TEST and TYPE_ICMP. */
28270 if (get_attr_type (condgen) == TYPE_TEST
28271 || get_attr_type (condgen) == TYPE_ICMP)
28274 /* The following is the case that macro-fusion for alu + jmp. */
28275 if (!TARGET_FUSE_ALU_AND_BRANCH || !alu_set)
28278 /* No fusion for alu op with memory destination operand. */
28279 dest = SET_DEST (alu_set);
28283 /* Macro-fusion for inc/dec + unsigned conditional jump is not
   supported.  */
28285 if (get_attr_type (condgen) == TYPE_INCDEC
28295 /* Try to reorder ready list to take advantage of Atom pipelined IMUL
28296 execution. It is applied if
28297 (1) IMUL instruction is on the top of list;
28298 (2) There exists the only producer of independent IMUL instruction in
   ready list.
28300 Return index of IMUL producer if it was found and -1 otherwise. */
28302 do_reorder_for_imul (rtx_insn **ready, int n_ready)
28305 rtx set, insn1, insn2;
28306 sd_iterator_def sd_it;
/* Only the Bonnell (Atom) pipeline benefits from this reordering.  */
28311 if (!TARGET_BONNELL)
28314 /* Check that IMUL instruction is on the top of ready list. */
28315 insn = ready[n_ready - 1];
28316 set = single_set (insn);
28319 if (!(GET_CODE (SET_SRC (set)) == MULT
28320 && GET_MODE (SET_SRC (set)) == SImode))
28323 /* Search for producer of independent IMUL instruction. */
28324 for (i = n_ready - 2; i >= 0; i--)
28327 if (!NONDEBUG_INSN_P (insn))
28329 /* Skip IMUL instruction. */
28330 insn2 = PATTERN (insn);
28331 if (GET_CODE (insn2) == PARALLEL)
28332 insn2 = XVECEXP (insn2, 0, 0);
28333 if (GET_CODE (insn2) == SET
28334 && GET_CODE (SET_SRC (insn2)) == MULT
28335 && GET_MODE (SET_SRC (insn2)) == SImode)
/* Walk the forward dependencies of the candidate, looking for a
   consumer that is itself an SImode IMUL.  */
28338 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
28341 con = DEP_CON (dep);
28342 if (!NONDEBUG_INSN_P (con))
28344 insn1 = PATTERN (con);
28345 if (GET_CODE (insn1) == PARALLEL)
28346 insn1 = XVECEXP (insn1, 0, 0);
28348 if (GET_CODE (insn1) == SET
28349 && GET_CODE (SET_SRC (insn1)) == MULT
28350 && GET_MODE (SET_SRC (insn1)) == SImode)
28352 sd_iterator_def sd_it1;
28354 /* Check if there is no other dependee for IMUL. */
28356 FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1)
28359 pro = DEP_PRO (dep1);
28360 if (!NONDEBUG_INSN_P (pro))
28375 /* Try to find the best candidate on the top of ready list if two insns
28376 have the same priority - candidate is best if its dependees were
28377 scheduled earlier. Applied for Silvermont only.
28378 Return true if top 2 insns must be interchanged. */
28380 swap_top_of_ready_list (rtx_insn **ready, int n_ready)
28382 rtx_insn *top = ready[n_ready - 1];
28383 rtx_insn *next = ready[n_ready - 2];
28385 sd_iterator_def sd_it;
28389 #define INSN_TICK(INSN) (HID (INSN)->tick)
28391 if (!TARGET_SILVERMONT && !TARGET_INTEL)
/* Both candidates must be ordinary non-debug, non-jump insns with a
   single set.  */
28394 if (!NONDEBUG_INSN_P (top))
28396 if (!NONJUMP_INSN_P (top))
28398 if (!NONDEBUG_INSN_P (next))
28400 if (!NONJUMP_INSN_P (next))
28402 set = single_set (top);
28405 set = single_set (next);
28409 if (INSN_PRIORITY_KNOWN (top) && INSN_PRIORITY_KNOWN (next))
28411 if (INSN_PRIORITY (top) != INSN_PRIORITY (next))
28413 /* Determine winner more precise. */
/* clock1/clock2 collect the latest tick among each insn's resolved
   producers; the one whose inputs were ready earlier should win.  */
28414 FOR_EACH_DEP (top, SD_LIST_RES_BACK, sd_it, dep)
28417 pro = DEP_PRO (dep);
28418 if (!NONDEBUG_INSN_P (pro))
28420 if (INSN_TICK (pro) > clock1)
28421 clock1 = INSN_TICK (pro);
28423 FOR_EACH_DEP (next, SD_LIST_RES_BACK, sd_it, dep)
28426 pro = DEP_PRO (dep);
28427 if (!NONDEBUG_INSN_P (pro))
28429 if (INSN_TICK (pro) > clock2)
28430 clock2 = INSN_TICK (pro);
28433 if (clock1 == clock2)
28435 /* Determine winner - load must win. */
28436 enum attr_memory memory1, memory2;
28437 memory1 = get_attr_memory (top);
28438 memory2 = get_attr_memory (next);
28439 if (memory2 == MEMORY_LOAD && memory1 != MEMORY_LOAD)
28442 return (bool) (clock2 < clock1);
28448 /* Perform possible reodering of ready list for Atom/Silvermont only.
28449 Return issue rate. */
28451 ix86_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready,
28452 int *pn_ready, int clock_var)
28454 int issue_rate = -1;
28455 int n_ready = *pn_ready;
28460 /* Set up issue rate. */
28461 issue_rate = ix86_issue_rate ();
28463 /* Do reodering for BONNELL/SILVERMONT only. */
28464 if (!TARGET_BONNELL && !TARGET_SILVERMONT && !TARGET_INTEL)
28467 /* Nothing to do if ready list contains only 1 instruction. */
28471 /* Do reodering for post-reload scheduler only. */
28472 if (!reload_completed)
28475 if ((index = do_reorder_for_imul (ready, n_ready)) >= 0)
28477 if (sched_verbose > 1)
28478 fprintf (dump, ";;\tatom sched_reorder: put %d insn on top\n",
28479 INSN_UID (ready[index]));
28481 /* Put IMUL producer (ready[index]) at the top of ready list. */
/* Rotate entries index..n_ready-1 down by one, then place the
   producer at the top (highest index is scheduled first).  */
28482 insn = ready[index];
28483 for (i = index; i < n_ready - 1; i++)
28484 ready[i] = ready[i + 1];
28485 ready[n_ready - 1] = insn;
28489 /* Skip selective scheduling since HID is not populated in it. */
28492 && swap_top_of_ready_list (ready, n_ready))
28494 if (sched_verbose > 1)
28495 fprintf (dump, ";;\tslm sched_reorder: swap %d and %d insns\n",
28496 INSN_UID (ready[n_ready - 1]), INSN_UID (ready[n_ready - 2]));
28497 /* Swap 2 top elements of ready list. */
28498 insn = ready[n_ready - 1];
28499 ready[n_ready - 1] = ready[n_ready - 2];
28500 ready[n_ready - 2] = insn;
28506 ix86_class_likely_spilled_p (reg_class_t);
28508 /* Returns true if lhs of insn is HW function argument register and set up
28509 is_spilled to true if it is likely spilled HW register. */
28511 insn_is_function_arg (rtx insn, bool* is_spilled)
28515 if (!NONDEBUG_INSN_P (insn))
28517 /* Call instructions are not movable, ignore it. */
/* Strip down to the (first) SET of the pattern.  */
28520 insn = PATTERN (insn);
28521 if (GET_CODE (insn) == PARALLEL)
28522 insn = XVECEXP (insn, 0, 0);
28523 if (GET_CODE (insn) != SET)
28525 dst = SET_DEST (insn);
28526 if (REG_P (dst) && HARD_REGISTER_P (dst)
28527 && ix86_function_arg_regno_p (REGNO (dst)))
28529 /* Is it likely spilled HW register? */
28530 if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
28531 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
28532 *is_spilled = true;
28538 /* Add output dependencies for chain of function adjacent arguments if only
28539 there is a move to likely spilled HW register. Return first argument
28540 if at least one dependence was added or NULL otherwise. */
28542 add_parameter_dependencies (rtx_insn *call, rtx_insn *head)
28545 rtx_insn *last = call;
28546 rtx_insn *first_arg = NULL;
28547 bool is_spilled = false;
28549 head = PREV_INSN (head);
28551 /* Find nearest to call argument passing instruction. */
28554 last = PREV_INSN (last);
28557 if (!NONDEBUG_INSN_P (last))
28559 if (insn_is_function_arg (last, &is_spilled))
/* Walk backwards, chaining adjacent argument-setting insns.  */
28567 insn = PREV_INSN (last);
28568 if (!INSN_P (insn))
28572 if (!NONDEBUG_INSN_P (insn))
28577 if (insn_is_function_arg (insn, &is_spilled))
28579 /* Add output dependence between two function arguments if chain
28580 of output arguments contains likely spilled HW registers. */
28582 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
28583 first_arg = last = insn;
28593 /* Add output or anti dependency from insn to first_arg to restrict its code
   motion.  */
28596 avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn)
28601 /* Add anti dependencies for bounds stores. */
28603 && GET_CODE (PATTERN (insn)) == PARALLEL
28604 && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == UNSPEC
28605 && XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_BNDSTX)
28607 add_dependence (first_arg, insn, REG_DEP_ANTI);
28611 set = single_set (insn);
28614 tmp = SET_DEST (set);
28617 /* Add output dependency to the first function argument. */
28618 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
28621 /* Add anti dependency. */
28622 add_dependence (first_arg, insn, REG_DEP_ANTI);
28625 /* Avoid cross block motion of function argument through adding dependency
28626 from the first non-jump instruction in bb. */
28628 add_dependee_for_func_arg (rtx_insn *arg, basic_block bb)
/* Scan BB backwards from its end for the first ordinary insn.  */
28630 rtx_insn *insn = BB_END (bb);
28634 if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
28636 rtx set = single_set (insn);
28639 avoid_func_arg_motion (arg, insn);
28643 if (insn == BB_HEAD (bb))
28645 insn = PREV_INSN (insn);
28649 /* Hook for pre-reload schedule - avoid motion of function arguments
28650 passed in likely spilled HW registers. */
28652 ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
28655 rtx_insn *first_arg = NULL;
28656 if (reload_completed)
/* Skip leading debug insns, then walk the block backwards from TAIL
   looking for calls.  */
28658 while (head != tail && DEBUG_INSN_P (head))
28659 head = NEXT_INSN (head);
28660 for (insn = tail; insn != head; insn = PREV_INSN (insn))
28661 if (INSN_P (insn) && CALL_P (insn))
28663 first_arg = add_parameter_dependencies (insn, head);
28666 /* Add dependee for first argument to predecessors if only
28667 region contains more than one block. */
28668 basic_block bb = BLOCK_FOR_INSN (insn);
28669 int rgn = CONTAINING_RGN (bb->index);
28670 int nr_blks = RGN_NR_BLOCKS (rgn);
28671 /* Skip trivial regions and region head blocks that can have
28672 predecessors outside of region. */
28673 if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
28678 /* Regions are SCCs with the exception of selective
28679 scheduling with pipelining of outer blocks enabled.
28680 So also check that immediate predecessors of a non-head
28681 block are in the same region. */
28682 FOR_EACH_EDGE (e, ei, bb->preds)
28684 /* Avoid creating of loop-carried dependencies through
28685 using topological ordering in the region. */
28686 if (rgn == CONTAINING_RGN (e->src->index)
28687 && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
28688 add_dependee_for_func_arg (first_arg, e->src);
28696 else if (first_arg)
28697 avoid_func_arg_motion (first_arg, insn);
28700 /* Hook for pre-reload schedule - set priority of moves from likely spilled
28701 HW registers to maximum, to schedule them at soon as possible. These are
28702 moves from function argument registers at the top of the function entry
28703 and moves from function return value registers after call. */
28705 ix86_adjust_priority (rtx_insn *insn, int priority)
28709 if (reload_completed)
28712 if (!NONDEBUG_INSN_P (insn))
28715 set = single_set (insn);
/* Boost single sets whose source is a likely-spilled hard register.  */
28718 rtx tmp = SET_SRC (set);
28720 && HARD_REGISTER_P (tmp)
28721 && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
28722 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
28723 return current_sched_info->sched_max_insns_priority;
28729 /* Model decoder of Core 2/i7.
28730 Below hooks for multipass scheduling (see haifa-sched.c:max_issue)
28731 track the instruction fetch block boundaries and make sure that long
28732 (9+ bytes) instructions are assigned to D0. */
28734 /* Maximum length of an insn that can be handled by
28735 a secondary decoder unit. '8' for Core 2/i7. */
28736 static int core2i7_secondary_decoder_max_insn_size;
28738 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
28739 '16' for Core 2/i7. */
28740 static int core2i7_ifetch_block_size;
28742 /* Maximum number of instructions decoder can handle per cycle.
28743 '6' for Core 2/i7. */
28744 static int core2i7_ifetch_block_max_insns;
/* Convenience pointer typedefs for the multipass state record.  */
28746 typedef struct ix86_first_cycle_multipass_data_ *
28747 ix86_first_cycle_multipass_data_t;
28748 typedef const struct ix86_first_cycle_multipass_data_ *
28749 const_ix86_first_cycle_multipass_data_t;
28751 /* A variable to store target state across calls to max_issue within
   one cycle.  */
28753 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
28754 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
28756 /* Initialize DATA. */
28758 core2i7_first_cycle_multipass_init (void *_data)
28760 ix86_first_cycle_multipass_data_t data
28761 = (ix86_first_cycle_multipass_data_t) _data;
/* Start with an empty ifetch block and no saved ready_try state.  */
28763 data->ifetch_block_len = 0;
28764 data->ifetch_block_n_insns = 0;
28765 data->ready_try_change = NULL;
28766 data->ready_try_change_size = 0;
28769 /* Advancing the cycle; reset ifetch block counts. */
28771 core2i7_dfa_post_advance_cycle (void)
28773 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
28775 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
/* A new cycle starts a fresh ifetch block.  */
28777 data->ifetch_block_len = 0;
28778 data->ifetch_block_n_insns = 0;
28781 static int min_insn_size (rtx_insn *);
28783 /* Filter out insns from ready_try that the core will not be able to issue
28784 on current cycle due to decoder. */
28786 core2i7_first_cycle_multipass_filter_ready_try
28787 (const_ix86_first_cycle_multipass_data_t data,
28788 signed char *ready_try, int n_ready, bool first_cycle_insn_p)
/* Skip entries that are already masked out.  */
28795 if (ready_try[n_ready])
28798 insn = get_ready_element (n_ready);
28799 insn_size = min_insn_size (insn);
28801 if (/* If this is a too long an insn for a secondary decoder ... */
28802 (!first_cycle_insn_p
28803 && insn_size > core2i7_secondary_decoder_max_insn_size)
28804 /* ... or it would not fit into the ifetch block ... */
28805 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
28806 /* ... or the decoder is full already ... */
28807 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
28808 /* ... mask the insn out. */
28810 ready_try[n_ready] = 1;
/* Record which entries we masked so backtracking can undo it.  */
28812 if (data->ready_try_change)
28813 bitmap_set_bit (data->ready_try_change, n_ready);
28818 /* Prepare for a new round of multipass lookahead scheduling. */
28820 core2i7_first_cycle_multipass_begin (void *_data,
28821 signed char *ready_try, int n_ready,
28822 bool first_cycle_insn_p)
28824 ix86_first_cycle_multipass_data_t data
28825 = (ix86_first_cycle_multipass_data_t) _data;
/* Global state saved at the end of the previous round.  */
28826 const_ix86_first_cycle_multipass_data_t prev_data
28827 = ix86_first_cycle_multipass_data;
28829 /* Restore the state from the end of the previous round. */
28830 data->ifetch_block_len = prev_data->ifetch_block_len;
28831 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
28833 /* Filter instructions that cannot be issued on current cycle due to
28834 decoder restrictions. */
28835 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
28836 first_cycle_insn_p);
28839 /* INSN is being issued in current solution. Account for its impact on
28840 the decoder model. */
28842 core2i7_first_cycle_multipass_issue (void *_data,
28843 signed char *ready_try, int n_ready,
28844 rtx_insn *insn, const void *_prev_data)
28846 ix86_first_cycle_multipass_data_t data
28847 = (ix86_first_cycle_multipass_data_t) _data;
28848 const_ix86_first_cycle_multipass_data_t prev_data
28849 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
/* Charge INSN's encoded length and one decoder slot against the
   current ifetch block.  */
28851 int insn_size = min_insn_size (insn);
28853 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
28854 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
28855 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
28856 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
28858 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
28859 if (!data->ready_try_change)
28861 data->ready_try_change = sbitmap_alloc (n_ready);
28862 data->ready_try_change_size = n_ready;
28864 else if (data->ready_try_change_size < n_ready)
28866 data->ready_try_change = sbitmap_resize (data->ready_try_change,
28868 data->ready_try_change_size = n_ready;
28870 bitmap_clear (data->ready_try_change);
28872 /* Filter out insns from ready_try that the core will not be able to issue
28873 on current cycle due to decoder. */
28874 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
28878 /* Revert the effect on ready_try. */
28880 core2i7_first_cycle_multipass_backtrack (const void *_data,
28881 signed char *ready_try,
28882 int n_ready ATTRIBUTE_UNUSED)
28884 const_ix86_first_cycle_multipass_data_t data
28885 = (const_ix86_first_cycle_multipass_data_t) _data;
28886 unsigned int i = 0;
28887 sbitmap_iterator sbi;
/* Un-mask exactly the ready_try entries that the issue step masked.  */
28889 gcc_assert (bitmap_last_set_bit (data->ready_try_change) < n_ready);
28890 EXECUTE_IF_SET_IN_BITMAP (data->ready_try_change, 0, i, sbi)
28896 /* Save the result of multipass lookahead scheduling for the next round. */
28898 core2i7_first_cycle_multipass_end (const void *_data)
28900 const_ix86_first_cycle_multipass_data_t data
28901 = (const_ix86_first_cycle_multipass_data_t) _data;
28902 ix86_first_cycle_multipass_data_t next_data
28903 = ix86_first_cycle_multipass_data;
/* Persist ifetch-block progress into the global round-to-round state.  */
28907 next_data->ifetch_block_len = data->ifetch_block_len;
28908 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
28912 /* Deallocate target data. */
28914 core2i7_first_cycle_multipass_fini (void *_data)
28916 ix86_first_cycle_multipass_data_t data
28917 = (ix86_first_cycle_multipass_data_t) _data;
/* Free the masked-entries bitmap, if any, and reset bookkeeping.  */
28919 if (data->ready_try_change)
28921 sbitmap_free (data->ready_try_change);
28922 data->ready_try_change = NULL;
28923 data->ready_try_change_size = 0;
28927 /* Prepare for scheduling pass. */
28929 ix86_sched_init_global (FILE *, int, int)
28931 /* Install scheduling hooks for current CPU. Some of these hooks are used
28932 in time-critical parts of the scheduler, so we only set them up when
28933 they are actually used. */
28936 case PROCESSOR_CORE2:
28937 case PROCESSOR_NEHALEM:
28938 case PROCESSOR_SANDYBRIDGE:
28939 case PROCESSOR_HASWELL:
28940 /* Do not perform multipass scheduling for pre-reload schedule
28941 to save compile time. */
28942 if (reload_completed)
28944 targetm.sched.dfa_post_advance_cycle
28945 = core2i7_dfa_post_advance_cycle;
28946 targetm.sched.first_cycle_multipass_init
28947 = core2i7_first_cycle_multipass_init;
28948 targetm.sched.first_cycle_multipass_begin
28949 = core2i7_first_cycle_multipass_begin;
28950 targetm.sched.first_cycle_multipass_issue
28951 = core2i7_first_cycle_multipass_issue;
28952 targetm.sched.first_cycle_multipass_backtrack
28953 = core2i7_first_cycle_multipass_backtrack;
28954 targetm.sched.first_cycle_multipass_end
28955 = core2i7_first_cycle_multipass_end;
28956 targetm.sched.first_cycle_multipass_fini
28957 = core2i7_first_cycle_multipass_fini;
28959 /* Set decoder parameters. */
28960 core2i7_secondary_decoder_max_insn_size = 8;
28961 core2i7_ifetch_block_size = 16;
28962 core2i7_ifetch_block_max_insns = 6;
28965 /* ... Fall through ... */
/* Other processors: clear every multipass hook.  */
28967 targetm.sched.dfa_post_advance_cycle = NULL;
28968 targetm.sched.first_cycle_multipass_init = NULL;
28969 targetm.sched.first_cycle_multipass_begin = NULL;
28970 targetm.sched.first_cycle_multipass_issue = NULL;
28971 targetm.sched.first_cycle_multipass_backtrack = NULL;
28972 targetm.sched.first_cycle_multipass_end = NULL;
28973 targetm.sched.first_cycle_multipass_fini = NULL;
28979 /* Compute the alignment given to a constant that is being placed in memory.
28980 EXP is the constant and ALIGN is the alignment that the object would
   ordinarily have.
28982 The value of this function is used instead of that alignment to align
   the object.  */
28986 ix86_constant_alignment (tree exp, int align)
28988 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
28989 || TREE_CODE (exp) == INTEGER_CST)
28991 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
28993 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
/* Word-align long string constants (for faster block copies), but
   not when optimizing for size.  */
28996 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
28997 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
28998 return BITS_PER_WORD;
29003 /* Compute the alignment for a variable for Intel MCU psABI. TYPE is
29004 the data type, and ALIGN is the alignment that the object would
29005 ordinarily have. */
29008 iamcu_alignment (tree type, int align)
29010 enum machine_mode mode;
/* Nothing to cap below 32 bits; honor explicit user alignment.  */
29012 if (align < 32 || TYPE_USER_ALIGN (type))
29015 /* Intel MCU psABI specifies scalar types > 4 bytes aligned to 4
   bytes.  */
29017 mode = TYPE_MODE (strip_array_types (type));
29018 switch (GET_MODE_CLASS (mode))
29021 case MODE_COMPLEX_INT:
29022 case MODE_COMPLEX_FLOAT:
29024 case MODE_DECIMAL_FLOAT:
29031 /* Compute the alignment for a static variable.
29032 TYPE is the data type, and ALIGN is the alignment that
29033 the object would ordinarily have. The value of this function is used
29034 instead of that alignment to align the object. */
29037 ix86_data_alignment (tree type, int align, bool opt)
29039 /* GCC 4.8 and earlier used to incorrectly assume this alignment even
29040 for symbols from other compilation units or symbols that don't need
29041 to bind locally. In order to preserve some ABI compatibility with
29042 those compilers, ensure we don't decrease alignment from what we
29045 int max_align_compat = MIN (256, MAX_OFILE_ALIGNMENT);
29047 /* A data structure, equal or greater than the size of a cache line
29048 (64 bytes in the Pentium 4 and other recent Intel processors, including
29049 processors based on Intel Core microarchitecture) should be aligned
29050 so that its base address is a multiple of a cache line size. */
29053 = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
29055 if (max_align < BITS_PER_WORD)
29056 max_align = BITS_PER_WORD;
/* -malign-data= overrides the cache-line heuristic.  */
29058 switch (ix86_align_data_type)
29060 case ix86_align_data_type_abi: opt = false; break;
29061 case ix86_align_data_type_compat: max_align = BITS_PER_WORD; break;
29062 case ix86_align_data_type_cacheline: break;
29066 align = iamcu_alignment (type, align);
29069 && AGGREGATE_TYPE_P (type)
29070 && TYPE_SIZE (type)
29071 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
29073 if (wi::geu_p (TYPE_SIZE (type), max_align_compat)
29074 && align < max_align_compat)
29075 align = max_align_compat;
29076 if (wi::geu_p (TYPE_SIZE (type), max_align)
29077 && align < max_align)
29081 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
29082 to 16byte boundary. */
29085 if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
29086 && TYPE_SIZE (type)
29087 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
29088 && wi::geu_p (TYPE_SIZE (type), 128)
/* Per-tree-code minimum alignments for element/field modes.  */
29096 if (TREE_CODE (type) == ARRAY_TYPE)
29098 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
29100 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
29103 else if (TREE_CODE (type) == COMPLEX_TYPE)
29106 if (TYPE_MODE (type) == DCmode && align < 64)
29108 if ((TYPE_MODE (type) == XCmode
29109 || TYPE_MODE (type) == TCmode) && align < 128)
29112 else if ((TREE_CODE (type) == RECORD_TYPE
29113 || TREE_CODE (type) == UNION_TYPE
29114 || TREE_CODE (type) == QUAL_UNION_TYPE)
29115 && TYPE_FIELDS (type))
29117 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
29119 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
29122 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
29123 || TREE_CODE (type) == INTEGER_TYPE)
29125 if (TYPE_MODE (type) == DFmode && align < 64)
29127 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
29134 /* Compute the alignment for a local variable or a stack slot. EXP is
29135 the data type or decl itself, MODE is the widest mode available and
29136 ALIGN is the alignment that the object would ordinarily have. The
29137 value of this macro is used instead of that alignment to align the
/* NOTE(review): this extract is decimated -- braces, local declarations
   and the `align = ...; return ...;` lines are elided; the comments below
   describe only what the visible lines establish.  */
29141 ix86_local_alignment (tree exp, machine_mode mode,
29142 unsigned int align)
/* If EXP is a decl, consult its type so that both TYPE_USER_ALIGN and
   DECL_USER_ALIGN can be checked below.  */
29146 if (exp && DECL_P (exp))
29148 type = TREE_TYPE (exp);
29157 /* Don't do dynamic stack realignment for long long objects with
29158 -mpreferred-stack-boundary=2. */
/* Only when the preferred stack boundary is below 64 bits, the object is
   DImode, and neither the type nor the decl carries a user-requested
   alignment.  */
29161 && ix86_preferred_stack_boundary < 64
29162 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
29163 && (!type || !TYPE_USER_ALIGN (type))
29164 && (!decl || !DECL_USER_ALIGN (decl)))
29167 /* If TYPE is NULL, we are allocating a stack slot for caller-save
29168 register in MODE. We will return the largest alignment of XF
/* Caller-save slot for an x87 XFmode value: use at least DFmode's
   alignment.  */
29172 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
29173 align = GET_MODE_ALIGNMENT (DFmode);
29177 /* Don't increase alignment for Intel MCU psABI. */
29181 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
29182 to 16byte boundary. Exact wording is:
29184 An array uses the same alignment as its elements, except that a local or
29185 global array variable of length at least 16 bytes or
29186 a C99 variable-length array variable always has alignment of at least 16 bytes.
29188 This was added to allow use of aligned SSE instructions at arrays. This
29189 rule is meant for static storage (where compiler can not do the analysis
29190 by itself). We follow it for automatic variables only when convenient.
29191 We fully control everything in the function compiled and functions from
29192 other unit can not rely on the alignment.
29194 Exclude va_list type. It is the common case of local array where
29195 we can not benefit from the alignment.
29197 TODO: Probably one should optimize for size only when var is not escaping. */
/* Only raise alignment when optimizing for speed on 64-bit targets.  */
29198 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
/* Aggregates of known constant size >= 16 bytes, excluding va_list.  */
29201 if (AGGREGATE_TYPE_P (type)
29202 && (va_list_type_node == NULL_TREE
29203 || (TYPE_MAIN_VARIANT (type)
29204 != TYPE_MAIN_VARIANT (va_list_type_node)))
29205 && TYPE_SIZE (type)
29206 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
29207 && wi::geu_p (TYPE_SIZE (type), 16)
/* Per-kind bumps below; the assignments in each then-branch are elided
   in this extract -- presumably they raise ALIGN to 64 or 128 bits
   respectively (TODO confirm against the full source).  */
29211 if (TREE_CODE (type) == ARRAY_TYPE)
29213 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
29215 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
29218 else if (TREE_CODE (type) == COMPLEX_TYPE)
29220 if (TYPE_MODE (type) == DCmode && align < 64)
29222 if ((TYPE_MODE (type) == XCmode
29223 || TYPE_MODE (type) == TCmode) && align < 128)
29226 else if ((TREE_CODE (type) == RECORD_TYPE
29227 || TREE_CODE (type) == UNION_TYPE
29228 || TREE_CODE (type) == QUAL_UNION_TYPE)
29229 && TYPE_FIELDS (type))
29231 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
29233 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
29236 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
29237 || TREE_CODE (type) == INTEGER_TYPE)
29240 if (TYPE_MODE (type) == DFmode && align < 64)
29242 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
29248 /* Compute the minimum required alignment for dynamic stack realignment
29249 purposes for a local variable, parameter or a stack slot. EXP is
29250 the data type or decl itself, MODE is its mode and ALIGN is the
29251 alignment that the object would ordinarily have. */
/* NOTE(review): decimated extract -- the return statements and braces
   are elided; only the visible conditions are documented here.  */
29254 ix86_minimum_alignment (tree exp, machine_mode mode,
29255 unsigned int align)
/* Take the type from the decl when EXP is a decl.  */
29259 if (exp && DECL_P (exp))
29261 type = TREE_TYPE (exp);
/* The only interesting case is a 64-bit-aligned object on a 32-bit
   target whose preferred stack boundary is below 64 bits; everything
   else keeps its ordinary alignment.  */
29270 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
29273 /* Don't do dynamic stack realignment for long long objects with
29274 -mpreferred-stack-boundary=2. */
/* DImode objects without an explicit user alignment request are
   demoted (elided branch; presumably to 32-bit alignment -- TODO
   confirm against the full source).  */
29275 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
29276 && (!type || !TYPE_USER_ALIGN (type))
29277 && (!decl || !DECL_USER_ALIGN (decl)))
29283 /* Find a location for the static chain incoming to a nested function.
29284 This is a register, unless all free registers are used by arguments. */
/* NOTE(review): decimated extract -- the regno assignments in each
   calling-convention branch are elided; the existing comments state
   which register each branch selects.  */
29287 ix86_static_chain (const_tree fndecl_or_type, bool incoming_p)
29291 /* While this function won't be called by the middle-end when a static
29292 chain isn't needed, it's also used throughout the backend so it's
29293 easiest to keep this check centralized. */
29294 if (DECL_P (fndecl_or_type) && !DECL_STATIC_CHAIN (fndecl_or_type))
29299 /* We always use R10 in 64-bit mode. */
29304 const_tree fntype, fndecl;
29307 /* By default in 32-bit mode we use ECX to pass the static chain. */
/* FNDECL_OR_TYPE may be either a FUNCTION_DECL or a bare function
   type; normalize into FNTYPE/FNDECL.  */
29310 if (TREE_CODE (fndecl_or_type) == FUNCTION_DECL)
29312 fntype = TREE_TYPE (fndecl_or_type);
29313 fndecl = fndecl_or_type;
29317 fntype = fndecl_or_type;
/* Override the default register based on the calling convention.  */
29321 ccvt = ix86_get_callcvt (fntype);
29322 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
29324 /* Fastcall functions use ecx/edx for arguments, which leaves
29325 us with EAX for the static chain.
29326 Thiscall functions use ecx for arguments, which also
29327 leaves us with EAX for the static chain. */
29330 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
29332 /* Thiscall functions use ecx for arguments, which leaves
29333 us with EAX and EDX for the static chain.
29334 We are using for abi-compatibility EAX. */
29337 else if (ix86_function_regparm (fntype, fndecl) == 3)
29339 /* For regparm 3, we have no free call-clobbered registers in
29340 which to store the static chain. In order to implement this,
29341 we have the trampoline push the static chain to the stack.
29342 However, we can't push a value below the return address when
29343 we call the nested function directly, so we have to use an
29344 alternate entry point. For this we use ESI, and have the
29345 alternate entry point push ESI, so that things appear the
29346 same once we're executing the nested function. */
/* Record that the current function keeps its chain on the stack, and
   hand back a stack slot just below the incoming arguments.  */
29349 if (fndecl == current_function_decl)
29350 ix86_static_chain_on_stack = true;
29351 return gen_frame_mem (SImode,
29352 plus_constant (Pmode,
29353 arg_pointer_rtx, -8));
/* Common exit: the chosen hard register.  */
29359 return gen_rtx_REG (Pmode, regno);
29362 /* Emit RTL insns to initialize the variable parts of a trampoline.
29363 FNDECL is the decl of the target address; M_TRAMP is a MEM for
29364 the trampoline, and CHAIN_VALUE is an RTX for the static chain
29365 to be passed to the target function. */
/* NOTE(review): decimated extract -- the OFFSET bookkeeping lines and
   the 64-bit/32-bit dispatch braces are elided.  The immediates written
   below are little-endian x86 machine code bytes; do not change them.  */
29368 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
29374 fnaddr = XEXP (DECL_RTL (fndecl), 0);
29380 /* Load the function address to r11. Try to load address using
29381 the shorter movl instead of movabs. We may want to support
29382 movq for kernel mode, but kernel does not use trampolines at
29383 the moment. FNADDR is a 32bit address and may not be in
29384 DImode when ptr_mode == SImode. Always use movl in this
29386 if (ptr_mode == SImode
29387 || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
29389 fnaddr = copy_addr_to_reg (fnaddr);
/* 0x41 0xbb = movl $imm32, %r11d (zero-extends into r11).  */
29391 mem = adjust_address (m_tramp, HImode, offset);
29392 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
29394 mem = adjust_address (m_tramp, SImode, offset + 2);
29395 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
/* Otherwise: 0x49 0xbb = movabs $imm64, %r11.  */
29400 mem = adjust_address (m_tramp, HImode, offset);
29401 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
29403 mem = adjust_address (m_tramp, DImode, offset + 2);
29404 emit_move_insn (mem, fnaddr);
29408 /* Load static chain using movabs to r10. Use the shorter movl
29409 instead of movabs when ptr_mode == SImode. */
29410 if (ptr_mode == SImode)
/* OPCODE selected above (elided here) encodes the mov into r10.  */
29421 mem = adjust_address (m_tramp, HImode, offset);
29422 emit_move_insn (mem, gen_int_mode (opcode, HImode));
29424 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
29425 emit_move_insn (mem, chain_value);
29428 /* Jump to r11; the last (unused) byte is a nop, only there to
29429 pad the write out to a single 32-bit store. */
/* 0x49 0xff 0xe3 = rex.WB jmp *%r11; 0x90 = nop (stored little-endian).  */
29430 mem = adjust_address (m_tramp, SImode, offset);
29431 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
/* 32-bit (ia32) path below.  */
29438 /* Depending on the static chain location, either load a register
29439 with a constant, or push the constant to the stack. All of the
29440 instructions are the same size. */
29441 chain = ix86_static_chain (fndecl, true);
/* Pick the mov-immediate opcode for whichever register holds the
   chain (0xb8 = mov imm32 into eax, 0xb9 = into ecx).  */
29444 switch (REGNO (chain))
29447 opcode = 0xb8; break;
29449 opcode = 0xb9; break;
29451 gcc_unreachable ();
29457 mem = adjust_address (m_tramp, QImode, offset);
29458 emit_move_insn (mem, gen_int_mode (opcode, QImode));
29460 mem = adjust_address (m_tramp, SImode, offset + 1);
29461 emit_move_insn (mem, chain_value);
/* 0xe9 = jmp rel32 to the target function.  */
29464 mem = adjust_address (m_tramp, QImode, offset);
29465 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
29467 mem = adjust_address (m_tramp, SImode, offset + 1);
29469 /* Compute offset from the end of the jmp to the target function.
29470 In the case in which the trampoline stores the static chain on
29471 the stack, we need to skip the first insn which pushes the
29472 (call-saved) register static chain; this push is 1 byte. */
29474 disp = expand_binop (SImode, sub_optab, fnaddr,
29475 plus_constant (Pmode, XEXP (m_tramp, 0),
29476 offset - (MEM_P (chain) ? 1 : 0)),
29477 NULL_RTX, 1, OPTAB_DIRECT);
29478 emit_move_insn (mem, disp);
/* Sanity check: we must not have written past the trampoline buffer.  */
29481 gcc_assert (offset <= TRAMPOLINE_SIZE);
/* Some targets require marking the stack executable before the
   trampoline can be run.  */
29483 #ifdef HAVE_ENABLE_EXECUTE_STACK
29484 #ifdef CHECK_EXECUTE_STACK_ENABLED
29485 if (CHECK_EXECUTE_STACK_ENABLED)
29487 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
29488 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
29492 /* The following file contains several enumerations and data structures
29493 built from the definitions in i386-builtin-types.def. */
29495 #include "i386-builtin-types.inc"
29497 /* Table for the ix86 builtin non-function types. */
/* Memoization cache, GC-rooted; entries are built lazily by
   ix86_get_builtin_type below.  */
29498 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
29500 /* Retrieve an element from the above table, building some of
29501 the types lazily. */
/* NOTE(review): decimated extract -- the early-return for a cached
   entry and some braces are elided.  */
29504 ix86_get_builtin_type (enum ix86_builtin_type tcode)
29506 unsigned int index;
29509 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
/* Return the cached tree if this type was already built.  */
29511 type = ix86_builtin_type_tab[(int) tcode];
/* Primitive types are pre-populated; only vector and pointer types
   are constructed here.  */
29515 gcc_assert (tcode > IX86_BT_LAST_PRIM)&#59;
29516 if (tcode <= IX86_BT_LAST_VECT)
/* Vector type: element type and machine mode come from the generated
   ix86_builtin_type_vect_* tables.  */
29520 index = tcode - IX86_BT_LAST_PRIM - 1;
29521 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
29522 mode = ix86_builtin_type_vect_mode[index];
29524 type = build_vector_type_for_mode (itype, mode);
/* Pointer type: codes up to IX86_BT_LAST_PTR are plain pointers,
   the rest are pointers to const.  */
29530 index = tcode - IX86_BT_LAST_VECT - 1;
29531 if (tcode <= IX86_BT_LAST_PTR)
29532 quals = TYPE_UNQUALIFIED;
29534 quals = TYPE_QUAL_CONST;
29536 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
29537 if (quals != TYPE_UNQUALIFIED)
29538 itype = build_qualified_type (itype, quals);
29540 type = build_pointer_type (itype);
/* Cache the result for subsequent lookups.  */
29543 ix86_builtin_type_tab[(int) tcode] = type;
29547 /* Table for the ix86 builtin function types. */
/* Memoization cache, GC-rooted; entries are built lazily by
   ix86_get_builtin_func_type below.  */
29548 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
29550 /* Retrieve an element from the above table, building some of
29551 the types lazily. */
/* NOTE(review): decimated extract -- the cached-entry early return and
   some braces are elided.  */
29554 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
29558 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
/* Return the cached tree if already built.  */
29560 type = ix86_builtin_func_type_tab[(int) tcode];
29564 if (tcode <= IX86_BT_LAST_FUNC)
/* Ordinary function type: the generated ix86_builtin_func_args table
   stores [return-type, arg1, arg2, ...] for each code, delimited by
   the start offsets in ix86_builtin_func_start.  */
29566 unsigned start = ix86_builtin_func_start[(int) tcode];
29567 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
29568 tree rtype, atype, args = void_list_node;
29571 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
/* Build the TREE_LIST of argument types back-to-front so the list
   ends up in declaration order.  */
29572 for (i = after - 1; i > start; --i)
29574 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
29575 args = tree_cons (NULL, atype, args);
29578 type = build_function_type (rtype, args);
/* Alias code: share the tree of the function type it aliases.  */
29582 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
29583 enum ix86_builtin_func_type icode;
29585 icode = ix86_builtin_func_alias_base[index];
29586 type = ix86_get_builtin_func_type (icode);
/* Cache the result for subsequent lookups.  */
29589 ix86_builtin_func_type_tab[(int) tcode] = type;
29594 /* Codes for all the SSE/MMX builtins. */
29597 IX86_BUILTIN_ADDPS,
29598 IX86_BUILTIN_ADDSS,
29599 IX86_BUILTIN_DIVPS,
29600 IX86_BUILTIN_DIVSS,
29601 IX86_BUILTIN_MULPS,
29602 IX86_BUILTIN_MULSS,
29603 IX86_BUILTIN_SUBPS,
29604 IX86_BUILTIN_SUBSS,
29606 IX86_BUILTIN_CMPEQPS,
29607 IX86_BUILTIN_CMPLTPS,
29608 IX86_BUILTIN_CMPLEPS,
29609 IX86_BUILTIN_CMPGTPS,
29610 IX86_BUILTIN_CMPGEPS,
29611 IX86_BUILTIN_CMPNEQPS,
29612 IX86_BUILTIN_CMPNLTPS,
29613 IX86_BUILTIN_CMPNLEPS,
29614 IX86_BUILTIN_CMPNGTPS,
29615 IX86_BUILTIN_CMPNGEPS,
29616 IX86_BUILTIN_CMPORDPS,
29617 IX86_BUILTIN_CMPUNORDPS,
29618 IX86_BUILTIN_CMPEQSS,
29619 IX86_BUILTIN_CMPLTSS,
29620 IX86_BUILTIN_CMPLESS,
29621 IX86_BUILTIN_CMPNEQSS,
29622 IX86_BUILTIN_CMPNLTSS,
29623 IX86_BUILTIN_CMPNLESS,
29624 IX86_BUILTIN_CMPORDSS,
29625 IX86_BUILTIN_CMPUNORDSS,
29627 IX86_BUILTIN_COMIEQSS,
29628 IX86_BUILTIN_COMILTSS,
29629 IX86_BUILTIN_COMILESS,
29630 IX86_BUILTIN_COMIGTSS,
29631 IX86_BUILTIN_COMIGESS,
29632 IX86_BUILTIN_COMINEQSS,
29633 IX86_BUILTIN_UCOMIEQSS,
29634 IX86_BUILTIN_UCOMILTSS,
29635 IX86_BUILTIN_UCOMILESS,
29636 IX86_BUILTIN_UCOMIGTSS,
29637 IX86_BUILTIN_UCOMIGESS,
29638 IX86_BUILTIN_UCOMINEQSS,
29640 IX86_BUILTIN_CVTPI2PS,
29641 IX86_BUILTIN_CVTPS2PI,
29642 IX86_BUILTIN_CVTSI2SS,
29643 IX86_BUILTIN_CVTSI642SS,
29644 IX86_BUILTIN_CVTSS2SI,
29645 IX86_BUILTIN_CVTSS2SI64,
29646 IX86_BUILTIN_CVTTPS2PI,
29647 IX86_BUILTIN_CVTTSS2SI,
29648 IX86_BUILTIN_CVTTSS2SI64,
29650 IX86_BUILTIN_MAXPS,
29651 IX86_BUILTIN_MAXSS,
29652 IX86_BUILTIN_MINPS,
29653 IX86_BUILTIN_MINSS,
29655 IX86_BUILTIN_LOADUPS,
29656 IX86_BUILTIN_STOREUPS,
29657 IX86_BUILTIN_MOVSS,
29659 IX86_BUILTIN_MOVHLPS,
29660 IX86_BUILTIN_MOVLHPS,
29661 IX86_BUILTIN_LOADHPS,
29662 IX86_BUILTIN_LOADLPS,
29663 IX86_BUILTIN_STOREHPS,
29664 IX86_BUILTIN_STORELPS,
29666 IX86_BUILTIN_MASKMOVQ,
29667 IX86_BUILTIN_MOVMSKPS,
29668 IX86_BUILTIN_PMOVMSKB,
29670 IX86_BUILTIN_MOVNTPS,
29671 IX86_BUILTIN_MOVNTQ,
29673 IX86_BUILTIN_LOADDQU,
29674 IX86_BUILTIN_STOREDQU,
29676 IX86_BUILTIN_PACKSSWB,
29677 IX86_BUILTIN_PACKSSDW,
29678 IX86_BUILTIN_PACKUSWB,
29680 IX86_BUILTIN_PADDB,
29681 IX86_BUILTIN_PADDW,
29682 IX86_BUILTIN_PADDD,
29683 IX86_BUILTIN_PADDQ,
29684 IX86_BUILTIN_PADDSB,
29685 IX86_BUILTIN_PADDSW,
29686 IX86_BUILTIN_PADDUSB,
29687 IX86_BUILTIN_PADDUSW,
29688 IX86_BUILTIN_PSUBB,
29689 IX86_BUILTIN_PSUBW,
29690 IX86_BUILTIN_PSUBD,
29691 IX86_BUILTIN_PSUBQ,
29692 IX86_BUILTIN_PSUBSB,
29693 IX86_BUILTIN_PSUBSW,
29694 IX86_BUILTIN_PSUBUSB,
29695 IX86_BUILTIN_PSUBUSW,
29698 IX86_BUILTIN_PANDN,
29702 IX86_BUILTIN_PAVGB,
29703 IX86_BUILTIN_PAVGW,
29705 IX86_BUILTIN_PCMPEQB,
29706 IX86_BUILTIN_PCMPEQW,
29707 IX86_BUILTIN_PCMPEQD,
29708 IX86_BUILTIN_PCMPGTB,
29709 IX86_BUILTIN_PCMPGTW,
29710 IX86_BUILTIN_PCMPGTD,
29712 IX86_BUILTIN_PMADDWD,
29714 IX86_BUILTIN_PMAXSW,
29715 IX86_BUILTIN_PMAXUB,
29716 IX86_BUILTIN_PMINSW,
29717 IX86_BUILTIN_PMINUB,
29719 IX86_BUILTIN_PMULHUW,
29720 IX86_BUILTIN_PMULHW,
29721 IX86_BUILTIN_PMULLW,
29723 IX86_BUILTIN_PSADBW,
29724 IX86_BUILTIN_PSHUFW,
29726 IX86_BUILTIN_PSLLW,
29727 IX86_BUILTIN_PSLLD,
29728 IX86_BUILTIN_PSLLQ,
29729 IX86_BUILTIN_PSRAW,
29730 IX86_BUILTIN_PSRAD,
29731 IX86_BUILTIN_PSRLW,
29732 IX86_BUILTIN_PSRLD,
29733 IX86_BUILTIN_PSRLQ,
29734 IX86_BUILTIN_PSLLWI,
29735 IX86_BUILTIN_PSLLDI,
29736 IX86_BUILTIN_PSLLQI,
29737 IX86_BUILTIN_PSRAWI,
29738 IX86_BUILTIN_PSRADI,
29739 IX86_BUILTIN_PSRLWI,
29740 IX86_BUILTIN_PSRLDI,
29741 IX86_BUILTIN_PSRLQI,
29743 IX86_BUILTIN_PUNPCKHBW,
29744 IX86_BUILTIN_PUNPCKHWD,
29745 IX86_BUILTIN_PUNPCKHDQ,
29746 IX86_BUILTIN_PUNPCKLBW,
29747 IX86_BUILTIN_PUNPCKLWD,
29748 IX86_BUILTIN_PUNPCKLDQ,
29750 IX86_BUILTIN_SHUFPS,
29752 IX86_BUILTIN_RCPPS,
29753 IX86_BUILTIN_RCPSS,
29754 IX86_BUILTIN_RSQRTPS,
29755 IX86_BUILTIN_RSQRTPS_NR,
29756 IX86_BUILTIN_RSQRTSS,
29757 IX86_BUILTIN_RSQRTF,
29758 IX86_BUILTIN_SQRTPS,
29759 IX86_BUILTIN_SQRTPS_NR,
29760 IX86_BUILTIN_SQRTSS,
29762 IX86_BUILTIN_UNPCKHPS,
29763 IX86_BUILTIN_UNPCKLPS,
29765 IX86_BUILTIN_ANDPS,
29766 IX86_BUILTIN_ANDNPS,
29768 IX86_BUILTIN_XORPS,
29771 IX86_BUILTIN_LDMXCSR,
29772 IX86_BUILTIN_STMXCSR,
29773 IX86_BUILTIN_SFENCE,
29775 IX86_BUILTIN_FXSAVE,
29776 IX86_BUILTIN_FXRSTOR,
29777 IX86_BUILTIN_FXSAVE64,
29778 IX86_BUILTIN_FXRSTOR64,
29780 IX86_BUILTIN_XSAVE,
29781 IX86_BUILTIN_XRSTOR,
29782 IX86_BUILTIN_XSAVE64,
29783 IX86_BUILTIN_XRSTOR64,
29785 IX86_BUILTIN_XSAVEOPT,
29786 IX86_BUILTIN_XSAVEOPT64,
29788 IX86_BUILTIN_XSAVEC,
29789 IX86_BUILTIN_XSAVEC64,
29791 IX86_BUILTIN_XSAVES,
29792 IX86_BUILTIN_XRSTORS,
29793 IX86_BUILTIN_XSAVES64,
29794 IX86_BUILTIN_XRSTORS64,
29796 /* 3DNow! Original */
29797 IX86_BUILTIN_FEMMS,
29798 IX86_BUILTIN_PAVGUSB,
29799 IX86_BUILTIN_PF2ID,
29800 IX86_BUILTIN_PFACC,
29801 IX86_BUILTIN_PFADD,
29802 IX86_BUILTIN_PFCMPEQ,
29803 IX86_BUILTIN_PFCMPGE,
29804 IX86_BUILTIN_PFCMPGT,
29805 IX86_BUILTIN_PFMAX,
29806 IX86_BUILTIN_PFMIN,
29807 IX86_BUILTIN_PFMUL,
29808 IX86_BUILTIN_PFRCP,
29809 IX86_BUILTIN_PFRCPIT1,
29810 IX86_BUILTIN_PFRCPIT2,
29811 IX86_BUILTIN_PFRSQIT1,
29812 IX86_BUILTIN_PFRSQRT,
29813 IX86_BUILTIN_PFSUB,
29814 IX86_BUILTIN_PFSUBR,
29815 IX86_BUILTIN_PI2FD,
29816 IX86_BUILTIN_PMULHRW,
29818 /* 3DNow! Athlon Extensions */
29819 IX86_BUILTIN_PF2IW,
29820 IX86_BUILTIN_PFNACC,
29821 IX86_BUILTIN_PFPNACC,
29822 IX86_BUILTIN_PI2FW,
29823 IX86_BUILTIN_PSWAPDSI,
29824 IX86_BUILTIN_PSWAPDSF,
29827 IX86_BUILTIN_ADDPD,
29828 IX86_BUILTIN_ADDSD,
29829 IX86_BUILTIN_DIVPD,
29830 IX86_BUILTIN_DIVSD,
29831 IX86_BUILTIN_MULPD,
29832 IX86_BUILTIN_MULSD,
29833 IX86_BUILTIN_SUBPD,
29834 IX86_BUILTIN_SUBSD,
29836 IX86_BUILTIN_CMPEQPD,
29837 IX86_BUILTIN_CMPLTPD,
29838 IX86_BUILTIN_CMPLEPD,
29839 IX86_BUILTIN_CMPGTPD,
29840 IX86_BUILTIN_CMPGEPD,
29841 IX86_BUILTIN_CMPNEQPD,
29842 IX86_BUILTIN_CMPNLTPD,
29843 IX86_BUILTIN_CMPNLEPD,
29844 IX86_BUILTIN_CMPNGTPD,
29845 IX86_BUILTIN_CMPNGEPD,
29846 IX86_BUILTIN_CMPORDPD,
29847 IX86_BUILTIN_CMPUNORDPD,
29848 IX86_BUILTIN_CMPEQSD,
29849 IX86_BUILTIN_CMPLTSD,
29850 IX86_BUILTIN_CMPLESD,
29851 IX86_BUILTIN_CMPNEQSD,
29852 IX86_BUILTIN_CMPNLTSD,
29853 IX86_BUILTIN_CMPNLESD,
29854 IX86_BUILTIN_CMPORDSD,
29855 IX86_BUILTIN_CMPUNORDSD,
29857 IX86_BUILTIN_COMIEQSD,
29858 IX86_BUILTIN_COMILTSD,
29859 IX86_BUILTIN_COMILESD,
29860 IX86_BUILTIN_COMIGTSD,
29861 IX86_BUILTIN_COMIGESD,
29862 IX86_BUILTIN_COMINEQSD,
29863 IX86_BUILTIN_UCOMIEQSD,
29864 IX86_BUILTIN_UCOMILTSD,
29865 IX86_BUILTIN_UCOMILESD,
29866 IX86_BUILTIN_UCOMIGTSD,
29867 IX86_BUILTIN_UCOMIGESD,
29868 IX86_BUILTIN_UCOMINEQSD,
29870 IX86_BUILTIN_MAXPD,
29871 IX86_BUILTIN_MAXSD,
29872 IX86_BUILTIN_MINPD,
29873 IX86_BUILTIN_MINSD,
29875 IX86_BUILTIN_ANDPD,
29876 IX86_BUILTIN_ANDNPD,
29878 IX86_BUILTIN_XORPD,
29880 IX86_BUILTIN_SQRTPD,
29881 IX86_BUILTIN_SQRTSD,
29883 IX86_BUILTIN_UNPCKHPD,
29884 IX86_BUILTIN_UNPCKLPD,
29886 IX86_BUILTIN_SHUFPD,
29888 IX86_BUILTIN_LOADUPD,
29889 IX86_BUILTIN_STOREUPD,
29890 IX86_BUILTIN_MOVSD,
29892 IX86_BUILTIN_LOADHPD,
29893 IX86_BUILTIN_LOADLPD,
29895 IX86_BUILTIN_CVTDQ2PD,
29896 IX86_BUILTIN_CVTDQ2PS,
29898 IX86_BUILTIN_CVTPD2DQ,
29899 IX86_BUILTIN_CVTPD2PI,
29900 IX86_BUILTIN_CVTPD2PS,
29901 IX86_BUILTIN_CVTTPD2DQ,
29902 IX86_BUILTIN_CVTTPD2PI,
29904 IX86_BUILTIN_CVTPI2PD,
29905 IX86_BUILTIN_CVTSI2SD,
29906 IX86_BUILTIN_CVTSI642SD,
29908 IX86_BUILTIN_CVTSD2SI,
29909 IX86_BUILTIN_CVTSD2SI64,
29910 IX86_BUILTIN_CVTSD2SS,
29911 IX86_BUILTIN_CVTSS2SD,
29912 IX86_BUILTIN_CVTTSD2SI,
29913 IX86_BUILTIN_CVTTSD2SI64,
29915 IX86_BUILTIN_CVTPS2DQ,
29916 IX86_BUILTIN_CVTPS2PD,
29917 IX86_BUILTIN_CVTTPS2DQ,
29919 IX86_BUILTIN_MOVNTI,
29920 IX86_BUILTIN_MOVNTI64,
29921 IX86_BUILTIN_MOVNTPD,
29922 IX86_BUILTIN_MOVNTDQ,
29924 IX86_BUILTIN_MOVQ128,
29927 IX86_BUILTIN_MASKMOVDQU,
29928 IX86_BUILTIN_MOVMSKPD,
29929 IX86_BUILTIN_PMOVMSKB128,
29931 IX86_BUILTIN_PACKSSWB128,
29932 IX86_BUILTIN_PACKSSDW128,
29933 IX86_BUILTIN_PACKUSWB128,
29935 IX86_BUILTIN_PADDB128,
29936 IX86_BUILTIN_PADDW128,
29937 IX86_BUILTIN_PADDD128,
29938 IX86_BUILTIN_PADDQ128,
29939 IX86_BUILTIN_PADDSB128,
29940 IX86_BUILTIN_PADDSW128,
29941 IX86_BUILTIN_PADDUSB128,
29942 IX86_BUILTIN_PADDUSW128,
29943 IX86_BUILTIN_PSUBB128,
29944 IX86_BUILTIN_PSUBW128,
29945 IX86_BUILTIN_PSUBD128,
29946 IX86_BUILTIN_PSUBQ128,
29947 IX86_BUILTIN_PSUBSB128,
29948 IX86_BUILTIN_PSUBSW128,
29949 IX86_BUILTIN_PSUBUSB128,
29950 IX86_BUILTIN_PSUBUSW128,
29952 IX86_BUILTIN_PAND128,
29953 IX86_BUILTIN_PANDN128,
29954 IX86_BUILTIN_POR128,
29955 IX86_BUILTIN_PXOR128,
29957 IX86_BUILTIN_PAVGB128,
29958 IX86_BUILTIN_PAVGW128,
29960 IX86_BUILTIN_PCMPEQB128,
29961 IX86_BUILTIN_PCMPEQW128,
29962 IX86_BUILTIN_PCMPEQD128,
29963 IX86_BUILTIN_PCMPGTB128,
29964 IX86_BUILTIN_PCMPGTW128,
29965 IX86_BUILTIN_PCMPGTD128,
29967 IX86_BUILTIN_PMADDWD128,
29969 IX86_BUILTIN_PMAXSW128,
29970 IX86_BUILTIN_PMAXUB128,
29971 IX86_BUILTIN_PMINSW128,
29972 IX86_BUILTIN_PMINUB128,
29974 IX86_BUILTIN_PMULUDQ,
29975 IX86_BUILTIN_PMULUDQ128,
29976 IX86_BUILTIN_PMULHUW128,
29977 IX86_BUILTIN_PMULHW128,
29978 IX86_BUILTIN_PMULLW128,
29980 IX86_BUILTIN_PSADBW128,
29981 IX86_BUILTIN_PSHUFHW,
29982 IX86_BUILTIN_PSHUFLW,
29983 IX86_BUILTIN_PSHUFD,
29985 IX86_BUILTIN_PSLLDQI128,
29986 IX86_BUILTIN_PSLLWI128,
29987 IX86_BUILTIN_PSLLDI128,
29988 IX86_BUILTIN_PSLLQI128,
29989 IX86_BUILTIN_PSRAWI128,
29990 IX86_BUILTIN_PSRADI128,
29991 IX86_BUILTIN_PSRLDQI128,
29992 IX86_BUILTIN_PSRLWI128,
29993 IX86_BUILTIN_PSRLDI128,
29994 IX86_BUILTIN_PSRLQI128,
29996 IX86_BUILTIN_PSLLDQ128,
29997 IX86_BUILTIN_PSLLW128,
29998 IX86_BUILTIN_PSLLD128,
29999 IX86_BUILTIN_PSLLQ128,
30000 IX86_BUILTIN_PSRAW128,
30001 IX86_BUILTIN_PSRAD128,
30002 IX86_BUILTIN_PSRLW128,
30003 IX86_BUILTIN_PSRLD128,
30004 IX86_BUILTIN_PSRLQ128,
30006 IX86_BUILTIN_PUNPCKHBW128,
30007 IX86_BUILTIN_PUNPCKHWD128,
30008 IX86_BUILTIN_PUNPCKHDQ128,
30009 IX86_BUILTIN_PUNPCKHQDQ128,
30010 IX86_BUILTIN_PUNPCKLBW128,
30011 IX86_BUILTIN_PUNPCKLWD128,
30012 IX86_BUILTIN_PUNPCKLDQ128,
30013 IX86_BUILTIN_PUNPCKLQDQ128,
30015 IX86_BUILTIN_CLFLUSH,
30016 IX86_BUILTIN_MFENCE,
30017 IX86_BUILTIN_LFENCE,
30018 IX86_BUILTIN_PAUSE,
30020 IX86_BUILTIN_FNSTENV,
30021 IX86_BUILTIN_FLDENV,
30022 IX86_BUILTIN_FNSTSW,
30023 IX86_BUILTIN_FNCLEX,
30025 IX86_BUILTIN_BSRSI,
30026 IX86_BUILTIN_BSRDI,
30027 IX86_BUILTIN_RDPMC,
30028 IX86_BUILTIN_RDTSC,
30029 IX86_BUILTIN_RDTSCP,
30030 IX86_BUILTIN_ROLQI,
30031 IX86_BUILTIN_ROLHI,
30032 IX86_BUILTIN_RORQI,
30033 IX86_BUILTIN_RORHI,
30036 IX86_BUILTIN_ADDSUBPS,
30037 IX86_BUILTIN_HADDPS,
30038 IX86_BUILTIN_HSUBPS,
30039 IX86_BUILTIN_MOVSHDUP,
30040 IX86_BUILTIN_MOVSLDUP,
30041 IX86_BUILTIN_ADDSUBPD,
30042 IX86_BUILTIN_HADDPD,
30043 IX86_BUILTIN_HSUBPD,
30044 IX86_BUILTIN_LDDQU,
30046 IX86_BUILTIN_MONITOR,
30047 IX86_BUILTIN_MWAIT,
30048 IX86_BUILTIN_CLZERO,
30051 IX86_BUILTIN_PHADDW,
30052 IX86_BUILTIN_PHADDD,
30053 IX86_BUILTIN_PHADDSW,
30054 IX86_BUILTIN_PHSUBW,
30055 IX86_BUILTIN_PHSUBD,
30056 IX86_BUILTIN_PHSUBSW,
30057 IX86_BUILTIN_PMADDUBSW,
30058 IX86_BUILTIN_PMULHRSW,
30059 IX86_BUILTIN_PSHUFB,
30060 IX86_BUILTIN_PSIGNB,
30061 IX86_BUILTIN_PSIGNW,
30062 IX86_BUILTIN_PSIGND,
30063 IX86_BUILTIN_PALIGNR,
30064 IX86_BUILTIN_PABSB,
30065 IX86_BUILTIN_PABSW,
30066 IX86_BUILTIN_PABSD,
30068 IX86_BUILTIN_PHADDW128,
30069 IX86_BUILTIN_PHADDD128,
30070 IX86_BUILTIN_PHADDSW128,
30071 IX86_BUILTIN_PHSUBW128,
30072 IX86_BUILTIN_PHSUBD128,
30073 IX86_BUILTIN_PHSUBSW128,
30074 IX86_BUILTIN_PMADDUBSW128,
30075 IX86_BUILTIN_PMULHRSW128,
30076 IX86_BUILTIN_PSHUFB128,
30077 IX86_BUILTIN_PSIGNB128,
30078 IX86_BUILTIN_PSIGNW128,
30079 IX86_BUILTIN_PSIGND128,
30080 IX86_BUILTIN_PALIGNR128,
30081 IX86_BUILTIN_PABSB128,
30082 IX86_BUILTIN_PABSW128,
30083 IX86_BUILTIN_PABSD128,
30085 /* AMDFAM10 - SSE4A New Instructions. */
30086 IX86_BUILTIN_MOVNTSD,
30087 IX86_BUILTIN_MOVNTSS,
30088 IX86_BUILTIN_EXTRQI,
30089 IX86_BUILTIN_EXTRQ,
30090 IX86_BUILTIN_INSERTQI,
30091 IX86_BUILTIN_INSERTQ,
30094 IX86_BUILTIN_BLENDPD,
30095 IX86_BUILTIN_BLENDPS,
30096 IX86_BUILTIN_BLENDVPD,
30097 IX86_BUILTIN_BLENDVPS,
30098 IX86_BUILTIN_PBLENDVB128,
30099 IX86_BUILTIN_PBLENDW128,
30104 IX86_BUILTIN_INSERTPS128,
30106 IX86_BUILTIN_MOVNTDQA,
30107 IX86_BUILTIN_MPSADBW128,
30108 IX86_BUILTIN_PACKUSDW128,
30109 IX86_BUILTIN_PCMPEQQ,
30110 IX86_BUILTIN_PHMINPOSUW128,
30112 IX86_BUILTIN_PMAXSB128,
30113 IX86_BUILTIN_PMAXSD128,
30114 IX86_BUILTIN_PMAXUD128,
30115 IX86_BUILTIN_PMAXUW128,
30117 IX86_BUILTIN_PMINSB128,
30118 IX86_BUILTIN_PMINSD128,
30119 IX86_BUILTIN_PMINUD128,
30120 IX86_BUILTIN_PMINUW128,
30122 IX86_BUILTIN_PMOVSXBW128,
30123 IX86_BUILTIN_PMOVSXBD128,
30124 IX86_BUILTIN_PMOVSXBQ128,
30125 IX86_BUILTIN_PMOVSXWD128,
30126 IX86_BUILTIN_PMOVSXWQ128,
30127 IX86_BUILTIN_PMOVSXDQ128,
30129 IX86_BUILTIN_PMOVZXBW128,
30130 IX86_BUILTIN_PMOVZXBD128,
30131 IX86_BUILTIN_PMOVZXBQ128,
30132 IX86_BUILTIN_PMOVZXWD128,
30133 IX86_BUILTIN_PMOVZXWQ128,
30134 IX86_BUILTIN_PMOVZXDQ128,
30136 IX86_BUILTIN_PMULDQ128,
30137 IX86_BUILTIN_PMULLD128,
30139 IX86_BUILTIN_ROUNDSD,
30140 IX86_BUILTIN_ROUNDSS,
30142 IX86_BUILTIN_ROUNDPD,
30143 IX86_BUILTIN_ROUNDPS,
30145 IX86_BUILTIN_FLOORPD,
30146 IX86_BUILTIN_CEILPD,
30147 IX86_BUILTIN_TRUNCPD,
30148 IX86_BUILTIN_RINTPD,
30149 IX86_BUILTIN_ROUNDPD_AZ,
30151 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX,
30152 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX,
30153 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX,
30155 IX86_BUILTIN_FLOORPS,
30156 IX86_BUILTIN_CEILPS,
30157 IX86_BUILTIN_TRUNCPS,
30158 IX86_BUILTIN_RINTPS,
30159 IX86_BUILTIN_ROUNDPS_AZ,
30161 IX86_BUILTIN_FLOORPS_SFIX,
30162 IX86_BUILTIN_CEILPS_SFIX,
30163 IX86_BUILTIN_ROUNDPS_AZ_SFIX,
30165 IX86_BUILTIN_PTESTZ,
30166 IX86_BUILTIN_PTESTC,
30167 IX86_BUILTIN_PTESTNZC,
30169 IX86_BUILTIN_VEC_INIT_V2SI,
30170 IX86_BUILTIN_VEC_INIT_V4HI,
30171 IX86_BUILTIN_VEC_INIT_V8QI,
30172 IX86_BUILTIN_VEC_EXT_V2DF,
30173 IX86_BUILTIN_VEC_EXT_V2DI,
30174 IX86_BUILTIN_VEC_EXT_V4SF,
30175 IX86_BUILTIN_VEC_EXT_V4SI,
30176 IX86_BUILTIN_VEC_EXT_V8HI,
30177 IX86_BUILTIN_VEC_EXT_V2SI,
30178 IX86_BUILTIN_VEC_EXT_V4HI,
30179 IX86_BUILTIN_VEC_EXT_V16QI,
30180 IX86_BUILTIN_VEC_SET_V2DI,
30181 IX86_BUILTIN_VEC_SET_V4SF,
30182 IX86_BUILTIN_VEC_SET_V4SI,
30183 IX86_BUILTIN_VEC_SET_V8HI,
30184 IX86_BUILTIN_VEC_SET_V4HI,
30185 IX86_BUILTIN_VEC_SET_V16QI,
30187 IX86_BUILTIN_VEC_PACK_SFIX,
30188 IX86_BUILTIN_VEC_PACK_SFIX256,
30191 IX86_BUILTIN_CRC32QI,
30192 IX86_BUILTIN_CRC32HI,
30193 IX86_BUILTIN_CRC32SI,
30194 IX86_BUILTIN_CRC32DI,
30196 IX86_BUILTIN_PCMPESTRI128,
30197 IX86_BUILTIN_PCMPESTRM128,
30198 IX86_BUILTIN_PCMPESTRA128,
30199 IX86_BUILTIN_PCMPESTRC128,
30200 IX86_BUILTIN_PCMPESTRO128,
30201 IX86_BUILTIN_PCMPESTRS128,
30202 IX86_BUILTIN_PCMPESTRZ128,
30203 IX86_BUILTIN_PCMPISTRI128,
30204 IX86_BUILTIN_PCMPISTRM128,
30205 IX86_BUILTIN_PCMPISTRA128,
30206 IX86_BUILTIN_PCMPISTRC128,
30207 IX86_BUILTIN_PCMPISTRO128,
30208 IX86_BUILTIN_PCMPISTRS128,
30209 IX86_BUILTIN_PCMPISTRZ128,
30211 IX86_BUILTIN_PCMPGTQ,
30213 /* AES instructions */
30214 IX86_BUILTIN_AESENC128,
30215 IX86_BUILTIN_AESENCLAST128,
30216 IX86_BUILTIN_AESDEC128,
30217 IX86_BUILTIN_AESDECLAST128,
30218 IX86_BUILTIN_AESIMC128,
30219 IX86_BUILTIN_AESKEYGENASSIST128,
30221 /* PCLMUL instruction */
30222 IX86_BUILTIN_PCLMULQDQ128,
30225 IX86_BUILTIN_ADDPD256,
30226 IX86_BUILTIN_ADDPS256,
30227 IX86_BUILTIN_ADDSUBPD256,
30228 IX86_BUILTIN_ADDSUBPS256,
30229 IX86_BUILTIN_ANDPD256,
30230 IX86_BUILTIN_ANDPS256,
30231 IX86_BUILTIN_ANDNPD256,
30232 IX86_BUILTIN_ANDNPS256,
30233 IX86_BUILTIN_BLENDPD256,
30234 IX86_BUILTIN_BLENDPS256,
30235 IX86_BUILTIN_BLENDVPD256,
30236 IX86_BUILTIN_BLENDVPS256,
30237 IX86_BUILTIN_DIVPD256,
30238 IX86_BUILTIN_DIVPS256,
30239 IX86_BUILTIN_DPPS256,
30240 IX86_BUILTIN_HADDPD256,
30241 IX86_BUILTIN_HADDPS256,
30242 IX86_BUILTIN_HSUBPD256,
30243 IX86_BUILTIN_HSUBPS256,
30244 IX86_BUILTIN_MAXPD256,
30245 IX86_BUILTIN_MAXPS256,
30246 IX86_BUILTIN_MINPD256,
30247 IX86_BUILTIN_MINPS256,
30248 IX86_BUILTIN_MULPD256,
30249 IX86_BUILTIN_MULPS256,
30250 IX86_BUILTIN_ORPD256,
30251 IX86_BUILTIN_ORPS256,
30252 IX86_BUILTIN_SHUFPD256,
30253 IX86_BUILTIN_SHUFPS256,
30254 IX86_BUILTIN_SUBPD256,
30255 IX86_BUILTIN_SUBPS256,
30256 IX86_BUILTIN_XORPD256,
30257 IX86_BUILTIN_XORPS256,
30258 IX86_BUILTIN_CMPSD,
30259 IX86_BUILTIN_CMPSS,
30260 IX86_BUILTIN_CMPPD,
30261 IX86_BUILTIN_CMPPS,
30262 IX86_BUILTIN_CMPPD256,
30263 IX86_BUILTIN_CMPPS256,
30264 IX86_BUILTIN_CVTDQ2PD256,
30265 IX86_BUILTIN_CVTDQ2PS256,
30266 IX86_BUILTIN_CVTPD2PS256,
30267 IX86_BUILTIN_CVTPS2DQ256,
30268 IX86_BUILTIN_CVTPS2PD256,
30269 IX86_BUILTIN_CVTTPD2DQ256,
30270 IX86_BUILTIN_CVTPD2DQ256,
30271 IX86_BUILTIN_CVTTPS2DQ256,
30272 IX86_BUILTIN_EXTRACTF128PD256,
30273 IX86_BUILTIN_EXTRACTF128PS256,
30274 IX86_BUILTIN_EXTRACTF128SI256,
30275 IX86_BUILTIN_VZEROALL,
30276 IX86_BUILTIN_VZEROUPPER,
30277 IX86_BUILTIN_VPERMILVARPD,
30278 IX86_BUILTIN_VPERMILVARPS,
30279 IX86_BUILTIN_VPERMILVARPD256,
30280 IX86_BUILTIN_VPERMILVARPS256,
30281 IX86_BUILTIN_VPERMILPD,
30282 IX86_BUILTIN_VPERMILPS,
30283 IX86_BUILTIN_VPERMILPD256,
30284 IX86_BUILTIN_VPERMILPS256,
30285 IX86_BUILTIN_VPERMIL2PD,
30286 IX86_BUILTIN_VPERMIL2PS,
30287 IX86_BUILTIN_VPERMIL2PD256,
30288 IX86_BUILTIN_VPERMIL2PS256,
30289 IX86_BUILTIN_VPERM2F128PD256,
30290 IX86_BUILTIN_VPERM2F128PS256,
30291 IX86_BUILTIN_VPERM2F128SI256,
30292 IX86_BUILTIN_VBROADCASTSS,
30293 IX86_BUILTIN_VBROADCASTSD256,
30294 IX86_BUILTIN_VBROADCASTSS256,
30295 IX86_BUILTIN_VBROADCASTPD256,
30296 IX86_BUILTIN_VBROADCASTPS256,
30297 IX86_BUILTIN_VINSERTF128PD256,
30298 IX86_BUILTIN_VINSERTF128PS256,
30299 IX86_BUILTIN_VINSERTF128SI256,
30300 IX86_BUILTIN_LOADUPD256,
30301 IX86_BUILTIN_LOADUPS256,
30302 IX86_BUILTIN_STOREUPD256,
30303 IX86_BUILTIN_STOREUPS256,
30304 IX86_BUILTIN_LDDQU256,
30305 IX86_BUILTIN_MOVNTDQ256,
30306 IX86_BUILTIN_MOVNTPD256,
30307 IX86_BUILTIN_MOVNTPS256,
30308 IX86_BUILTIN_LOADDQU256,
30309 IX86_BUILTIN_STOREDQU256,
30310 IX86_BUILTIN_MASKLOADPD,
30311 IX86_BUILTIN_MASKLOADPS,
30312 IX86_BUILTIN_MASKSTOREPD,
30313 IX86_BUILTIN_MASKSTOREPS,
30314 IX86_BUILTIN_MASKLOADPD256,
30315 IX86_BUILTIN_MASKLOADPS256,
30316 IX86_BUILTIN_MASKSTOREPD256,
30317 IX86_BUILTIN_MASKSTOREPS256,
30318 IX86_BUILTIN_MOVSHDUP256,
30319 IX86_BUILTIN_MOVSLDUP256,
30320 IX86_BUILTIN_MOVDDUP256,
30322 IX86_BUILTIN_SQRTPD256,
30323 IX86_BUILTIN_SQRTPS256,
30324 IX86_BUILTIN_SQRTPS_NR256,
30325 IX86_BUILTIN_RSQRTPS256,
30326 IX86_BUILTIN_RSQRTPS_NR256,
30328 IX86_BUILTIN_RCPPS256,
30330 IX86_BUILTIN_ROUNDPD256,
30331 IX86_BUILTIN_ROUNDPS256,
30333 IX86_BUILTIN_FLOORPD256,
30334 IX86_BUILTIN_CEILPD256,
30335 IX86_BUILTIN_TRUNCPD256,
30336 IX86_BUILTIN_RINTPD256,
30337 IX86_BUILTIN_ROUNDPD_AZ256,
30339 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256,
30340 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256,
30341 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256,
30343 IX86_BUILTIN_FLOORPS256,
30344 IX86_BUILTIN_CEILPS256,
30345 IX86_BUILTIN_TRUNCPS256,
30346 IX86_BUILTIN_RINTPS256,
30347 IX86_BUILTIN_ROUNDPS_AZ256,
30349 IX86_BUILTIN_FLOORPS_SFIX256,
30350 IX86_BUILTIN_CEILPS_SFIX256,
30351 IX86_BUILTIN_ROUNDPS_AZ_SFIX256,
30353 IX86_BUILTIN_UNPCKHPD256,
30354 IX86_BUILTIN_UNPCKLPD256,
30355 IX86_BUILTIN_UNPCKHPS256,
30356 IX86_BUILTIN_UNPCKLPS256,
30358 IX86_BUILTIN_SI256_SI,
30359 IX86_BUILTIN_PS256_PS,
30360 IX86_BUILTIN_PD256_PD,
30361 IX86_BUILTIN_SI_SI256,
30362 IX86_BUILTIN_PS_PS256,
30363 IX86_BUILTIN_PD_PD256,
30365 IX86_BUILTIN_VTESTZPD,
30366 IX86_BUILTIN_VTESTCPD,
30367 IX86_BUILTIN_VTESTNZCPD,
30368 IX86_BUILTIN_VTESTZPS,
30369 IX86_BUILTIN_VTESTCPS,
30370 IX86_BUILTIN_VTESTNZCPS,
30371 IX86_BUILTIN_VTESTZPD256,
30372 IX86_BUILTIN_VTESTCPD256,
30373 IX86_BUILTIN_VTESTNZCPD256,
30374 IX86_BUILTIN_VTESTZPS256,
30375 IX86_BUILTIN_VTESTCPS256,
30376 IX86_BUILTIN_VTESTNZCPS256,
30377 IX86_BUILTIN_PTESTZ256,
30378 IX86_BUILTIN_PTESTC256,
30379 IX86_BUILTIN_PTESTNZC256,
30381 IX86_BUILTIN_MOVMSKPD256,
30382 IX86_BUILTIN_MOVMSKPS256,
30385 IX86_BUILTIN_MPSADBW256,
30386 IX86_BUILTIN_PABSB256,
30387 IX86_BUILTIN_PABSW256,
30388 IX86_BUILTIN_PABSD256,
30389 IX86_BUILTIN_PACKSSDW256,
30390 IX86_BUILTIN_PACKSSWB256,
30391 IX86_BUILTIN_PACKUSDW256,
30392 IX86_BUILTIN_PACKUSWB256,
30393 IX86_BUILTIN_PADDB256,
30394 IX86_BUILTIN_PADDW256,
30395 IX86_BUILTIN_PADDD256,
30396 IX86_BUILTIN_PADDQ256,
30397 IX86_BUILTIN_PADDSB256,
30398 IX86_BUILTIN_PADDSW256,
30399 IX86_BUILTIN_PADDUSB256,
30400 IX86_BUILTIN_PADDUSW256,
30401 IX86_BUILTIN_PALIGNR256,
30402 IX86_BUILTIN_AND256I,
30403 IX86_BUILTIN_ANDNOT256I,
30404 IX86_BUILTIN_PAVGB256,
30405 IX86_BUILTIN_PAVGW256,
30406 IX86_BUILTIN_PBLENDVB256,
30407 IX86_BUILTIN_PBLENDVW256,
30408 IX86_BUILTIN_PCMPEQB256,
30409 IX86_BUILTIN_PCMPEQW256,
30410 IX86_BUILTIN_PCMPEQD256,
30411 IX86_BUILTIN_PCMPEQQ256,
30412 IX86_BUILTIN_PCMPGTB256,
30413 IX86_BUILTIN_PCMPGTW256,
30414 IX86_BUILTIN_PCMPGTD256,
30415 IX86_BUILTIN_PCMPGTQ256,
30416 IX86_BUILTIN_PHADDW256,
30417 IX86_BUILTIN_PHADDD256,
30418 IX86_BUILTIN_PHADDSW256,
30419 IX86_BUILTIN_PHSUBW256,
30420 IX86_BUILTIN_PHSUBD256,
30421 IX86_BUILTIN_PHSUBSW256,
30422 IX86_BUILTIN_PMADDUBSW256,
30423 IX86_BUILTIN_PMADDWD256,
30424 IX86_BUILTIN_PMAXSB256,
30425 IX86_BUILTIN_PMAXSW256,
30426 IX86_BUILTIN_PMAXSD256,
30427 IX86_BUILTIN_PMAXUB256,
30428 IX86_BUILTIN_PMAXUW256,
30429 IX86_BUILTIN_PMAXUD256,
30430 IX86_BUILTIN_PMINSB256,
30431 IX86_BUILTIN_PMINSW256,
30432 IX86_BUILTIN_PMINSD256,
30433 IX86_BUILTIN_PMINUB256,
30434 IX86_BUILTIN_PMINUW256,
30435 IX86_BUILTIN_PMINUD256,
30436 IX86_BUILTIN_PMOVMSKB256,
30437 IX86_BUILTIN_PMOVSXBW256,
30438 IX86_BUILTIN_PMOVSXBD256,
30439 IX86_BUILTIN_PMOVSXBQ256,
30440 IX86_BUILTIN_PMOVSXWD256,
30441 IX86_BUILTIN_PMOVSXWQ256,
30442 IX86_BUILTIN_PMOVSXDQ256,
30443 IX86_BUILTIN_PMOVZXBW256,
30444 IX86_BUILTIN_PMOVZXBD256,
30445 IX86_BUILTIN_PMOVZXBQ256,
30446 IX86_BUILTIN_PMOVZXWD256,
30447 IX86_BUILTIN_PMOVZXWQ256,
30448 IX86_BUILTIN_PMOVZXDQ256,
30449 IX86_BUILTIN_PMULDQ256,
30450 IX86_BUILTIN_PMULHRSW256,
30451 IX86_BUILTIN_PMULHUW256,
30452 IX86_BUILTIN_PMULHW256,
30453 IX86_BUILTIN_PMULLW256,
30454 IX86_BUILTIN_PMULLD256,
30455 IX86_BUILTIN_PMULUDQ256,
30456 IX86_BUILTIN_POR256,
30457 IX86_BUILTIN_PSADBW256,
30458 IX86_BUILTIN_PSHUFB256,
30459 IX86_BUILTIN_PSHUFD256,
30460 IX86_BUILTIN_PSHUFHW256,
30461 IX86_BUILTIN_PSHUFLW256,
30462 IX86_BUILTIN_PSIGNB256,
30463 IX86_BUILTIN_PSIGNW256,
30464 IX86_BUILTIN_PSIGND256,
30465 IX86_BUILTIN_PSLLDQI256,
30466 IX86_BUILTIN_PSLLWI256,
30467 IX86_BUILTIN_PSLLW256,
30468 IX86_BUILTIN_PSLLDI256,
30469 IX86_BUILTIN_PSLLD256,
30470 IX86_BUILTIN_PSLLQI256,
30471 IX86_BUILTIN_PSLLQ256,
30472 IX86_BUILTIN_PSRAWI256,
30473 IX86_BUILTIN_PSRAW256,
30474 IX86_BUILTIN_PSRADI256,
30475 IX86_BUILTIN_PSRAD256,
30476 IX86_BUILTIN_PSRLDQI256,
30477 IX86_BUILTIN_PSRLWI256,
30478 IX86_BUILTIN_PSRLW256,
30479 IX86_BUILTIN_PSRLDI256,
30480 IX86_BUILTIN_PSRLD256,
30481 IX86_BUILTIN_PSRLQI256,
30482 IX86_BUILTIN_PSRLQ256,
30483 IX86_BUILTIN_PSUBB256,
30484 IX86_BUILTIN_PSUBW256,
30485 IX86_BUILTIN_PSUBD256,
30486 IX86_BUILTIN_PSUBQ256,
30487 IX86_BUILTIN_PSUBSB256,
30488 IX86_BUILTIN_PSUBSW256,
30489 IX86_BUILTIN_PSUBUSB256,
30490 IX86_BUILTIN_PSUBUSW256,
30491 IX86_BUILTIN_PUNPCKHBW256,
30492 IX86_BUILTIN_PUNPCKHWD256,
30493 IX86_BUILTIN_PUNPCKHDQ256,
30494 IX86_BUILTIN_PUNPCKHQDQ256,
30495 IX86_BUILTIN_PUNPCKLBW256,
30496 IX86_BUILTIN_PUNPCKLWD256,
30497 IX86_BUILTIN_PUNPCKLDQ256,
30498 IX86_BUILTIN_PUNPCKLQDQ256,
30499 IX86_BUILTIN_PXOR256,
30500 IX86_BUILTIN_MOVNTDQA256,
30501 IX86_BUILTIN_VBROADCASTSS_PS,
30502 IX86_BUILTIN_VBROADCASTSS_PS256,
30503 IX86_BUILTIN_VBROADCASTSD_PD256,
30504 IX86_BUILTIN_VBROADCASTSI256,
30505 IX86_BUILTIN_PBLENDD256,
30506 IX86_BUILTIN_PBLENDD128,
30507 IX86_BUILTIN_PBROADCASTB256,
30508 IX86_BUILTIN_PBROADCASTW256,
30509 IX86_BUILTIN_PBROADCASTD256,
30510 IX86_BUILTIN_PBROADCASTQ256,
30511 IX86_BUILTIN_PBROADCASTB128,
30512 IX86_BUILTIN_PBROADCASTW128,
30513 IX86_BUILTIN_PBROADCASTD128,
30514 IX86_BUILTIN_PBROADCASTQ128,
30515 IX86_BUILTIN_VPERMVARSI256,
30516 IX86_BUILTIN_VPERMDF256,
30517 IX86_BUILTIN_VPERMVARSF256,
30518 IX86_BUILTIN_VPERMDI256,
30519 IX86_BUILTIN_VPERMTI256,
30520 IX86_BUILTIN_VEXTRACT128I256,
30521 IX86_BUILTIN_VINSERT128I256,
30522 IX86_BUILTIN_MASKLOADD,
30523 IX86_BUILTIN_MASKLOADQ,
30524 IX86_BUILTIN_MASKLOADD256,
30525 IX86_BUILTIN_MASKLOADQ256,
30526 IX86_BUILTIN_MASKSTORED,
30527 IX86_BUILTIN_MASKSTOREQ,
30528 IX86_BUILTIN_MASKSTORED256,
30529 IX86_BUILTIN_MASKSTOREQ256,
30530 IX86_BUILTIN_PSLLVV4DI,
30531 IX86_BUILTIN_PSLLVV2DI,
30532 IX86_BUILTIN_PSLLVV8SI,
30533 IX86_BUILTIN_PSLLVV4SI,
30534 IX86_BUILTIN_PSRAVV8SI,
30535 IX86_BUILTIN_PSRAVV4SI,
30536 IX86_BUILTIN_PSRLVV4DI,
30537 IX86_BUILTIN_PSRLVV2DI,
30538 IX86_BUILTIN_PSRLVV8SI,
30539 IX86_BUILTIN_PSRLVV4SI,
30541 IX86_BUILTIN_GATHERSIV2DF,
30542 IX86_BUILTIN_GATHERSIV4DF,
30543 IX86_BUILTIN_GATHERDIV2DF,
30544 IX86_BUILTIN_GATHERDIV4DF,
30545 IX86_BUILTIN_GATHERSIV4SF,
30546 IX86_BUILTIN_GATHERSIV8SF,
30547 IX86_BUILTIN_GATHERDIV4SF,
30548 IX86_BUILTIN_GATHERDIV8SF,
30549 IX86_BUILTIN_GATHERSIV2DI,
30550 IX86_BUILTIN_GATHERSIV4DI,
30551 IX86_BUILTIN_GATHERDIV2DI,
30552 IX86_BUILTIN_GATHERDIV4DI,
30553 IX86_BUILTIN_GATHERSIV4SI,
30554 IX86_BUILTIN_GATHERSIV8SI,
30555 IX86_BUILTIN_GATHERDIV4SI,
30556 IX86_BUILTIN_GATHERDIV8SI,
30559 IX86_BUILTIN_SI512_SI256,
30560 IX86_BUILTIN_PD512_PD256,
30561 IX86_BUILTIN_PS512_PS256,
30562 IX86_BUILTIN_SI512_SI,
30563 IX86_BUILTIN_PD512_PD,
30564 IX86_BUILTIN_PS512_PS,
30565 IX86_BUILTIN_ADDPD512,
30566 IX86_BUILTIN_ADDPS512,
30567 IX86_BUILTIN_ADDSD_ROUND,
30568 IX86_BUILTIN_ADDSS_ROUND,
30569 IX86_BUILTIN_ALIGND512,
30570 IX86_BUILTIN_ALIGNQ512,
30571 IX86_BUILTIN_BLENDMD512,
30572 IX86_BUILTIN_BLENDMPD512,
30573 IX86_BUILTIN_BLENDMPS512,
30574 IX86_BUILTIN_BLENDMQ512,
30575 IX86_BUILTIN_BROADCASTF32X4_512,
30576 IX86_BUILTIN_BROADCASTF64X4_512,
30577 IX86_BUILTIN_BROADCASTI32X4_512,
30578 IX86_BUILTIN_BROADCASTI64X4_512,
30579 IX86_BUILTIN_BROADCASTSD512,
30580 IX86_BUILTIN_BROADCASTSS512,
30581 IX86_BUILTIN_CMPD512,
30582 IX86_BUILTIN_CMPPD512,
30583 IX86_BUILTIN_CMPPS512,
30584 IX86_BUILTIN_CMPQ512,
30585 IX86_BUILTIN_CMPSD_MASK,
30586 IX86_BUILTIN_CMPSS_MASK,
30587 IX86_BUILTIN_COMIDF,
30588 IX86_BUILTIN_COMISF,
30589 IX86_BUILTIN_COMPRESSPD512,
30590 IX86_BUILTIN_COMPRESSPDSTORE512,
30591 IX86_BUILTIN_COMPRESSPS512,
30592 IX86_BUILTIN_COMPRESSPSSTORE512,
30593 IX86_BUILTIN_CVTDQ2PD512,
30594 IX86_BUILTIN_CVTDQ2PS512,
30595 IX86_BUILTIN_CVTPD2DQ512,
30596 IX86_BUILTIN_CVTPD2PS512,
30597 IX86_BUILTIN_CVTPD2UDQ512,
30598 IX86_BUILTIN_CVTPH2PS512,
30599 IX86_BUILTIN_CVTPS2DQ512,
30600 IX86_BUILTIN_CVTPS2PD512,
30601 IX86_BUILTIN_CVTPS2PH512,
30602 IX86_BUILTIN_CVTPS2UDQ512,
30603 IX86_BUILTIN_CVTSD2SS_ROUND,
30604 IX86_BUILTIN_CVTSI2SD64,
30605 IX86_BUILTIN_CVTSI2SS32,
30606 IX86_BUILTIN_CVTSI2SS64,
30607 IX86_BUILTIN_CVTSS2SD_ROUND,
30608 IX86_BUILTIN_CVTTPD2DQ512,
30609 IX86_BUILTIN_CVTTPD2UDQ512,
30610 IX86_BUILTIN_CVTTPS2DQ512,
30611 IX86_BUILTIN_CVTTPS2UDQ512,
30612 IX86_BUILTIN_CVTUDQ2PD512,
30613 IX86_BUILTIN_CVTUDQ2PS512,
30614 IX86_BUILTIN_CVTUSI2SD32,
30615 IX86_BUILTIN_CVTUSI2SD64,
30616 IX86_BUILTIN_CVTUSI2SS32,
30617 IX86_BUILTIN_CVTUSI2SS64,
30618 IX86_BUILTIN_DIVPD512,
30619 IX86_BUILTIN_DIVPS512,
30620 IX86_BUILTIN_DIVSD_ROUND,
30621 IX86_BUILTIN_DIVSS_ROUND,
30622 IX86_BUILTIN_EXPANDPD512,
30623 IX86_BUILTIN_EXPANDPD512Z,
30624 IX86_BUILTIN_EXPANDPDLOAD512,
30625 IX86_BUILTIN_EXPANDPDLOAD512Z,
30626 IX86_BUILTIN_EXPANDPS512,
30627 IX86_BUILTIN_EXPANDPS512Z,
30628 IX86_BUILTIN_EXPANDPSLOAD512,
30629 IX86_BUILTIN_EXPANDPSLOAD512Z,
30630 IX86_BUILTIN_EXTRACTF32X4,
30631 IX86_BUILTIN_EXTRACTF64X4,
30632 IX86_BUILTIN_EXTRACTI32X4,
30633 IX86_BUILTIN_EXTRACTI64X4,
30634 IX86_BUILTIN_FIXUPIMMPD512_MASK,
30635 IX86_BUILTIN_FIXUPIMMPD512_MASKZ,
30636 IX86_BUILTIN_FIXUPIMMPS512_MASK,
30637 IX86_BUILTIN_FIXUPIMMPS512_MASKZ,
30638 IX86_BUILTIN_FIXUPIMMSD128_MASK,
30639 IX86_BUILTIN_FIXUPIMMSD128_MASKZ,
30640 IX86_BUILTIN_FIXUPIMMSS128_MASK,
30641 IX86_BUILTIN_FIXUPIMMSS128_MASKZ,
30642 IX86_BUILTIN_GETEXPPD512,
30643 IX86_BUILTIN_GETEXPPS512,
30644 IX86_BUILTIN_GETEXPSD128,
30645 IX86_BUILTIN_GETEXPSS128,
30646 IX86_BUILTIN_GETMANTPD512,
30647 IX86_BUILTIN_GETMANTPS512,
30648 IX86_BUILTIN_GETMANTSD128,
30649 IX86_BUILTIN_GETMANTSS128,
30650 IX86_BUILTIN_INSERTF32X4,
30651 IX86_BUILTIN_INSERTF64X4,
30652 IX86_BUILTIN_INSERTI32X4,
30653 IX86_BUILTIN_INSERTI64X4,
30654 IX86_BUILTIN_LOADAPD512,
30655 IX86_BUILTIN_LOADAPS512,
30656 IX86_BUILTIN_LOADDQUDI512,
30657 IX86_BUILTIN_LOADDQUSI512,
30658 IX86_BUILTIN_LOADUPD512,
30659 IX86_BUILTIN_LOADUPS512,
30660 IX86_BUILTIN_MAXPD512,
30661 IX86_BUILTIN_MAXPS512,
30662 IX86_BUILTIN_MAXSD_ROUND,
30663 IX86_BUILTIN_MAXSS_ROUND,
30664 IX86_BUILTIN_MINPD512,
30665 IX86_BUILTIN_MINPS512,
30666 IX86_BUILTIN_MINSD_ROUND,
30667 IX86_BUILTIN_MINSS_ROUND,
30668 IX86_BUILTIN_MOVAPD512,
30669 IX86_BUILTIN_MOVAPS512,
30670 IX86_BUILTIN_MOVDDUP512,
30671 IX86_BUILTIN_MOVDQA32LOAD512,
30672 IX86_BUILTIN_MOVDQA32STORE512,
30673 IX86_BUILTIN_MOVDQA32_512,
30674 IX86_BUILTIN_MOVDQA64LOAD512,
30675 IX86_BUILTIN_MOVDQA64STORE512,
30676 IX86_BUILTIN_MOVDQA64_512,
30677 IX86_BUILTIN_MOVNTDQ512,
30678 IX86_BUILTIN_MOVNTDQA512,
30679 IX86_BUILTIN_MOVNTPD512,
30680 IX86_BUILTIN_MOVNTPS512,
30681 IX86_BUILTIN_MOVSHDUP512,
30682 IX86_BUILTIN_MOVSLDUP512,
30683 IX86_BUILTIN_MULPD512,
30684 IX86_BUILTIN_MULPS512,
30685 IX86_BUILTIN_MULSD_ROUND,
30686 IX86_BUILTIN_MULSS_ROUND,
30687 IX86_BUILTIN_PABSD512,
30688 IX86_BUILTIN_PABSQ512,
30689 IX86_BUILTIN_PADDD512,
30690 IX86_BUILTIN_PADDQ512,
30691 IX86_BUILTIN_PANDD512,
30692 IX86_BUILTIN_PANDND512,
30693 IX86_BUILTIN_PANDNQ512,
30694 IX86_BUILTIN_PANDQ512,
30695 IX86_BUILTIN_PBROADCASTD512,
30696 IX86_BUILTIN_PBROADCASTD512_GPR,
30697 IX86_BUILTIN_PBROADCASTMB512,
30698 IX86_BUILTIN_PBROADCASTMW512,
30699 IX86_BUILTIN_PBROADCASTQ512,
30700 IX86_BUILTIN_PBROADCASTQ512_GPR,
30701 IX86_BUILTIN_PCMPEQD512_MASK,
30702 IX86_BUILTIN_PCMPEQQ512_MASK,
30703 IX86_BUILTIN_PCMPGTD512_MASK,
30704 IX86_BUILTIN_PCMPGTQ512_MASK,
30705 IX86_BUILTIN_PCOMPRESSD512,
30706 IX86_BUILTIN_PCOMPRESSDSTORE512,
30707 IX86_BUILTIN_PCOMPRESSQ512,
30708 IX86_BUILTIN_PCOMPRESSQSTORE512,
30709 IX86_BUILTIN_PEXPANDD512,
30710 IX86_BUILTIN_PEXPANDD512Z,
30711 IX86_BUILTIN_PEXPANDDLOAD512,
30712 IX86_BUILTIN_PEXPANDDLOAD512Z,
30713 IX86_BUILTIN_PEXPANDQ512,
30714 IX86_BUILTIN_PEXPANDQ512Z,
30715 IX86_BUILTIN_PEXPANDQLOAD512,
30716 IX86_BUILTIN_PEXPANDQLOAD512Z,
30717 IX86_BUILTIN_PMAXSD512,
30718 IX86_BUILTIN_PMAXSQ512,
30719 IX86_BUILTIN_PMAXUD512,
30720 IX86_BUILTIN_PMAXUQ512,
30721 IX86_BUILTIN_PMINSD512,
30722 IX86_BUILTIN_PMINSQ512,
30723 IX86_BUILTIN_PMINUD512,
30724 IX86_BUILTIN_PMINUQ512,
30725 IX86_BUILTIN_PMOVDB512,
30726 IX86_BUILTIN_PMOVDB512_MEM,
30727 IX86_BUILTIN_PMOVDW512,
30728 IX86_BUILTIN_PMOVDW512_MEM,
30729 IX86_BUILTIN_PMOVQB512,
30730 IX86_BUILTIN_PMOVQB512_MEM,
30731 IX86_BUILTIN_PMOVQD512,
30732 IX86_BUILTIN_PMOVQD512_MEM,
30733 IX86_BUILTIN_PMOVQW512,
30734 IX86_BUILTIN_PMOVQW512_MEM,
30735 IX86_BUILTIN_PMOVSDB512,
30736 IX86_BUILTIN_PMOVSDB512_MEM,
30737 IX86_BUILTIN_PMOVSDW512,
30738 IX86_BUILTIN_PMOVSDW512_MEM,
30739 IX86_BUILTIN_PMOVSQB512,
30740 IX86_BUILTIN_PMOVSQB512_MEM,
30741 IX86_BUILTIN_PMOVSQD512,
30742 IX86_BUILTIN_PMOVSQD512_MEM,
30743 IX86_BUILTIN_PMOVSQW512,
30744 IX86_BUILTIN_PMOVSQW512_MEM,
30745 IX86_BUILTIN_PMOVSXBD512,
30746 IX86_BUILTIN_PMOVSXBQ512,
30747 IX86_BUILTIN_PMOVSXDQ512,
30748 IX86_BUILTIN_PMOVSXWD512,
30749 IX86_BUILTIN_PMOVSXWQ512,
30750 IX86_BUILTIN_PMOVUSDB512,
30751 IX86_BUILTIN_PMOVUSDB512_MEM,
30752 IX86_BUILTIN_PMOVUSDW512,
30753 IX86_BUILTIN_PMOVUSDW512_MEM,
30754 IX86_BUILTIN_PMOVUSQB512,
30755 IX86_BUILTIN_PMOVUSQB512_MEM,
30756 IX86_BUILTIN_PMOVUSQD512,
30757 IX86_BUILTIN_PMOVUSQD512_MEM,
30758 IX86_BUILTIN_PMOVUSQW512,
30759 IX86_BUILTIN_PMOVUSQW512_MEM,
30760 IX86_BUILTIN_PMOVZXBD512,
30761 IX86_BUILTIN_PMOVZXBQ512,
30762 IX86_BUILTIN_PMOVZXDQ512,
30763 IX86_BUILTIN_PMOVZXWD512,
30764 IX86_BUILTIN_PMOVZXWQ512,
30765 IX86_BUILTIN_PMULDQ512,
30766 IX86_BUILTIN_PMULLD512,
30767 IX86_BUILTIN_PMULUDQ512,
30768 IX86_BUILTIN_PORD512,
30769 IX86_BUILTIN_PORQ512,
30770 IX86_BUILTIN_PROLD512,
30771 IX86_BUILTIN_PROLQ512,
30772 IX86_BUILTIN_PROLVD512,
30773 IX86_BUILTIN_PROLVQ512,
30774 IX86_BUILTIN_PRORD512,
30775 IX86_BUILTIN_PRORQ512,
30776 IX86_BUILTIN_PRORVD512,
30777 IX86_BUILTIN_PRORVQ512,
30778 IX86_BUILTIN_PSHUFD512,
30779 IX86_BUILTIN_PSLLD512,
30780 IX86_BUILTIN_PSLLDI512,
30781 IX86_BUILTIN_PSLLQ512,
30782 IX86_BUILTIN_PSLLQI512,
30783 IX86_BUILTIN_PSLLVV16SI,
30784 IX86_BUILTIN_PSLLVV8DI,
30785 IX86_BUILTIN_PSRAD512,
30786 IX86_BUILTIN_PSRADI512,
30787 IX86_BUILTIN_PSRAQ512,
30788 IX86_BUILTIN_PSRAQI512,
30789 IX86_BUILTIN_PSRAVV16SI,
30790 IX86_BUILTIN_PSRAVV8DI,
30791 IX86_BUILTIN_PSRLD512,
30792 IX86_BUILTIN_PSRLDI512,
30793 IX86_BUILTIN_PSRLQ512,
30794 IX86_BUILTIN_PSRLQI512,
30795 IX86_BUILTIN_PSRLVV16SI,
30796 IX86_BUILTIN_PSRLVV8DI,
30797 IX86_BUILTIN_PSUBD512,
30798 IX86_BUILTIN_PSUBQ512,
30799 IX86_BUILTIN_PTESTMD512,
30800 IX86_BUILTIN_PTESTMQ512,
30801 IX86_BUILTIN_PTESTNMD512,
30802 IX86_BUILTIN_PTESTNMQ512,
30803 IX86_BUILTIN_PUNPCKHDQ512,
30804 IX86_BUILTIN_PUNPCKHQDQ512,
30805 IX86_BUILTIN_PUNPCKLDQ512,
30806 IX86_BUILTIN_PUNPCKLQDQ512,
30807 IX86_BUILTIN_PXORD512,
30808 IX86_BUILTIN_PXORQ512,
30809 IX86_BUILTIN_RCP14PD512,
30810 IX86_BUILTIN_RCP14PS512,
30811 IX86_BUILTIN_RCP14SD,
30812 IX86_BUILTIN_RCP14SS,
30813 IX86_BUILTIN_RNDSCALEPD,
30814 IX86_BUILTIN_RNDSCALEPS,
30815 IX86_BUILTIN_RNDSCALESD,
30816 IX86_BUILTIN_RNDSCALESS,
30817 IX86_BUILTIN_RSQRT14PD512,
30818 IX86_BUILTIN_RSQRT14PS512,
30819 IX86_BUILTIN_RSQRT14SD,
30820 IX86_BUILTIN_RSQRT14SS,
30821 IX86_BUILTIN_SCALEFPD512,
30822 IX86_BUILTIN_SCALEFPS512,
30823 IX86_BUILTIN_SCALEFSD,
30824 IX86_BUILTIN_SCALEFSS,
30825 IX86_BUILTIN_SHUFPD512,
30826 IX86_BUILTIN_SHUFPS512,
30827 IX86_BUILTIN_SHUF_F32x4,
30828 IX86_BUILTIN_SHUF_F64x2,
30829 IX86_BUILTIN_SHUF_I32x4,
30830 IX86_BUILTIN_SHUF_I64x2,
30831 IX86_BUILTIN_SQRTPD512,
30832 IX86_BUILTIN_SQRTPD512_MASK,
30833 IX86_BUILTIN_SQRTPS512_MASK,
30834 IX86_BUILTIN_SQRTPS_NR512,
30835 IX86_BUILTIN_SQRTSD_ROUND,
30836 IX86_BUILTIN_SQRTSS_ROUND,
30837 IX86_BUILTIN_STOREAPD512,
30838 IX86_BUILTIN_STOREAPS512,
30839 IX86_BUILTIN_STOREDQUDI512,
30840 IX86_BUILTIN_STOREDQUSI512,
30841 IX86_BUILTIN_STOREUPD512,
30842 IX86_BUILTIN_STOREUPS512,
30843 IX86_BUILTIN_SUBPD512,
30844 IX86_BUILTIN_SUBPS512,
30845 IX86_BUILTIN_SUBSD_ROUND,
30846 IX86_BUILTIN_SUBSS_ROUND,
30847 IX86_BUILTIN_UCMPD512,
30848 IX86_BUILTIN_UCMPQ512,
30849 IX86_BUILTIN_UNPCKHPD512,
30850 IX86_BUILTIN_UNPCKHPS512,
30851 IX86_BUILTIN_UNPCKLPD512,
30852 IX86_BUILTIN_UNPCKLPS512,
30853 IX86_BUILTIN_VCVTSD2SI32,
30854 IX86_BUILTIN_VCVTSD2SI64,
30855 IX86_BUILTIN_VCVTSD2USI32,
30856 IX86_BUILTIN_VCVTSD2USI64,
30857 IX86_BUILTIN_VCVTSS2SI32,
30858 IX86_BUILTIN_VCVTSS2SI64,
30859 IX86_BUILTIN_VCVTSS2USI32,
30860 IX86_BUILTIN_VCVTSS2USI64,
30861 IX86_BUILTIN_VCVTTSD2SI32,
30862 IX86_BUILTIN_VCVTTSD2SI64,
30863 IX86_BUILTIN_VCVTTSD2USI32,
30864 IX86_BUILTIN_VCVTTSD2USI64,
30865 IX86_BUILTIN_VCVTTSS2SI32,
30866 IX86_BUILTIN_VCVTTSS2SI64,
30867 IX86_BUILTIN_VCVTTSS2USI32,
30868 IX86_BUILTIN_VCVTTSS2USI64,
30869 IX86_BUILTIN_VFMADDPD512_MASK,
30870 IX86_BUILTIN_VFMADDPD512_MASK3,
30871 IX86_BUILTIN_VFMADDPD512_MASKZ,
30872 IX86_BUILTIN_VFMADDPS512_MASK,
30873 IX86_BUILTIN_VFMADDPS512_MASK3,
30874 IX86_BUILTIN_VFMADDPS512_MASKZ,
30875 IX86_BUILTIN_VFMADDSD3_ROUND,
30876 IX86_BUILTIN_VFMADDSS3_ROUND,
30877 IX86_BUILTIN_VFMADDSUBPD512_MASK,
30878 IX86_BUILTIN_VFMADDSUBPD512_MASK3,
30879 IX86_BUILTIN_VFMADDSUBPD512_MASKZ,
30880 IX86_BUILTIN_VFMADDSUBPS512_MASK,
30881 IX86_BUILTIN_VFMADDSUBPS512_MASK3,
30882 IX86_BUILTIN_VFMADDSUBPS512_MASKZ,
30883 IX86_BUILTIN_VFMSUBADDPD512_MASK3,
30884 IX86_BUILTIN_VFMSUBADDPS512_MASK3,
30885 IX86_BUILTIN_VFMSUBPD512_MASK3,
30886 IX86_BUILTIN_VFMSUBPS512_MASK3,
30887 IX86_BUILTIN_VFMSUBSD3_MASK3,
30888 IX86_BUILTIN_VFMSUBSS3_MASK3,
30889 IX86_BUILTIN_VFNMADDPD512_MASK,
30890 IX86_BUILTIN_VFNMADDPS512_MASK,
30891 IX86_BUILTIN_VFNMSUBPD512_MASK,
30892 IX86_BUILTIN_VFNMSUBPD512_MASK3,
30893 IX86_BUILTIN_VFNMSUBPS512_MASK,
30894 IX86_BUILTIN_VFNMSUBPS512_MASK3,
30895 IX86_BUILTIN_VPCLZCNTD512,
30896 IX86_BUILTIN_VPCLZCNTQ512,
30897 IX86_BUILTIN_VPCONFLICTD512,
30898 IX86_BUILTIN_VPCONFLICTQ512,
30899 IX86_BUILTIN_VPERMDF512,
30900 IX86_BUILTIN_VPERMDI512,
30901 IX86_BUILTIN_VPERMI2VARD512,
30902 IX86_BUILTIN_VPERMI2VARPD512,
30903 IX86_BUILTIN_VPERMI2VARPS512,
30904 IX86_BUILTIN_VPERMI2VARQ512,
30905 IX86_BUILTIN_VPERMILPD512,
30906 IX86_BUILTIN_VPERMILPS512,
30907 IX86_BUILTIN_VPERMILVARPD512,
30908 IX86_BUILTIN_VPERMILVARPS512,
30909 IX86_BUILTIN_VPERMT2VARD512,
30910 IX86_BUILTIN_VPERMT2VARD512_MASKZ,
30911 IX86_BUILTIN_VPERMT2VARPD512,
30912 IX86_BUILTIN_VPERMT2VARPD512_MASKZ,
30913 IX86_BUILTIN_VPERMT2VARPS512,
30914 IX86_BUILTIN_VPERMT2VARPS512_MASKZ,
30915 IX86_BUILTIN_VPERMT2VARQ512,
30916 IX86_BUILTIN_VPERMT2VARQ512_MASKZ,
30917 IX86_BUILTIN_VPERMVARDF512,
30918 IX86_BUILTIN_VPERMVARDI512,
30919 IX86_BUILTIN_VPERMVARSF512,
30920 IX86_BUILTIN_VPERMVARSI512,
30921 IX86_BUILTIN_VTERNLOGD512_MASK,
30922 IX86_BUILTIN_VTERNLOGD512_MASKZ,
30923 IX86_BUILTIN_VTERNLOGQ512_MASK,
30924 IX86_BUILTIN_VTERNLOGQ512_MASKZ,
30926 /* Mask arithmetic operations */
30927 IX86_BUILTIN_KAND16,
30928 IX86_BUILTIN_KANDN16,
30929 IX86_BUILTIN_KNOT16,
30930 IX86_BUILTIN_KOR16,
30931 IX86_BUILTIN_KORTESTC16,
30932 IX86_BUILTIN_KORTESTZ16,
30933 IX86_BUILTIN_KUNPCKBW,
30934 IX86_BUILTIN_KXNOR16,
30935 IX86_BUILTIN_KXOR16,
30936 IX86_BUILTIN_KMOV16,
30939 IX86_BUILTIN_PMOVUSQD256_MEM,
30940 IX86_BUILTIN_PMOVUSQD128_MEM,
30941 IX86_BUILTIN_PMOVSQD256_MEM,
30942 IX86_BUILTIN_PMOVSQD128_MEM,
30943 IX86_BUILTIN_PMOVQD256_MEM,
30944 IX86_BUILTIN_PMOVQD128_MEM,
30945 IX86_BUILTIN_PMOVUSQW256_MEM,
30946 IX86_BUILTIN_PMOVUSQW128_MEM,
30947 IX86_BUILTIN_PMOVSQW256_MEM,
30948 IX86_BUILTIN_PMOVSQW128_MEM,
30949 IX86_BUILTIN_PMOVQW256_MEM,
30950 IX86_BUILTIN_PMOVQW128_MEM,
30951 IX86_BUILTIN_PMOVUSQB256_MEM,
30952 IX86_BUILTIN_PMOVUSQB128_MEM,
30953 IX86_BUILTIN_PMOVSQB256_MEM,
30954 IX86_BUILTIN_PMOVSQB128_MEM,
30955 IX86_BUILTIN_PMOVQB256_MEM,
30956 IX86_BUILTIN_PMOVQB128_MEM,
30957 IX86_BUILTIN_PMOVUSDW256_MEM,
30958 IX86_BUILTIN_PMOVUSDW128_MEM,
30959 IX86_BUILTIN_PMOVSDW256_MEM,
30960 IX86_BUILTIN_PMOVSDW128_MEM,
30961 IX86_BUILTIN_PMOVDW256_MEM,
30962 IX86_BUILTIN_PMOVDW128_MEM,
30963 IX86_BUILTIN_PMOVUSDB256_MEM,
30964 IX86_BUILTIN_PMOVUSDB128_MEM,
30965 IX86_BUILTIN_PMOVSDB256_MEM,
30966 IX86_BUILTIN_PMOVSDB128_MEM,
30967 IX86_BUILTIN_PMOVDB256_MEM,
30968 IX86_BUILTIN_PMOVDB128_MEM,
30969 IX86_BUILTIN_MOVDQA64LOAD256_MASK,
30970 IX86_BUILTIN_MOVDQA64LOAD128_MASK,
30971 IX86_BUILTIN_MOVDQA32LOAD256_MASK,
30972 IX86_BUILTIN_MOVDQA32LOAD128_MASK,
30973 IX86_BUILTIN_MOVDQA64STORE256_MASK,
30974 IX86_BUILTIN_MOVDQA64STORE128_MASK,
30975 IX86_BUILTIN_MOVDQA32STORE256_MASK,
30976 IX86_BUILTIN_MOVDQA32STORE128_MASK,
30977 IX86_BUILTIN_LOADAPD256_MASK,
30978 IX86_BUILTIN_LOADAPD128_MASK,
30979 IX86_BUILTIN_LOADAPS256_MASK,
30980 IX86_BUILTIN_LOADAPS128_MASK,
30981 IX86_BUILTIN_STOREAPD256_MASK,
30982 IX86_BUILTIN_STOREAPD128_MASK,
30983 IX86_BUILTIN_STOREAPS256_MASK,
30984 IX86_BUILTIN_STOREAPS128_MASK,
30985 IX86_BUILTIN_LOADUPD256_MASK,
30986 IX86_BUILTIN_LOADUPD128_MASK,
30987 IX86_BUILTIN_LOADUPS256_MASK,
30988 IX86_BUILTIN_LOADUPS128_MASK,
30989 IX86_BUILTIN_STOREUPD256_MASK,
30990 IX86_BUILTIN_STOREUPD128_MASK,
30991 IX86_BUILTIN_STOREUPS256_MASK,
30992 IX86_BUILTIN_STOREUPS128_MASK,
30993 IX86_BUILTIN_LOADDQUDI256_MASK,
30994 IX86_BUILTIN_LOADDQUDI128_MASK,
30995 IX86_BUILTIN_LOADDQUSI256_MASK,
30996 IX86_BUILTIN_LOADDQUSI128_MASK,
30997 IX86_BUILTIN_LOADDQUHI256_MASK,
30998 IX86_BUILTIN_LOADDQUHI128_MASK,
30999 IX86_BUILTIN_LOADDQUQI256_MASK,
31000 IX86_BUILTIN_LOADDQUQI128_MASK,
31001 IX86_BUILTIN_STOREDQUDI256_MASK,
31002 IX86_BUILTIN_STOREDQUDI128_MASK,
31003 IX86_BUILTIN_STOREDQUSI256_MASK,
31004 IX86_BUILTIN_STOREDQUSI128_MASK,
31005 IX86_BUILTIN_STOREDQUHI256_MASK,
31006 IX86_BUILTIN_STOREDQUHI128_MASK,
31007 IX86_BUILTIN_STOREDQUQI256_MASK,
31008 IX86_BUILTIN_STOREDQUQI128_MASK,
31009 IX86_BUILTIN_COMPRESSPDSTORE256,
31010 IX86_BUILTIN_COMPRESSPDSTORE128,
31011 IX86_BUILTIN_COMPRESSPSSTORE256,
31012 IX86_BUILTIN_COMPRESSPSSTORE128,
31013 IX86_BUILTIN_PCOMPRESSQSTORE256,
31014 IX86_BUILTIN_PCOMPRESSQSTORE128,
31015 IX86_BUILTIN_PCOMPRESSDSTORE256,
31016 IX86_BUILTIN_PCOMPRESSDSTORE128,
31017 IX86_BUILTIN_EXPANDPDLOAD256,
31018 IX86_BUILTIN_EXPANDPDLOAD128,
31019 IX86_BUILTIN_EXPANDPSLOAD256,
31020 IX86_BUILTIN_EXPANDPSLOAD128,
31021 IX86_BUILTIN_PEXPANDQLOAD256,
31022 IX86_BUILTIN_PEXPANDQLOAD128,
31023 IX86_BUILTIN_PEXPANDDLOAD256,
31024 IX86_BUILTIN_PEXPANDDLOAD128,
31025 IX86_BUILTIN_EXPANDPDLOAD256Z,
31026 IX86_BUILTIN_EXPANDPDLOAD128Z,
31027 IX86_BUILTIN_EXPANDPSLOAD256Z,
31028 IX86_BUILTIN_EXPANDPSLOAD128Z,
31029 IX86_BUILTIN_PEXPANDQLOAD256Z,
31030 IX86_BUILTIN_PEXPANDQLOAD128Z,
31031 IX86_BUILTIN_PEXPANDDLOAD256Z,
31032 IX86_BUILTIN_PEXPANDDLOAD128Z,
31033 IX86_BUILTIN_PALIGNR256_MASK,
31034 IX86_BUILTIN_PALIGNR128_MASK,
31035 IX86_BUILTIN_MOVDQA64_256_MASK,
31036 IX86_BUILTIN_MOVDQA64_128_MASK,
31037 IX86_BUILTIN_MOVDQA32_256_MASK,
31038 IX86_BUILTIN_MOVDQA32_128_MASK,
31039 IX86_BUILTIN_MOVAPD256_MASK,
31040 IX86_BUILTIN_MOVAPD128_MASK,
31041 IX86_BUILTIN_MOVAPS256_MASK,
31042 IX86_BUILTIN_MOVAPS128_MASK,
31043 IX86_BUILTIN_MOVDQUHI256_MASK,
31044 IX86_BUILTIN_MOVDQUHI128_MASK,
31045 IX86_BUILTIN_MOVDQUQI256_MASK,
31046 IX86_BUILTIN_MOVDQUQI128_MASK,
31047 IX86_BUILTIN_MINPS128_MASK,
31048 IX86_BUILTIN_MAXPS128_MASK,
31049 IX86_BUILTIN_MINPD128_MASK,
31050 IX86_BUILTIN_MAXPD128_MASK,
31051 IX86_BUILTIN_MAXPD256_MASK,
31052 IX86_BUILTIN_MAXPS256_MASK,
31053 IX86_BUILTIN_MINPD256_MASK,
31054 IX86_BUILTIN_MINPS256_MASK,
31055 IX86_BUILTIN_MULPS128_MASK,
31056 IX86_BUILTIN_DIVPS128_MASK,
31057 IX86_BUILTIN_MULPD128_MASK,
31058 IX86_BUILTIN_DIVPD128_MASK,
31059 IX86_BUILTIN_DIVPD256_MASK,
31060 IX86_BUILTIN_DIVPS256_MASK,
31061 IX86_BUILTIN_MULPD256_MASK,
31062 IX86_BUILTIN_MULPS256_MASK,
31063 IX86_BUILTIN_ADDPD128_MASK,
31064 IX86_BUILTIN_ADDPD256_MASK,
31065 IX86_BUILTIN_ADDPS128_MASK,
31066 IX86_BUILTIN_ADDPS256_MASK,
31067 IX86_BUILTIN_SUBPD128_MASK,
31068 IX86_BUILTIN_SUBPD256_MASK,
31069 IX86_BUILTIN_SUBPS128_MASK,
31070 IX86_BUILTIN_SUBPS256_MASK,
31071 IX86_BUILTIN_XORPD256_MASK,
31072 IX86_BUILTIN_XORPD128_MASK,
31073 IX86_BUILTIN_XORPS256_MASK,
31074 IX86_BUILTIN_XORPS128_MASK,
31075 IX86_BUILTIN_ORPD256_MASK,
31076 IX86_BUILTIN_ORPD128_MASK,
31077 IX86_BUILTIN_ORPS256_MASK,
31078 IX86_BUILTIN_ORPS128_MASK,
31079 IX86_BUILTIN_BROADCASTF32x2_256,
31080 IX86_BUILTIN_BROADCASTI32x2_256,
31081 IX86_BUILTIN_BROADCASTI32x2_128,
31082 IX86_BUILTIN_BROADCASTF64X2_256,
31083 IX86_BUILTIN_BROADCASTI64X2_256,
31084 IX86_BUILTIN_BROADCASTF32X4_256,
31085 IX86_BUILTIN_BROADCASTI32X4_256,
31086 IX86_BUILTIN_EXTRACTF32X4_256,
31087 IX86_BUILTIN_EXTRACTI32X4_256,
31088 IX86_BUILTIN_DBPSADBW256,
31089 IX86_BUILTIN_DBPSADBW128,
31090 IX86_BUILTIN_CVTTPD2QQ256,
31091 IX86_BUILTIN_CVTTPD2QQ128,
31092 IX86_BUILTIN_CVTTPD2UQQ256,
31093 IX86_BUILTIN_CVTTPD2UQQ128,
31094 IX86_BUILTIN_CVTPD2QQ256,
31095 IX86_BUILTIN_CVTPD2QQ128,
31096 IX86_BUILTIN_CVTPD2UQQ256,
31097 IX86_BUILTIN_CVTPD2UQQ128,
31098 IX86_BUILTIN_CVTPD2UDQ256_MASK,
31099 IX86_BUILTIN_CVTPD2UDQ128_MASK,
31100 IX86_BUILTIN_CVTTPS2QQ256,
31101 IX86_BUILTIN_CVTTPS2QQ128,
31102 IX86_BUILTIN_CVTTPS2UQQ256,
31103 IX86_BUILTIN_CVTTPS2UQQ128,
31104 IX86_BUILTIN_CVTTPS2DQ256_MASK,
31105 IX86_BUILTIN_CVTTPS2DQ128_MASK,
31106 IX86_BUILTIN_CVTTPS2UDQ256,
31107 IX86_BUILTIN_CVTTPS2UDQ128,
31108 IX86_BUILTIN_CVTTPD2DQ256_MASK,
31109 IX86_BUILTIN_CVTTPD2DQ128_MASK,
31110 IX86_BUILTIN_CVTTPD2UDQ256_MASK,
31111 IX86_BUILTIN_CVTTPD2UDQ128_MASK,
31112 IX86_BUILTIN_CVTPD2DQ256_MASK,
31113 IX86_BUILTIN_CVTPD2DQ128_MASK,
31114 IX86_BUILTIN_CVTDQ2PD256_MASK,
31115 IX86_BUILTIN_CVTDQ2PD128_MASK,
31116 IX86_BUILTIN_CVTUDQ2PD256_MASK,
31117 IX86_BUILTIN_CVTUDQ2PD128_MASK,
31118 IX86_BUILTIN_CVTDQ2PS256_MASK,
31119 IX86_BUILTIN_CVTDQ2PS128_MASK,
31120 IX86_BUILTIN_CVTUDQ2PS256_MASK,
31121 IX86_BUILTIN_CVTUDQ2PS128_MASK,
31122 IX86_BUILTIN_CVTPS2PD256_MASK,
31123 IX86_BUILTIN_CVTPS2PD128_MASK,
31124 IX86_BUILTIN_PBROADCASTB256_MASK,
31125 IX86_BUILTIN_PBROADCASTB256_GPR_MASK,
31126 IX86_BUILTIN_PBROADCASTB128_MASK,
31127 IX86_BUILTIN_PBROADCASTB128_GPR_MASK,
31128 IX86_BUILTIN_PBROADCASTW256_MASK,
31129 IX86_BUILTIN_PBROADCASTW256_GPR_MASK,
31130 IX86_BUILTIN_PBROADCASTW128_MASK,
31131 IX86_BUILTIN_PBROADCASTW128_GPR_MASK,
31132 IX86_BUILTIN_PBROADCASTD256_MASK,
31133 IX86_BUILTIN_PBROADCASTD256_GPR_MASK,
31134 IX86_BUILTIN_PBROADCASTD128_MASK,
31135 IX86_BUILTIN_PBROADCASTD128_GPR_MASK,
31136 IX86_BUILTIN_PBROADCASTQ256_MASK,
31137 IX86_BUILTIN_PBROADCASTQ256_GPR_MASK,
31138 IX86_BUILTIN_PBROADCASTQ128_MASK,
31139 IX86_BUILTIN_PBROADCASTQ128_GPR_MASK,
31140 IX86_BUILTIN_BROADCASTSS256,
31141 IX86_BUILTIN_BROADCASTSS128,
31142 IX86_BUILTIN_BROADCASTSD256,
31143 IX86_BUILTIN_EXTRACTF64X2_256,
31144 IX86_BUILTIN_EXTRACTI64X2_256,
31145 IX86_BUILTIN_INSERTF32X4_256,
31146 IX86_BUILTIN_INSERTI32X4_256,
31147 IX86_BUILTIN_PMOVSXBW256_MASK,
31148 IX86_BUILTIN_PMOVSXBW128_MASK,
31149 IX86_BUILTIN_PMOVSXBD256_MASK,
31150 IX86_BUILTIN_PMOVSXBD128_MASK,
31151 IX86_BUILTIN_PMOVSXBQ256_MASK,
31152 IX86_BUILTIN_PMOVSXBQ128_MASK,
31153 IX86_BUILTIN_PMOVSXWD256_MASK,
31154 IX86_BUILTIN_PMOVSXWD128_MASK,
31155 IX86_BUILTIN_PMOVSXWQ256_MASK,
31156 IX86_BUILTIN_PMOVSXWQ128_MASK,
31157 IX86_BUILTIN_PMOVSXDQ256_MASK,
31158 IX86_BUILTIN_PMOVSXDQ128_MASK,
31159 IX86_BUILTIN_PMOVZXBW256_MASK,
31160 IX86_BUILTIN_PMOVZXBW128_MASK,
31161 IX86_BUILTIN_PMOVZXBD256_MASK,
31162 IX86_BUILTIN_PMOVZXBD128_MASK,
31163 IX86_BUILTIN_PMOVZXBQ256_MASK,
31164 IX86_BUILTIN_PMOVZXBQ128_MASK,
31165 IX86_BUILTIN_PMOVZXWD256_MASK,
31166 IX86_BUILTIN_PMOVZXWD128_MASK,
31167 IX86_BUILTIN_PMOVZXWQ256_MASK,
31168 IX86_BUILTIN_PMOVZXWQ128_MASK,
31169 IX86_BUILTIN_PMOVZXDQ256_MASK,
31170 IX86_BUILTIN_PMOVZXDQ128_MASK,
31171 IX86_BUILTIN_REDUCEPD256_MASK,
31172 IX86_BUILTIN_REDUCEPD128_MASK,
31173 IX86_BUILTIN_REDUCEPS256_MASK,
31174 IX86_BUILTIN_REDUCEPS128_MASK,
31175 IX86_BUILTIN_REDUCESD_MASK,
31176 IX86_BUILTIN_REDUCESS_MASK,
31177 IX86_BUILTIN_VPERMVARHI256_MASK,
31178 IX86_BUILTIN_VPERMVARHI128_MASK,
31179 IX86_BUILTIN_VPERMT2VARHI256,
31180 IX86_BUILTIN_VPERMT2VARHI256_MASKZ,
31181 IX86_BUILTIN_VPERMT2VARHI128,
31182 IX86_BUILTIN_VPERMT2VARHI128_MASKZ,
31183 IX86_BUILTIN_VPERMI2VARHI256,
31184 IX86_BUILTIN_VPERMI2VARHI128,
31185 IX86_BUILTIN_RCP14PD256,
31186 IX86_BUILTIN_RCP14PD128,
31187 IX86_BUILTIN_RCP14PS256,
31188 IX86_BUILTIN_RCP14PS128,
31189 IX86_BUILTIN_RSQRT14PD256_MASK,
31190 IX86_BUILTIN_RSQRT14PD128_MASK,
31191 IX86_BUILTIN_RSQRT14PS256_MASK,
31192 IX86_BUILTIN_RSQRT14PS128_MASK,
31193 IX86_BUILTIN_SQRTPD256_MASK,
31194 IX86_BUILTIN_SQRTPD128_MASK,
31195 IX86_BUILTIN_SQRTPS256_MASK,
31196 IX86_BUILTIN_SQRTPS128_MASK,
31197 IX86_BUILTIN_PADDB128_MASK,
31198 IX86_BUILTIN_PADDW128_MASK,
31199 IX86_BUILTIN_PADDD128_MASK,
31200 IX86_BUILTIN_PADDQ128_MASK,
31201 IX86_BUILTIN_PSUBB128_MASK,
31202 IX86_BUILTIN_PSUBW128_MASK,
31203 IX86_BUILTIN_PSUBD128_MASK,
31204 IX86_BUILTIN_PSUBQ128_MASK,
31205 IX86_BUILTIN_PADDSB128_MASK,
31206 IX86_BUILTIN_PADDSW128_MASK,
31207 IX86_BUILTIN_PSUBSB128_MASK,
31208 IX86_BUILTIN_PSUBSW128_MASK,
31209 IX86_BUILTIN_PADDUSB128_MASK,
31210 IX86_BUILTIN_PADDUSW128_MASK,
31211 IX86_BUILTIN_PSUBUSB128_MASK,
31212 IX86_BUILTIN_PSUBUSW128_MASK,
31213 IX86_BUILTIN_PADDB256_MASK,
31214 IX86_BUILTIN_PADDW256_MASK,
31215 IX86_BUILTIN_PADDD256_MASK,
31216 IX86_BUILTIN_PADDQ256_MASK,
31217 IX86_BUILTIN_PADDSB256_MASK,
31218 IX86_BUILTIN_PADDSW256_MASK,
31219 IX86_BUILTIN_PADDUSB256_MASK,
31220 IX86_BUILTIN_PADDUSW256_MASK,
31221 IX86_BUILTIN_PSUBB256_MASK,
31222 IX86_BUILTIN_PSUBW256_MASK,
31223 IX86_BUILTIN_PSUBD256_MASK,
31224 IX86_BUILTIN_PSUBQ256_MASK,
31225 IX86_BUILTIN_PSUBSB256_MASK,
31226 IX86_BUILTIN_PSUBSW256_MASK,
31227 IX86_BUILTIN_PSUBUSB256_MASK,
31228 IX86_BUILTIN_PSUBUSW256_MASK,
31229 IX86_BUILTIN_SHUF_F64x2_256,
31230 IX86_BUILTIN_SHUF_I64x2_256,
31231 IX86_BUILTIN_SHUF_I32x4_256,
31232 IX86_BUILTIN_SHUF_F32x4_256,
31233 IX86_BUILTIN_PMOVWB128,
31234 IX86_BUILTIN_PMOVWB256,
31235 IX86_BUILTIN_PMOVSWB128,
31236 IX86_BUILTIN_PMOVSWB256,
31237 IX86_BUILTIN_PMOVUSWB128,
31238 IX86_BUILTIN_PMOVUSWB256,
31239 IX86_BUILTIN_PMOVDB128,
31240 IX86_BUILTIN_PMOVDB256,
31241 IX86_BUILTIN_PMOVSDB128,
31242 IX86_BUILTIN_PMOVSDB256,
31243 IX86_BUILTIN_PMOVUSDB128,
31244 IX86_BUILTIN_PMOVUSDB256,
31245 IX86_BUILTIN_PMOVDW128,
31246 IX86_BUILTIN_PMOVDW256,
31247 IX86_BUILTIN_PMOVSDW128,
31248 IX86_BUILTIN_PMOVSDW256,
31249 IX86_BUILTIN_PMOVUSDW128,
31250 IX86_BUILTIN_PMOVUSDW256,
31251 IX86_BUILTIN_PMOVQB128,
31252 IX86_BUILTIN_PMOVQB256,
31253 IX86_BUILTIN_PMOVSQB128,
31254 IX86_BUILTIN_PMOVSQB256,
31255 IX86_BUILTIN_PMOVUSQB128,
31256 IX86_BUILTIN_PMOVUSQB256,
31257 IX86_BUILTIN_PMOVQW128,
31258 IX86_BUILTIN_PMOVQW256,
31259 IX86_BUILTIN_PMOVSQW128,
31260 IX86_BUILTIN_PMOVSQW256,
31261 IX86_BUILTIN_PMOVUSQW128,
31262 IX86_BUILTIN_PMOVUSQW256,
31263 IX86_BUILTIN_PMOVQD128,
31264 IX86_BUILTIN_PMOVQD256,
31265 IX86_BUILTIN_PMOVSQD128,
31266 IX86_BUILTIN_PMOVSQD256,
31267 IX86_BUILTIN_PMOVUSQD128,
31268 IX86_BUILTIN_PMOVUSQD256,
31269 IX86_BUILTIN_RANGEPD256,
31270 IX86_BUILTIN_RANGEPD128,
31271 IX86_BUILTIN_RANGEPS256,
31272 IX86_BUILTIN_RANGEPS128,
31273 IX86_BUILTIN_GETEXPPS256,
31274 IX86_BUILTIN_GETEXPPD256,
31275 IX86_BUILTIN_GETEXPPS128,
31276 IX86_BUILTIN_GETEXPPD128,
31277 IX86_BUILTIN_FIXUPIMMPD256_MASK,
31278 IX86_BUILTIN_FIXUPIMMPD256_MASKZ,
31279 IX86_BUILTIN_FIXUPIMMPS256_MASK,
31280 IX86_BUILTIN_FIXUPIMMPS256_MASKZ,
31281 IX86_BUILTIN_FIXUPIMMPD128_MASK,
31282 IX86_BUILTIN_FIXUPIMMPD128_MASKZ,
31283 IX86_BUILTIN_FIXUPIMMPS128_MASK,
31284 IX86_BUILTIN_FIXUPIMMPS128_MASKZ,
31285 IX86_BUILTIN_PABSQ256,
31286 IX86_BUILTIN_PABSQ128,
31287 IX86_BUILTIN_PABSD256_MASK,
31288 IX86_BUILTIN_PABSD128_MASK,
31289 IX86_BUILTIN_PMULHRSW256_MASK,
31290 IX86_BUILTIN_PMULHRSW128_MASK,
31291 IX86_BUILTIN_PMULHUW128_MASK,
31292 IX86_BUILTIN_PMULHUW256_MASK,
31293 IX86_BUILTIN_PMULHW256_MASK,
31294 IX86_BUILTIN_PMULHW128_MASK,
31295 IX86_BUILTIN_PMULLW256_MASK,
31296 IX86_BUILTIN_PMULLW128_MASK,
31297 IX86_BUILTIN_PMULLQ256,
31298 IX86_BUILTIN_PMULLQ128,
31299 IX86_BUILTIN_ANDPD256_MASK,
31300 IX86_BUILTIN_ANDPD128_MASK,
31301 IX86_BUILTIN_ANDPS256_MASK,
31302 IX86_BUILTIN_ANDPS128_MASK,
31303 IX86_BUILTIN_ANDNPD256_MASK,
31304 IX86_BUILTIN_ANDNPD128_MASK,
31305 IX86_BUILTIN_ANDNPS256_MASK,
31306 IX86_BUILTIN_ANDNPS128_MASK,
31307 IX86_BUILTIN_PSLLWI128_MASK,
31308 IX86_BUILTIN_PSLLDI128_MASK,
31309 IX86_BUILTIN_PSLLQI128_MASK,
31310 IX86_BUILTIN_PSLLW128_MASK,
31311 IX86_BUILTIN_PSLLD128_MASK,
31312 IX86_BUILTIN_PSLLQ128_MASK,
31313 IX86_BUILTIN_PSLLWI256_MASK ,
31314 IX86_BUILTIN_PSLLW256_MASK,
31315 IX86_BUILTIN_PSLLDI256_MASK,
31316 IX86_BUILTIN_PSLLD256_MASK,
31317 IX86_BUILTIN_PSLLQI256_MASK,
31318 IX86_BUILTIN_PSLLQ256_MASK,
31319 IX86_BUILTIN_PSRADI128_MASK,
31320 IX86_BUILTIN_PSRAD128_MASK,
31321 IX86_BUILTIN_PSRADI256_MASK,
31322 IX86_BUILTIN_PSRAD256_MASK,
31323 IX86_BUILTIN_PSRAQI128_MASK,
31324 IX86_BUILTIN_PSRAQ128_MASK,
31325 IX86_BUILTIN_PSRAQI256_MASK,
31326 IX86_BUILTIN_PSRAQ256_MASK,
31327 IX86_BUILTIN_PANDD256,
31328 IX86_BUILTIN_PANDD128,
31329 IX86_BUILTIN_PSRLDI128_MASK,
31330 IX86_BUILTIN_PSRLD128_MASK,
31331 IX86_BUILTIN_PSRLDI256_MASK,
31332 IX86_BUILTIN_PSRLD256_MASK,
31333 IX86_BUILTIN_PSRLQI128_MASK,
31334 IX86_BUILTIN_PSRLQ128_MASK,
31335 IX86_BUILTIN_PSRLQI256_MASK,
31336 IX86_BUILTIN_PSRLQ256_MASK,
31337 IX86_BUILTIN_PANDQ256,
31338 IX86_BUILTIN_PANDQ128,
31339 IX86_BUILTIN_PANDND256,
31340 IX86_BUILTIN_PANDND128,
31341 IX86_BUILTIN_PANDNQ256,
31342 IX86_BUILTIN_PANDNQ128,
31343 IX86_BUILTIN_PORD256,
31344 IX86_BUILTIN_PORD128,
31345 IX86_BUILTIN_PORQ256,
31346 IX86_BUILTIN_PORQ128,
31347 IX86_BUILTIN_PXORD256,
31348 IX86_BUILTIN_PXORD128,
31349 IX86_BUILTIN_PXORQ256,
31350 IX86_BUILTIN_PXORQ128,
31351 IX86_BUILTIN_PACKSSWB256_MASK,
31352 IX86_BUILTIN_PACKSSWB128_MASK,
31353 IX86_BUILTIN_PACKUSWB256_MASK,
31354 IX86_BUILTIN_PACKUSWB128_MASK,
31355 IX86_BUILTIN_RNDSCALEPS256,
31356 IX86_BUILTIN_RNDSCALEPD256,
31357 IX86_BUILTIN_RNDSCALEPS128,
31358 IX86_BUILTIN_RNDSCALEPD128,
31359 IX86_BUILTIN_VTERNLOGQ256_MASK,
31360 IX86_BUILTIN_VTERNLOGQ256_MASKZ,
31361 IX86_BUILTIN_VTERNLOGD256_MASK,
31362 IX86_BUILTIN_VTERNLOGD256_MASKZ,
31363 IX86_BUILTIN_VTERNLOGQ128_MASK,
31364 IX86_BUILTIN_VTERNLOGQ128_MASKZ,
31365 IX86_BUILTIN_VTERNLOGD128_MASK,
31366 IX86_BUILTIN_VTERNLOGD128_MASKZ,
31367 IX86_BUILTIN_SCALEFPD256,
31368 IX86_BUILTIN_SCALEFPS256,
31369 IX86_BUILTIN_SCALEFPD128,
31370 IX86_BUILTIN_SCALEFPS128,
31371 IX86_BUILTIN_VFMADDPD256_MASK,
31372 IX86_BUILTIN_VFMADDPD256_MASK3,
31373 IX86_BUILTIN_VFMADDPD256_MASKZ,
31374 IX86_BUILTIN_VFMADDPD128_MASK,
31375 IX86_BUILTIN_VFMADDPD128_MASK3,
31376 IX86_BUILTIN_VFMADDPD128_MASKZ,
31377 IX86_BUILTIN_VFMADDPS256_MASK,
31378 IX86_BUILTIN_VFMADDPS256_MASK3,
31379 IX86_BUILTIN_VFMADDPS256_MASKZ,
31380 IX86_BUILTIN_VFMADDPS128_MASK,
31381 IX86_BUILTIN_VFMADDPS128_MASK3,
31382 IX86_BUILTIN_VFMADDPS128_MASKZ,
31383 IX86_BUILTIN_VFMSUBPD256_MASK3,
31384 IX86_BUILTIN_VFMSUBPD128_MASK3,
31385 IX86_BUILTIN_VFMSUBPS256_MASK3,
31386 IX86_BUILTIN_VFMSUBPS128_MASK3,
31387 IX86_BUILTIN_VFNMADDPD256_MASK,
31388 IX86_BUILTIN_VFNMADDPD128_MASK,
31389 IX86_BUILTIN_VFNMADDPS256_MASK,
31390 IX86_BUILTIN_VFNMADDPS128_MASK,
31391 IX86_BUILTIN_VFNMSUBPD256_MASK,
31392 IX86_BUILTIN_VFNMSUBPD256_MASK3,
31393 IX86_BUILTIN_VFNMSUBPD128_MASK,
31394 IX86_BUILTIN_VFNMSUBPD128_MASK3,
31395 IX86_BUILTIN_VFNMSUBPS256_MASK,
31396 IX86_BUILTIN_VFNMSUBPS256_MASK3,
31397 IX86_BUILTIN_VFNMSUBPS128_MASK,
31398 IX86_BUILTIN_VFNMSUBPS128_MASK3,
31399 IX86_BUILTIN_VFMADDSUBPD256_MASK,
31400 IX86_BUILTIN_VFMADDSUBPD256_MASK3,
31401 IX86_BUILTIN_VFMADDSUBPD256_MASKZ,
31402 IX86_BUILTIN_VFMADDSUBPD128_MASK,
31403 IX86_BUILTIN_VFMADDSUBPD128_MASK3,
31404 IX86_BUILTIN_VFMADDSUBPD128_MASKZ,
31405 IX86_BUILTIN_VFMADDSUBPS256_MASK,
31406 IX86_BUILTIN_VFMADDSUBPS256_MASK3,
31407 IX86_BUILTIN_VFMADDSUBPS256_MASKZ,
31408 IX86_BUILTIN_VFMADDSUBPS128_MASK,
31409 IX86_BUILTIN_VFMADDSUBPS128_MASK3,
31410 IX86_BUILTIN_VFMADDSUBPS128_MASKZ,
31411 IX86_BUILTIN_VFMSUBADDPD256_MASK3,
31412 IX86_BUILTIN_VFMSUBADDPD128_MASK3,
31413 IX86_BUILTIN_VFMSUBADDPS256_MASK3,
31414 IX86_BUILTIN_VFMSUBADDPS128_MASK3,
31415 IX86_BUILTIN_INSERTF64X2_256,
31416 IX86_BUILTIN_INSERTI64X2_256,
31417 IX86_BUILTIN_PSRAVV16HI,
31418 IX86_BUILTIN_PSRAVV8HI,
31419 IX86_BUILTIN_PMADDUBSW256_MASK,
31420 IX86_BUILTIN_PMADDUBSW128_MASK,
31421 IX86_BUILTIN_PMADDWD256_MASK,
31422 IX86_BUILTIN_PMADDWD128_MASK,
31423 IX86_BUILTIN_PSRLVV16HI,
31424 IX86_BUILTIN_PSRLVV8HI,
31425 IX86_BUILTIN_CVTPS2DQ256_MASK,
31426 IX86_BUILTIN_CVTPS2DQ128_MASK,
31427 IX86_BUILTIN_CVTPS2UDQ256,
31428 IX86_BUILTIN_CVTPS2UDQ128,
31429 IX86_BUILTIN_CVTPS2QQ256,
31430 IX86_BUILTIN_CVTPS2QQ128,
31431 IX86_BUILTIN_CVTPS2UQQ256,
31432 IX86_BUILTIN_CVTPS2UQQ128,
31433 IX86_BUILTIN_GETMANTPS256,
31434 IX86_BUILTIN_GETMANTPS128,
31435 IX86_BUILTIN_GETMANTPD256,
31436 IX86_BUILTIN_GETMANTPD128,
31437 IX86_BUILTIN_MOVDDUP256_MASK,
31438 IX86_BUILTIN_MOVDDUP128_MASK,
31439 IX86_BUILTIN_MOVSHDUP256_MASK,
31440 IX86_BUILTIN_MOVSHDUP128_MASK,
31441 IX86_BUILTIN_MOVSLDUP256_MASK,
31442 IX86_BUILTIN_MOVSLDUP128_MASK,
31443 IX86_BUILTIN_CVTQQ2PS256,
31444 IX86_BUILTIN_CVTQQ2PS128,
31445 IX86_BUILTIN_CVTUQQ2PS256,
31446 IX86_BUILTIN_CVTUQQ2PS128,
31447 IX86_BUILTIN_CVTQQ2PD256,
31448 IX86_BUILTIN_CVTQQ2PD128,
31449 IX86_BUILTIN_CVTUQQ2PD256,
31450 IX86_BUILTIN_CVTUQQ2PD128,
31451 IX86_BUILTIN_VPERMT2VARQ256,
31452 IX86_BUILTIN_VPERMT2VARQ256_MASKZ,
31453 IX86_BUILTIN_VPERMT2VARD256,
31454 IX86_BUILTIN_VPERMT2VARD256_MASKZ,
31455 IX86_BUILTIN_VPERMI2VARQ256,
31456 IX86_BUILTIN_VPERMI2VARD256,
31457 IX86_BUILTIN_VPERMT2VARPD256,
31458 IX86_BUILTIN_VPERMT2VARPD256_MASKZ,
31459 IX86_BUILTIN_VPERMT2VARPS256,
31460 IX86_BUILTIN_VPERMT2VARPS256_MASKZ,
31461 IX86_BUILTIN_VPERMI2VARPD256,
31462 IX86_BUILTIN_VPERMI2VARPS256,
31463 IX86_BUILTIN_VPERMT2VARQ128,
31464 IX86_BUILTIN_VPERMT2VARQ128_MASKZ,
31465 IX86_BUILTIN_VPERMT2VARD128,
31466 IX86_BUILTIN_VPERMT2VARD128_MASKZ,
31467 IX86_BUILTIN_VPERMI2VARQ128,
31468 IX86_BUILTIN_VPERMI2VARD128,
31469 IX86_BUILTIN_VPERMT2VARPD128,
31470 IX86_BUILTIN_VPERMT2VARPD128_MASKZ,
31471 IX86_BUILTIN_VPERMT2VARPS128,
31472 IX86_BUILTIN_VPERMT2VARPS128_MASKZ,
31473 IX86_BUILTIN_VPERMI2VARPD128,
31474 IX86_BUILTIN_VPERMI2VARPS128,
31475 IX86_BUILTIN_PSHUFB256_MASK,
31476 IX86_BUILTIN_PSHUFB128_MASK,
31477 IX86_BUILTIN_PSHUFHW256_MASK,
31478 IX86_BUILTIN_PSHUFHW128_MASK,
31479 IX86_BUILTIN_PSHUFLW256_MASK,
31480 IX86_BUILTIN_PSHUFLW128_MASK,
31481 IX86_BUILTIN_PSHUFD256_MASK,
31482 IX86_BUILTIN_PSHUFD128_MASK,
31483 IX86_BUILTIN_SHUFPD256_MASK,
31484 IX86_BUILTIN_SHUFPD128_MASK,
31485 IX86_BUILTIN_SHUFPS256_MASK,
31486 IX86_BUILTIN_SHUFPS128_MASK,
31487 IX86_BUILTIN_PROLVQ256,
31488 IX86_BUILTIN_PROLVQ128,
31489 IX86_BUILTIN_PROLQ256,
31490 IX86_BUILTIN_PROLQ128,
31491 IX86_BUILTIN_PRORVQ256,
31492 IX86_BUILTIN_PRORVQ128,
31493 IX86_BUILTIN_PRORQ256,
31494 IX86_BUILTIN_PRORQ128,
31495 IX86_BUILTIN_PSRAVQ128,
31496 IX86_BUILTIN_PSRAVQ256,
31497 IX86_BUILTIN_PSLLVV4DI_MASK,
31498 IX86_BUILTIN_PSLLVV2DI_MASK,
31499 IX86_BUILTIN_PSLLVV8SI_MASK,
31500 IX86_BUILTIN_PSLLVV4SI_MASK,
31501 IX86_BUILTIN_PSRAVV8SI_MASK,
31502 IX86_BUILTIN_PSRAVV4SI_MASK,
31503 IX86_BUILTIN_PSRLVV4DI_MASK,
31504 IX86_BUILTIN_PSRLVV2DI_MASK,
31505 IX86_BUILTIN_PSRLVV8SI_MASK,
31506 IX86_BUILTIN_PSRLVV4SI_MASK,
31507 IX86_BUILTIN_PSRAWI256_MASK,
31508 IX86_BUILTIN_PSRAW256_MASK,
31509 IX86_BUILTIN_PSRAWI128_MASK,
31510 IX86_BUILTIN_PSRAW128_MASK,
31511 IX86_BUILTIN_PSRLWI256_MASK,
31512 IX86_BUILTIN_PSRLW256_MASK,
31513 IX86_BUILTIN_PSRLWI128_MASK,
31514 IX86_BUILTIN_PSRLW128_MASK,
31515 IX86_BUILTIN_PRORVD256,
31516 IX86_BUILTIN_PROLVD256,
31517 IX86_BUILTIN_PRORD256,
31518 IX86_BUILTIN_PROLD256,
31519 IX86_BUILTIN_PRORVD128,
31520 IX86_BUILTIN_PROLVD128,
31521 IX86_BUILTIN_PRORD128,
31522 IX86_BUILTIN_PROLD128,
31523 IX86_BUILTIN_FPCLASSPD256,
31524 IX86_BUILTIN_FPCLASSPD128,
31525 IX86_BUILTIN_FPCLASSSD,
31526 IX86_BUILTIN_FPCLASSPS256,
31527 IX86_BUILTIN_FPCLASSPS128,
31528 IX86_BUILTIN_FPCLASSSS,
31529 IX86_BUILTIN_CVTB2MASK128,
31530 IX86_BUILTIN_CVTB2MASK256,
31531 IX86_BUILTIN_CVTW2MASK128,
31532 IX86_BUILTIN_CVTW2MASK256,
31533 IX86_BUILTIN_CVTD2MASK128,
31534 IX86_BUILTIN_CVTD2MASK256,
31535 IX86_BUILTIN_CVTQ2MASK128,
31536 IX86_BUILTIN_CVTQ2MASK256,
31537 IX86_BUILTIN_CVTMASK2B128,
31538 IX86_BUILTIN_CVTMASK2B256,
31539 IX86_BUILTIN_CVTMASK2W128,
31540 IX86_BUILTIN_CVTMASK2W256,
31541 IX86_BUILTIN_CVTMASK2D128,
31542 IX86_BUILTIN_CVTMASK2D256,
31543 IX86_BUILTIN_CVTMASK2Q128,
31544 IX86_BUILTIN_CVTMASK2Q256,
31545 IX86_BUILTIN_PCMPEQB128_MASK,
31546 IX86_BUILTIN_PCMPEQB256_MASK,
31547 IX86_BUILTIN_PCMPEQW128_MASK,
31548 IX86_BUILTIN_PCMPEQW256_MASK,
31549 IX86_BUILTIN_PCMPEQD128_MASK,
31550 IX86_BUILTIN_PCMPEQD256_MASK,
31551 IX86_BUILTIN_PCMPEQQ128_MASK,
31552 IX86_BUILTIN_PCMPEQQ256_MASK,
31553 IX86_BUILTIN_PCMPGTB128_MASK,
31554 IX86_BUILTIN_PCMPGTB256_MASK,
31555 IX86_BUILTIN_PCMPGTW128_MASK,
31556 IX86_BUILTIN_PCMPGTW256_MASK,
31557 IX86_BUILTIN_PCMPGTD128_MASK,
31558 IX86_BUILTIN_PCMPGTD256_MASK,
31559 IX86_BUILTIN_PCMPGTQ128_MASK,
31560 IX86_BUILTIN_PCMPGTQ256_MASK,
31561 IX86_BUILTIN_PTESTMB128,
31562 IX86_BUILTIN_PTESTMB256,
31563 IX86_BUILTIN_PTESTMW128,
31564 IX86_BUILTIN_PTESTMW256,
31565 IX86_BUILTIN_PTESTMD128,
31566 IX86_BUILTIN_PTESTMD256,
31567 IX86_BUILTIN_PTESTMQ128,
31568 IX86_BUILTIN_PTESTMQ256,
31569 IX86_BUILTIN_PTESTNMB128,
31570 IX86_BUILTIN_PTESTNMB256,
31571 IX86_BUILTIN_PTESTNMW128,
31572 IX86_BUILTIN_PTESTNMW256,
31573 IX86_BUILTIN_PTESTNMD128,
31574 IX86_BUILTIN_PTESTNMD256,
31575 IX86_BUILTIN_PTESTNMQ128,
31576 IX86_BUILTIN_PTESTNMQ256,
31577 IX86_BUILTIN_PBROADCASTMB128,
31578 IX86_BUILTIN_PBROADCASTMB256,
31579 IX86_BUILTIN_PBROADCASTMW128,
31580 IX86_BUILTIN_PBROADCASTMW256,
31581 IX86_BUILTIN_COMPRESSPD256,
31582 IX86_BUILTIN_COMPRESSPD128,
31583 IX86_BUILTIN_COMPRESSPS256,
31584 IX86_BUILTIN_COMPRESSPS128,
31585 IX86_BUILTIN_PCOMPRESSQ256,
31586 IX86_BUILTIN_PCOMPRESSQ128,
31587 IX86_BUILTIN_PCOMPRESSD256,
31588 IX86_BUILTIN_PCOMPRESSD128,
31589 IX86_BUILTIN_EXPANDPD256,
31590 IX86_BUILTIN_EXPANDPD128,
31591 IX86_BUILTIN_EXPANDPS256,
31592 IX86_BUILTIN_EXPANDPS128,
31593 IX86_BUILTIN_PEXPANDQ256,
31594 IX86_BUILTIN_PEXPANDQ128,
31595 IX86_BUILTIN_PEXPANDD256,
31596 IX86_BUILTIN_PEXPANDD128,
31597 IX86_BUILTIN_EXPANDPD256Z,
31598 IX86_BUILTIN_EXPANDPD128Z,
31599 IX86_BUILTIN_EXPANDPS256Z,
31600 IX86_BUILTIN_EXPANDPS128Z,
31601 IX86_BUILTIN_PEXPANDQ256Z,
31602 IX86_BUILTIN_PEXPANDQ128Z,
31603 IX86_BUILTIN_PEXPANDD256Z,
31604 IX86_BUILTIN_PEXPANDD128Z,
31605 IX86_BUILTIN_PMAXSD256_MASK,
31606 IX86_BUILTIN_PMINSD256_MASK,
31607 IX86_BUILTIN_PMAXUD256_MASK,
31608 IX86_BUILTIN_PMINUD256_MASK,
31609 IX86_BUILTIN_PMAXSD128_MASK,
31610 IX86_BUILTIN_PMINSD128_MASK,
31611 IX86_BUILTIN_PMAXUD128_MASK,
31612 IX86_BUILTIN_PMINUD128_MASK,
31613 IX86_BUILTIN_PMAXSQ256_MASK,
31614 IX86_BUILTIN_PMINSQ256_MASK,
31615 IX86_BUILTIN_PMAXUQ256_MASK,
31616 IX86_BUILTIN_PMINUQ256_MASK,
31617 IX86_BUILTIN_PMAXSQ128_MASK,
31618 IX86_BUILTIN_PMINSQ128_MASK,
31619 IX86_BUILTIN_PMAXUQ128_MASK,
31620 IX86_BUILTIN_PMINUQ128_MASK,
31621 IX86_BUILTIN_PMINSB256_MASK,
31622 IX86_BUILTIN_PMINUB256_MASK,
31623 IX86_BUILTIN_PMAXSB256_MASK,
31624 IX86_BUILTIN_PMAXUB256_MASK,
31625 IX86_BUILTIN_PMINSB128_MASK,
31626 IX86_BUILTIN_PMINUB128_MASK,
31627 IX86_BUILTIN_PMAXSB128_MASK,
31628 IX86_BUILTIN_PMAXUB128_MASK,
31629 IX86_BUILTIN_PMINSW256_MASK,
31630 IX86_BUILTIN_PMINUW256_MASK,
31631 IX86_BUILTIN_PMAXSW256_MASK,
31632 IX86_BUILTIN_PMAXUW256_MASK,
31633 IX86_BUILTIN_PMINSW128_MASK,
31634 IX86_BUILTIN_PMINUW128_MASK,
31635 IX86_BUILTIN_PMAXSW128_MASK,
31636 IX86_BUILTIN_PMAXUW128_MASK,
31637 IX86_BUILTIN_VPCONFLICTQ256,
31638 IX86_BUILTIN_VPCONFLICTD256,
31639 IX86_BUILTIN_VPCLZCNTQ256,
31640 IX86_BUILTIN_VPCLZCNTD256,
31641 IX86_BUILTIN_UNPCKHPD256_MASK,
31642 IX86_BUILTIN_UNPCKHPD128_MASK,
31643 IX86_BUILTIN_UNPCKHPS256_MASK,
31644 IX86_BUILTIN_UNPCKHPS128_MASK,
31645 IX86_BUILTIN_UNPCKLPD256_MASK,
31646 IX86_BUILTIN_UNPCKLPD128_MASK,
31647 IX86_BUILTIN_UNPCKLPS256_MASK,
31648 IX86_BUILTIN_VPCONFLICTQ128,
31649 IX86_BUILTIN_VPCONFLICTD128,
31650 IX86_BUILTIN_VPCLZCNTQ128,
31651 IX86_BUILTIN_VPCLZCNTD128,
31652 IX86_BUILTIN_UNPCKLPS128_MASK,
31653 IX86_BUILTIN_ALIGND256,
31654 IX86_BUILTIN_ALIGNQ256,
31655 IX86_BUILTIN_ALIGND128,
31656 IX86_BUILTIN_ALIGNQ128,
31657 IX86_BUILTIN_CVTPS2PH256_MASK,
31658 IX86_BUILTIN_CVTPS2PH_MASK,
31659 IX86_BUILTIN_CVTPH2PS_MASK,
31660 IX86_BUILTIN_CVTPH2PS256_MASK,
31661 IX86_BUILTIN_PUNPCKHDQ128_MASK,
31662 IX86_BUILTIN_PUNPCKHDQ256_MASK,
31663 IX86_BUILTIN_PUNPCKHQDQ128_MASK,
31664 IX86_BUILTIN_PUNPCKHQDQ256_MASK,
31665 IX86_BUILTIN_PUNPCKLDQ128_MASK,
31666 IX86_BUILTIN_PUNPCKLDQ256_MASK,
31667 IX86_BUILTIN_PUNPCKLQDQ128_MASK,
31668 IX86_BUILTIN_PUNPCKLQDQ256_MASK,
31669 IX86_BUILTIN_PUNPCKHBW128_MASK,
31670 IX86_BUILTIN_PUNPCKHBW256_MASK,
31671 IX86_BUILTIN_PUNPCKHWD128_MASK,
31672 IX86_BUILTIN_PUNPCKHWD256_MASK,
31673 IX86_BUILTIN_PUNPCKLBW128_MASK,
31674 IX86_BUILTIN_PUNPCKLBW256_MASK,
31675 IX86_BUILTIN_PUNPCKLWD128_MASK,
31676 IX86_BUILTIN_PUNPCKLWD256_MASK,
31677 IX86_BUILTIN_PSLLVV16HI,
31678 IX86_BUILTIN_PSLLVV8HI,
31679 IX86_BUILTIN_PACKSSDW256_MASK,
31680 IX86_BUILTIN_PACKSSDW128_MASK,
31681 IX86_BUILTIN_PACKUSDW256_MASK,
31682 IX86_BUILTIN_PACKUSDW128_MASK,
31683 IX86_BUILTIN_PAVGB256_MASK,
31684 IX86_BUILTIN_PAVGW256_MASK,
31685 IX86_BUILTIN_PAVGB128_MASK,
31686 IX86_BUILTIN_PAVGW128_MASK,
31687 IX86_BUILTIN_VPERMVARSF256_MASK,
31688 IX86_BUILTIN_VPERMVARDF256_MASK,
31689 IX86_BUILTIN_VPERMDF256_MASK,
31690 IX86_BUILTIN_PABSB256_MASK,
31691 IX86_BUILTIN_PABSB128_MASK,
31692 IX86_BUILTIN_PABSW256_MASK,
31693 IX86_BUILTIN_PABSW128_MASK,
31694 IX86_BUILTIN_VPERMILVARPD_MASK,
31695 IX86_BUILTIN_VPERMILVARPS_MASK,
31696 IX86_BUILTIN_VPERMILVARPD256_MASK,
31697 IX86_BUILTIN_VPERMILVARPS256_MASK,
31698 IX86_BUILTIN_VPERMILPD_MASK,
31699 IX86_BUILTIN_VPERMILPS_MASK,
31700 IX86_BUILTIN_VPERMILPD256_MASK,
31701 IX86_BUILTIN_VPERMILPS256_MASK,
31702 IX86_BUILTIN_BLENDMQ256,
31703 IX86_BUILTIN_BLENDMD256,
31704 IX86_BUILTIN_BLENDMPD256,
31705 IX86_BUILTIN_BLENDMPS256,
31706 IX86_BUILTIN_BLENDMQ128,
31707 IX86_BUILTIN_BLENDMD128,
31708 IX86_BUILTIN_BLENDMPD128,
31709 IX86_BUILTIN_BLENDMPS128,
31710 IX86_BUILTIN_BLENDMW256,
31711 IX86_BUILTIN_BLENDMB256,
31712 IX86_BUILTIN_BLENDMW128,
31713 IX86_BUILTIN_BLENDMB128,
31714 IX86_BUILTIN_PMULLD256_MASK,
31715 IX86_BUILTIN_PMULLD128_MASK,
31716 IX86_BUILTIN_PMULUDQ256_MASK,
31717 IX86_BUILTIN_PMULDQ256_MASK,
31718 IX86_BUILTIN_PMULDQ128_MASK,
31719 IX86_BUILTIN_PMULUDQ128_MASK,
31720 IX86_BUILTIN_CVTPD2PS256_MASK,
31721 IX86_BUILTIN_CVTPD2PS_MASK,
31722 IX86_BUILTIN_VPERMVARSI256_MASK,
31723 IX86_BUILTIN_VPERMVARDI256_MASK,
31724 IX86_BUILTIN_VPERMDI256_MASK,
31725 IX86_BUILTIN_CMPQ256,
31726 IX86_BUILTIN_CMPD256,
31727 IX86_BUILTIN_UCMPQ256,
31728 IX86_BUILTIN_UCMPD256,
31729 IX86_BUILTIN_CMPB256,
31730 IX86_BUILTIN_CMPW256,
31731 IX86_BUILTIN_UCMPB256,
31732 IX86_BUILTIN_UCMPW256,
31733 IX86_BUILTIN_CMPPD256_MASK,
31734 IX86_BUILTIN_CMPPS256_MASK,
31735 IX86_BUILTIN_CMPQ128,
31736 IX86_BUILTIN_CMPD128,
31737 IX86_BUILTIN_UCMPQ128,
31738 IX86_BUILTIN_UCMPD128,
31739 IX86_BUILTIN_CMPB128,
31740 IX86_BUILTIN_CMPW128,
31741 IX86_BUILTIN_UCMPB128,
31742 IX86_BUILTIN_UCMPW128,
31743 IX86_BUILTIN_CMPPD128_MASK,
31744 IX86_BUILTIN_CMPPS128_MASK,
31746 IX86_BUILTIN_GATHER3SIV8SF,
31747 IX86_BUILTIN_GATHER3SIV4SF,
31748 IX86_BUILTIN_GATHER3SIV4DF,
31749 IX86_BUILTIN_GATHER3SIV2DF,
31750 IX86_BUILTIN_GATHER3DIV8SF,
31751 IX86_BUILTIN_GATHER3DIV4SF,
31752 IX86_BUILTIN_GATHER3DIV4DF,
31753 IX86_BUILTIN_GATHER3DIV2DF,
31754 IX86_BUILTIN_GATHER3SIV8SI,
31755 IX86_BUILTIN_GATHER3SIV4SI,
31756 IX86_BUILTIN_GATHER3SIV4DI,
31757 IX86_BUILTIN_GATHER3SIV2DI,
31758 IX86_BUILTIN_GATHER3DIV8SI,
31759 IX86_BUILTIN_GATHER3DIV4SI,
31760 IX86_BUILTIN_GATHER3DIV4DI,
31761 IX86_BUILTIN_GATHER3DIV2DI,
31762 IX86_BUILTIN_SCATTERSIV8SF,
31763 IX86_BUILTIN_SCATTERSIV4SF,
31764 IX86_BUILTIN_SCATTERSIV4DF,
31765 IX86_BUILTIN_SCATTERSIV2DF,
31766 IX86_BUILTIN_SCATTERDIV8SF,
31767 IX86_BUILTIN_SCATTERDIV4SF,
31768 IX86_BUILTIN_SCATTERDIV4DF,
31769 IX86_BUILTIN_SCATTERDIV2DF,
31770 IX86_BUILTIN_SCATTERSIV8SI,
31771 IX86_BUILTIN_SCATTERSIV4SI,
31772 IX86_BUILTIN_SCATTERSIV4DI,
31773 IX86_BUILTIN_SCATTERSIV2DI,
31774 IX86_BUILTIN_SCATTERDIV8SI,
31775 IX86_BUILTIN_SCATTERDIV4SI,
31776 IX86_BUILTIN_SCATTERDIV4DI,
31777 IX86_BUILTIN_SCATTERDIV2DI,
31780 IX86_BUILTIN_RANGESD128,
31781 IX86_BUILTIN_RANGESS128,
31782 IX86_BUILTIN_KUNPCKWD,
31783 IX86_BUILTIN_KUNPCKDQ,
31784 IX86_BUILTIN_BROADCASTF32x2_512,
31785 IX86_BUILTIN_BROADCASTI32x2_512,
31786 IX86_BUILTIN_BROADCASTF64X2_512,
31787 IX86_BUILTIN_BROADCASTI64X2_512,
31788 IX86_BUILTIN_BROADCASTF32X8_512,
31789 IX86_BUILTIN_BROADCASTI32X8_512,
31790 IX86_BUILTIN_EXTRACTF64X2_512,
31791 IX86_BUILTIN_EXTRACTF32X8,
31792 IX86_BUILTIN_EXTRACTI64X2_512,
31793 IX86_BUILTIN_EXTRACTI32X8,
31794 IX86_BUILTIN_REDUCEPD512_MASK,
31795 IX86_BUILTIN_REDUCEPS512_MASK,
31796 IX86_BUILTIN_PMULLQ512,
31797 IX86_BUILTIN_XORPD512,
31798 IX86_BUILTIN_XORPS512,
31799 IX86_BUILTIN_ORPD512,
31800 IX86_BUILTIN_ORPS512,
31801 IX86_BUILTIN_ANDPD512,
31802 IX86_BUILTIN_ANDPS512,
31803 IX86_BUILTIN_ANDNPD512,
31804 IX86_BUILTIN_ANDNPS512,
31805 IX86_BUILTIN_INSERTF32X8,
31806 IX86_BUILTIN_INSERTI32X8,
31807 IX86_BUILTIN_INSERTF64X2_512,
31808 IX86_BUILTIN_INSERTI64X2_512,
31809 IX86_BUILTIN_FPCLASSPD512,
31810 IX86_BUILTIN_FPCLASSPS512,
31811 IX86_BUILTIN_CVTD2MASK512,
31812 IX86_BUILTIN_CVTQ2MASK512,
31813 IX86_BUILTIN_CVTMASK2D512,
31814 IX86_BUILTIN_CVTMASK2Q512,
31815 IX86_BUILTIN_CVTPD2QQ512,
31816 IX86_BUILTIN_CVTPS2QQ512,
31817 IX86_BUILTIN_CVTPD2UQQ512,
31818 IX86_BUILTIN_CVTPS2UQQ512,
31819 IX86_BUILTIN_CVTQQ2PS512,
31820 IX86_BUILTIN_CVTUQQ2PS512,
31821 IX86_BUILTIN_CVTQQ2PD512,
31822 IX86_BUILTIN_CVTUQQ2PD512,
31823 IX86_BUILTIN_CVTTPS2QQ512,
31824 IX86_BUILTIN_CVTTPS2UQQ512,
31825 IX86_BUILTIN_CVTTPD2QQ512,
31826 IX86_BUILTIN_CVTTPD2UQQ512,
31827 IX86_BUILTIN_RANGEPS512,
31828 IX86_BUILTIN_RANGEPD512,
31831 IX86_BUILTIN_PACKUSDW512,
31832 IX86_BUILTIN_PACKSSDW512,
31833 IX86_BUILTIN_LOADDQUHI512_MASK,
31834 IX86_BUILTIN_LOADDQUQI512_MASK,
31835 IX86_BUILTIN_PSLLDQ512,
31836 IX86_BUILTIN_PSRLDQ512,
31837 IX86_BUILTIN_STOREDQUHI512_MASK,
31838 IX86_BUILTIN_STOREDQUQI512_MASK,
31839 IX86_BUILTIN_PALIGNR512,
31840 IX86_BUILTIN_PALIGNR512_MASK,
31841 IX86_BUILTIN_MOVDQUHI512_MASK,
31842 IX86_BUILTIN_MOVDQUQI512_MASK,
31843 IX86_BUILTIN_PSADBW512,
31844 IX86_BUILTIN_DBPSADBW512,
31845 IX86_BUILTIN_PBROADCASTB512,
31846 IX86_BUILTIN_PBROADCASTB512_GPR,
31847 IX86_BUILTIN_PBROADCASTW512,
31848 IX86_BUILTIN_PBROADCASTW512_GPR,
31849 IX86_BUILTIN_PMOVSXBW512_MASK,
31850 IX86_BUILTIN_PMOVZXBW512_MASK,
31851 IX86_BUILTIN_VPERMVARHI512_MASK,
31852 IX86_BUILTIN_VPERMT2VARHI512,
31853 IX86_BUILTIN_VPERMT2VARHI512_MASKZ,
31854 IX86_BUILTIN_VPERMI2VARHI512,
31855 IX86_BUILTIN_PAVGB512,
31856 IX86_BUILTIN_PAVGW512,
31857 IX86_BUILTIN_PADDB512,
31858 IX86_BUILTIN_PSUBB512,
31859 IX86_BUILTIN_PSUBSB512,
31860 IX86_BUILTIN_PADDSB512,
31861 IX86_BUILTIN_PSUBUSB512,
31862 IX86_BUILTIN_PADDUSB512,
31863 IX86_BUILTIN_PSUBW512,
31864 IX86_BUILTIN_PADDW512,
31865 IX86_BUILTIN_PSUBSW512,
31866 IX86_BUILTIN_PADDSW512,
31867 IX86_BUILTIN_PSUBUSW512,
31868 IX86_BUILTIN_PADDUSW512,
31869 IX86_BUILTIN_PMAXUW512,
31870 IX86_BUILTIN_PMAXSW512,
31871 IX86_BUILTIN_PMINUW512,
31872 IX86_BUILTIN_PMINSW512,
31873 IX86_BUILTIN_PMAXUB512,
31874 IX86_BUILTIN_PMAXSB512,
31875 IX86_BUILTIN_PMINUB512,
31876 IX86_BUILTIN_PMINSB512,
31877 IX86_BUILTIN_PMOVWB512,
31878 IX86_BUILTIN_PMOVSWB512,
31879 IX86_BUILTIN_PMOVUSWB512,
31880 IX86_BUILTIN_PMULHRSW512_MASK,
31881 IX86_BUILTIN_PMULHUW512_MASK,
31882 IX86_BUILTIN_PMULHW512_MASK,
31883 IX86_BUILTIN_PMULLW512_MASK,
31884 IX86_BUILTIN_PSLLWI512_MASK,
31885 IX86_BUILTIN_PSLLW512_MASK,
31886 IX86_BUILTIN_PACKSSWB512,
31887 IX86_BUILTIN_PACKUSWB512,
31888 IX86_BUILTIN_PSRAVV32HI,
31889 IX86_BUILTIN_PMADDUBSW512_MASK,
31890 IX86_BUILTIN_PMADDWD512_MASK,
31891 IX86_BUILTIN_PSRLVV32HI,
31892 IX86_BUILTIN_PUNPCKHBW512,
31893 IX86_BUILTIN_PUNPCKHWD512,
31894 IX86_BUILTIN_PUNPCKLBW512,
31895 IX86_BUILTIN_PUNPCKLWD512,
31896 IX86_BUILTIN_PSHUFB512,
31897 IX86_BUILTIN_PSHUFHW512,
31898 IX86_BUILTIN_PSHUFLW512,
31899 IX86_BUILTIN_PSRAWI512,
31900 IX86_BUILTIN_PSRAW512,
31901 IX86_BUILTIN_PSRLWI512,
31902 IX86_BUILTIN_PSRLW512,
31903 IX86_BUILTIN_CVTB2MASK512,
31904 IX86_BUILTIN_CVTW2MASK512,
31905 IX86_BUILTIN_CVTMASK2B512,
31906 IX86_BUILTIN_CVTMASK2W512,
31907 IX86_BUILTIN_PCMPEQB512_MASK,
31908 IX86_BUILTIN_PCMPEQW512_MASK,
31909 IX86_BUILTIN_PCMPGTB512_MASK,
31910 IX86_BUILTIN_PCMPGTW512_MASK,
31911 IX86_BUILTIN_PTESTMB512,
31912 IX86_BUILTIN_PTESTMW512,
31913 IX86_BUILTIN_PTESTNMB512,
31914 IX86_BUILTIN_PTESTNMW512,
31915 IX86_BUILTIN_PSLLVV32HI,
31916 IX86_BUILTIN_PABSB512,
31917 IX86_BUILTIN_PABSW512,
31918 IX86_BUILTIN_BLENDMW512,
31919 IX86_BUILTIN_BLENDMB512,
31920 IX86_BUILTIN_CMPB512,
31921 IX86_BUILTIN_CMPW512,
31922 IX86_BUILTIN_UCMPB512,
31923 IX86_BUILTIN_UCMPW512,
31925 /* Alternate 4 and 8 element gather/scatter for the vectorizer
31926 where all operands are 32-byte or 64-byte wide respectively. */
31927 IX86_BUILTIN_GATHERALTSIV4DF,
31928 IX86_BUILTIN_GATHERALTDIV8SF,
31929 IX86_BUILTIN_GATHERALTSIV4DI,
31930 IX86_BUILTIN_GATHERALTDIV8SI,
31931 IX86_BUILTIN_GATHER3ALTDIV16SF,
31932 IX86_BUILTIN_GATHER3ALTDIV16SI,
31933 IX86_BUILTIN_GATHER3ALTSIV4DF,
31934 IX86_BUILTIN_GATHER3ALTDIV8SF,
31935 IX86_BUILTIN_GATHER3ALTSIV4DI,
31936 IX86_BUILTIN_GATHER3ALTDIV8SI,
31937 IX86_BUILTIN_GATHER3ALTSIV8DF,
31938 IX86_BUILTIN_GATHER3ALTSIV8DI,
31939 IX86_BUILTIN_GATHER3DIV16SF,
31940 IX86_BUILTIN_GATHER3DIV16SI,
31941 IX86_BUILTIN_GATHER3DIV8DF,
31942 IX86_BUILTIN_GATHER3DIV8DI,
31943 IX86_BUILTIN_GATHER3SIV16SF,
31944 IX86_BUILTIN_GATHER3SIV16SI,
31945 IX86_BUILTIN_GATHER3SIV8DF,
31946 IX86_BUILTIN_GATHER3SIV8DI,
31947 IX86_BUILTIN_SCATTERALTSIV8DF,
31948 IX86_BUILTIN_SCATTERALTDIV16SF,
31949 IX86_BUILTIN_SCATTERALTSIV8DI,
31950 IX86_BUILTIN_SCATTERALTDIV16SI,
31951 IX86_BUILTIN_SCATTERDIV16SF,
31952 IX86_BUILTIN_SCATTERDIV16SI,
31953 IX86_BUILTIN_SCATTERDIV8DF,
31954 IX86_BUILTIN_SCATTERDIV8DI,
31955 IX86_BUILTIN_SCATTERSIV16SF,
31956 IX86_BUILTIN_SCATTERSIV16SI,
31957 IX86_BUILTIN_SCATTERSIV8DF,
31958 IX86_BUILTIN_SCATTERSIV8DI,
31961 IX86_BUILTIN_GATHERPFQPD,
31962 IX86_BUILTIN_GATHERPFDPS,
31963 IX86_BUILTIN_GATHERPFDPD,
31964 IX86_BUILTIN_GATHERPFQPS,
31965 IX86_BUILTIN_SCATTERPFDPD,
31966 IX86_BUILTIN_SCATTERPFDPS,
31967 IX86_BUILTIN_SCATTERPFQPD,
31968 IX86_BUILTIN_SCATTERPFQPS,
31971 IX86_BUILTIN_EXP2PD_MASK,
31972 IX86_BUILTIN_EXP2PS_MASK,
31973 IX86_BUILTIN_EXP2PS,
31974 IX86_BUILTIN_RCP28PD,
31975 IX86_BUILTIN_RCP28PS,
31976 IX86_BUILTIN_RCP28SD,
31977 IX86_BUILTIN_RCP28SS,
31978 IX86_BUILTIN_RSQRT28PD,
31979 IX86_BUILTIN_RSQRT28PS,
31980 IX86_BUILTIN_RSQRT28SD,
31981 IX86_BUILTIN_RSQRT28SS,
31984 IX86_BUILTIN_VPMADD52LUQ512,
31985 IX86_BUILTIN_VPMADD52HUQ512,
31986 IX86_BUILTIN_VPMADD52LUQ256,
31987 IX86_BUILTIN_VPMADD52HUQ256,
31988 IX86_BUILTIN_VPMADD52LUQ128,
31989 IX86_BUILTIN_VPMADD52HUQ128,
31990 IX86_BUILTIN_VPMADD52LUQ512_MASKZ,
31991 IX86_BUILTIN_VPMADD52HUQ512_MASKZ,
31992 IX86_BUILTIN_VPMADD52LUQ256_MASKZ,
31993 IX86_BUILTIN_VPMADD52HUQ256_MASKZ,
31994 IX86_BUILTIN_VPMADD52LUQ128_MASKZ,
31995 IX86_BUILTIN_VPMADD52HUQ128_MASKZ,
31998 IX86_BUILTIN_VPMULTISHIFTQB512,
31999 IX86_BUILTIN_VPMULTISHIFTQB256,
32000 IX86_BUILTIN_VPMULTISHIFTQB128,
32001 IX86_BUILTIN_VPERMVARQI512_MASK,
32002 IX86_BUILTIN_VPERMT2VARQI512,
32003 IX86_BUILTIN_VPERMT2VARQI512_MASKZ,
32004 IX86_BUILTIN_VPERMI2VARQI512,
32005 IX86_BUILTIN_VPERMVARQI256_MASK,
32006 IX86_BUILTIN_VPERMVARQI128_MASK,
32007 IX86_BUILTIN_VPERMT2VARQI256,
32008 IX86_BUILTIN_VPERMT2VARQI256_MASKZ,
32009 IX86_BUILTIN_VPERMT2VARQI128,
32010 IX86_BUILTIN_VPERMT2VARQI128_MASKZ,
32011 IX86_BUILTIN_VPERMI2VARQI256,
32012 IX86_BUILTIN_VPERMI2VARQI128,
32014 /* SHA builtins. */
32015 IX86_BUILTIN_SHA1MSG1,
32016 IX86_BUILTIN_SHA1MSG2,
32017 IX86_BUILTIN_SHA1NEXTE,
32018 IX86_BUILTIN_SHA1RNDS4,
32019 IX86_BUILTIN_SHA256MSG1,
32020 IX86_BUILTIN_SHA256MSG2,
32021 IX86_BUILTIN_SHA256RNDS2,
32023 /* CLWB instructions. */
32026 /* PCOMMIT instructions. */
32027 IX86_BUILTIN_PCOMMIT,
32029 /* CLFLUSHOPT instructions. */
32030 IX86_BUILTIN_CLFLUSHOPT,
32032 /* TFmode support builtins. */
32034 IX86_BUILTIN_HUGE_VALQ,
32035 IX86_BUILTIN_FABSQ,
32036 IX86_BUILTIN_COPYSIGNQ,
32038 /* Vectorizer support builtins. */
32039 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512,
32040 IX86_BUILTIN_CPYSGNPS,
32041 IX86_BUILTIN_CPYSGNPD,
32042 IX86_BUILTIN_CPYSGNPS256,
32043 IX86_BUILTIN_CPYSGNPS512,
32044 IX86_BUILTIN_CPYSGNPD256,
32045 IX86_BUILTIN_CPYSGNPD512,
32046 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512,
32047 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512,
32050 /* FMA4 instructions. */
32051 IX86_BUILTIN_VFMADDSS,
32052 IX86_BUILTIN_VFMADDSD,
32053 IX86_BUILTIN_VFMADDPS,
32054 IX86_BUILTIN_VFMADDPD,
32055 IX86_BUILTIN_VFMADDPS256,
32056 IX86_BUILTIN_VFMADDPD256,
32057 IX86_BUILTIN_VFMADDSUBPS,
32058 IX86_BUILTIN_VFMADDSUBPD,
32059 IX86_BUILTIN_VFMADDSUBPS256,
32060 IX86_BUILTIN_VFMADDSUBPD256,
32062 /* FMA3 instructions. */
32063 IX86_BUILTIN_VFMADDSS3,
32064 IX86_BUILTIN_VFMADDSD3,
32066 /* XOP instructions. */
32067 IX86_BUILTIN_VPCMOV,
32068 IX86_BUILTIN_VPCMOV_V2DI,
32069 IX86_BUILTIN_VPCMOV_V4SI,
32070 IX86_BUILTIN_VPCMOV_V8HI,
32071 IX86_BUILTIN_VPCMOV_V16QI,
32072 IX86_BUILTIN_VPCMOV_V4SF,
32073 IX86_BUILTIN_VPCMOV_V2DF,
32074 IX86_BUILTIN_VPCMOV256,
32075 IX86_BUILTIN_VPCMOV_V4DI256,
32076 IX86_BUILTIN_VPCMOV_V8SI256,
32077 IX86_BUILTIN_VPCMOV_V16HI256,
32078 IX86_BUILTIN_VPCMOV_V32QI256,
32079 IX86_BUILTIN_VPCMOV_V8SF256,
32080 IX86_BUILTIN_VPCMOV_V4DF256,
32082 IX86_BUILTIN_VPPERM,
32084 IX86_BUILTIN_VPMACSSWW,
32085 IX86_BUILTIN_VPMACSWW,
32086 IX86_BUILTIN_VPMACSSWD,
32087 IX86_BUILTIN_VPMACSWD,
32088 IX86_BUILTIN_VPMACSSDD,
32089 IX86_BUILTIN_VPMACSDD,
32090 IX86_BUILTIN_VPMACSSDQL,
32091 IX86_BUILTIN_VPMACSSDQH,
32092 IX86_BUILTIN_VPMACSDQL,
32093 IX86_BUILTIN_VPMACSDQH,
32094 IX86_BUILTIN_VPMADCSSWD,
32095 IX86_BUILTIN_VPMADCSWD,
32097 IX86_BUILTIN_VPHADDBW,
32098 IX86_BUILTIN_VPHADDBD,
32099 IX86_BUILTIN_VPHADDBQ,
32100 IX86_BUILTIN_VPHADDWD,
32101 IX86_BUILTIN_VPHADDWQ,
32102 IX86_BUILTIN_VPHADDDQ,
32103 IX86_BUILTIN_VPHADDUBW,
32104 IX86_BUILTIN_VPHADDUBD,
32105 IX86_BUILTIN_VPHADDUBQ,
32106 IX86_BUILTIN_VPHADDUWD,
32107 IX86_BUILTIN_VPHADDUWQ,
32108 IX86_BUILTIN_VPHADDUDQ,
32109 IX86_BUILTIN_VPHSUBBW,
32110 IX86_BUILTIN_VPHSUBWD,
32111 IX86_BUILTIN_VPHSUBDQ,
32113 IX86_BUILTIN_VPROTB,
32114 IX86_BUILTIN_VPROTW,
32115 IX86_BUILTIN_VPROTD,
32116 IX86_BUILTIN_VPROTQ,
32117 IX86_BUILTIN_VPROTB_IMM,
32118 IX86_BUILTIN_VPROTW_IMM,
32119 IX86_BUILTIN_VPROTD_IMM,
32120 IX86_BUILTIN_VPROTQ_IMM,
32122 IX86_BUILTIN_VPSHLB,
32123 IX86_BUILTIN_VPSHLW,
32124 IX86_BUILTIN_VPSHLD,
32125 IX86_BUILTIN_VPSHLQ,
32126 IX86_BUILTIN_VPSHAB,
32127 IX86_BUILTIN_VPSHAW,
32128 IX86_BUILTIN_VPSHAD,
32129 IX86_BUILTIN_VPSHAQ,
32131 IX86_BUILTIN_VFRCZSS,
32132 IX86_BUILTIN_VFRCZSD,
32133 IX86_BUILTIN_VFRCZPS,
32134 IX86_BUILTIN_VFRCZPD,
32135 IX86_BUILTIN_VFRCZPS256,
32136 IX86_BUILTIN_VFRCZPD256,
32138 IX86_BUILTIN_VPCOMEQUB,
32139 IX86_BUILTIN_VPCOMNEUB,
32140 IX86_BUILTIN_VPCOMLTUB,
32141 IX86_BUILTIN_VPCOMLEUB,
32142 IX86_BUILTIN_VPCOMGTUB,
32143 IX86_BUILTIN_VPCOMGEUB,
32144 IX86_BUILTIN_VPCOMFALSEUB,
32145 IX86_BUILTIN_VPCOMTRUEUB,
32147 IX86_BUILTIN_VPCOMEQUW,
32148 IX86_BUILTIN_VPCOMNEUW,
32149 IX86_BUILTIN_VPCOMLTUW,
32150 IX86_BUILTIN_VPCOMLEUW,
32151 IX86_BUILTIN_VPCOMGTUW,
32152 IX86_BUILTIN_VPCOMGEUW,
32153 IX86_BUILTIN_VPCOMFALSEUW,
32154 IX86_BUILTIN_VPCOMTRUEUW,
32156 IX86_BUILTIN_VPCOMEQUD,
32157 IX86_BUILTIN_VPCOMNEUD,
32158 IX86_BUILTIN_VPCOMLTUD,
32159 IX86_BUILTIN_VPCOMLEUD,
32160 IX86_BUILTIN_VPCOMGTUD,
32161 IX86_BUILTIN_VPCOMGEUD,
32162 IX86_BUILTIN_VPCOMFALSEUD,
32163 IX86_BUILTIN_VPCOMTRUEUD,
32165 IX86_BUILTIN_VPCOMEQUQ,
32166 IX86_BUILTIN_VPCOMNEUQ,
32167 IX86_BUILTIN_VPCOMLTUQ,
32168 IX86_BUILTIN_VPCOMLEUQ,
32169 IX86_BUILTIN_VPCOMGTUQ,
32170 IX86_BUILTIN_VPCOMGEUQ,
32171 IX86_BUILTIN_VPCOMFALSEUQ,
32172 IX86_BUILTIN_VPCOMTRUEUQ,
32174 IX86_BUILTIN_VPCOMEQB,
32175 IX86_BUILTIN_VPCOMNEB,
32176 IX86_BUILTIN_VPCOMLTB,
32177 IX86_BUILTIN_VPCOMLEB,
32178 IX86_BUILTIN_VPCOMGTB,
32179 IX86_BUILTIN_VPCOMGEB,
32180 IX86_BUILTIN_VPCOMFALSEB,
32181 IX86_BUILTIN_VPCOMTRUEB,
32183 IX86_BUILTIN_VPCOMEQW,
32184 IX86_BUILTIN_VPCOMNEW,
32185 IX86_BUILTIN_VPCOMLTW,
32186 IX86_BUILTIN_VPCOMLEW,
32187 IX86_BUILTIN_VPCOMGTW,
32188 IX86_BUILTIN_VPCOMGEW,
32189 IX86_BUILTIN_VPCOMFALSEW,
32190 IX86_BUILTIN_VPCOMTRUEW,
32192 IX86_BUILTIN_VPCOMEQD,
32193 IX86_BUILTIN_VPCOMNED,
32194 IX86_BUILTIN_VPCOMLTD,
32195 IX86_BUILTIN_VPCOMLED,
32196 IX86_BUILTIN_VPCOMGTD,
32197 IX86_BUILTIN_VPCOMGED,
32198 IX86_BUILTIN_VPCOMFALSED,
32199 IX86_BUILTIN_VPCOMTRUED,
32201 IX86_BUILTIN_VPCOMEQQ,
32202 IX86_BUILTIN_VPCOMNEQ,
32203 IX86_BUILTIN_VPCOMLTQ,
32204 IX86_BUILTIN_VPCOMLEQ,
32205 IX86_BUILTIN_VPCOMGTQ,
32206 IX86_BUILTIN_VPCOMGEQ,
32207 IX86_BUILTIN_VPCOMFALSEQ,
32208 IX86_BUILTIN_VPCOMTRUEQ,
32210 /* LWP instructions. */
32211 IX86_BUILTIN_LLWPCB,
32212 IX86_BUILTIN_SLWPCB,
32213 IX86_BUILTIN_LWPVAL32,
32214 IX86_BUILTIN_LWPVAL64,
32215 IX86_BUILTIN_LWPINS32,
32216 IX86_BUILTIN_LWPINS64,
32221 IX86_BUILTIN_XBEGIN,
32223 IX86_BUILTIN_XABORT,
32224 IX86_BUILTIN_XTEST,
32227 IX86_BUILTIN_BNDMK,
32228 IX86_BUILTIN_BNDSTX,
32229 IX86_BUILTIN_BNDLDX,
32230 IX86_BUILTIN_BNDCL,
32231 IX86_BUILTIN_BNDCU,
32232 IX86_BUILTIN_BNDRET,
32233 IX86_BUILTIN_BNDNARROW,
32234 IX86_BUILTIN_BNDINT,
32235 IX86_BUILTIN_SIZEOF,
32236 IX86_BUILTIN_BNDLOWER,
32237 IX86_BUILTIN_BNDUPPER,
32239 /* BMI instructions. */
32240 IX86_BUILTIN_BEXTR32,
32241 IX86_BUILTIN_BEXTR64,
32244 /* TBM instructions. */
32245 IX86_BUILTIN_BEXTRI32,
32246 IX86_BUILTIN_BEXTRI64,
32248 /* BMI2 instructions. */
32249 IX86_BUILTIN_BZHI32,
32250 IX86_BUILTIN_BZHI64,
32251 IX86_BUILTIN_PDEP32,
32252 IX86_BUILTIN_PDEP64,
32253 IX86_BUILTIN_PEXT32,
32254 IX86_BUILTIN_PEXT64,
32256 /* ADX instructions. */
32257 IX86_BUILTIN_ADDCARRYX32,
32258 IX86_BUILTIN_ADDCARRYX64,
32260 /* SBB instructions. */
32261 IX86_BUILTIN_SBB32,
32262 IX86_BUILTIN_SBB64,
32264 /* FSGSBASE instructions. */
32265 IX86_BUILTIN_RDFSBASE32,
32266 IX86_BUILTIN_RDFSBASE64,
32267 IX86_BUILTIN_RDGSBASE32,
32268 IX86_BUILTIN_RDGSBASE64,
32269 IX86_BUILTIN_WRFSBASE32,
32270 IX86_BUILTIN_WRFSBASE64,
32271 IX86_BUILTIN_WRGSBASE32,
32272 IX86_BUILTIN_WRGSBASE64,
32274 /* RDRND instructions. */
32275 IX86_BUILTIN_RDRAND16_STEP,
32276 IX86_BUILTIN_RDRAND32_STEP,
32277 IX86_BUILTIN_RDRAND64_STEP,
32279 /* RDSEED instructions. */
32280 IX86_BUILTIN_RDSEED16_STEP,
32281 IX86_BUILTIN_RDSEED32_STEP,
32282 IX86_BUILTIN_RDSEED64_STEP,
32284 /* F16C instructions. */
32285 IX86_BUILTIN_CVTPH2PS,
32286 IX86_BUILTIN_CVTPH2PS256,
32287 IX86_BUILTIN_CVTPS2PH,
32288 IX86_BUILTIN_CVTPS2PH256,
32290 /* MONITORX and MWAITX instrucions. */
32291 IX86_BUILTIN_MONITORX,
32292 IX86_BUILTIN_MWAITX,
32294 /* CFString built-in for darwin */
32295 IX86_BUILTIN_CFSTRING,
32297 /* Builtins to get CPU type and supported features. */
32298 IX86_BUILTIN_CPU_INIT,
32299 IX86_BUILTIN_CPU_IS,
32300 IX86_BUILTIN_CPU_SUPPORTS,
32302 /* Read/write FLAGS register built-ins. */
32303 IX86_BUILTIN_READ_FLAGS,
32304 IX86_BUILTIN_WRITE_FLAGS,
32306 /* PKU instructions. */
32307 IX86_BUILTIN_RDPKRU,
32308 IX86_BUILTIN_WRPKRU,
32313 /* Table for the ix86 builtin decls. */
32314 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
32316 /* Table of all of the builtin functions that are possible with different ISA's
32317 but are waiting to be built until a function is declared to use that
32319 struct builtin_isa {
32320 const char *name; /* function name */
32321 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
32322 HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */
32323 bool const_p; /* true if the declaration is constant */
32324 bool leaf_p; /* true if the declaration has leaf attribute */
32325 bool nothrow_p; /* true if the declaration has nothrow attribute */
32326 bool set_and_not_built_p; /* true if the builtin was recorded (deferred)
				    but its decl has not been built yet; cleared
				    by def_builtin / ix86_add_new_builtins when
				    the decl is actually created */
32329 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
32331 /* Bits that can still enable any inclusion of a builtin. */
32332 static HOST_WIDE_INT deferred_isa_values = 0;
32334 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
32335 of which isa_flags to use in the ix86_builtins_isa array. Stores the
32336 function decl in the ix86_builtins array. Returns the function decl or
32337 NULL_TREE, if the builtin was not added.
32339 If the front end has a special hook for builtin functions, delay adding
32340 builtin functions that aren't in the current ISA until the ISA is changed
32341 with function specific optimization. Doing so, can save about 300K for the
32342 default compiler. When the builtin is expanded, check at that time whether
32345 If the front end doesn't have a special hook, record all builtins, even if
32346 it isn't an instruction set in the current ISA in case the user uses
32347 function specific options for a different ISA, so that we don't get scope
32348 errors if a builtin is added in the middle of a function scope. */
32351 def_builtin (HOST_WIDE_INT mask, const char *name,
32352 enum ix86_builtin_func_type tcode,
32353 enum ix86_builtins code)
32355 tree decl = NULL_TREE;
      /* Builtins gated on OPTION_MASK_ISA_64BIT are dropped entirely on
	 32-bit targets; decl stays NULL_TREE in that case.  */
32357 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
32359 ix86_builtins_isa[(int) code].isa = mask;
      /* The 64-bit bit is only a gating condition, not a real ISA flag,
	 so strip it before comparing against ix86_isa_flags.  */
32361 mask &= ~OPTION_MASK_ISA_64BIT;
32363 || (mask & ix86_isa_flags) != 0
32364 || (lang_hooks.builtin_function
32365 == lang_hooks.builtin_function_ext_scope)
      /* Build the decl now: the builtin's ISA is enabled, or the front
	 end provides no hook that would let us defer it.  */
32368 tree type = ix86_get_builtin_func_type (tcode);
32369 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
32371 ix86_builtins[(int) code] = decl;
32372 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
      /* Defer: record everything needed so ix86_add_new_builtins can
	 create the decl when the ISA becomes available.  */
32376 /* Just a MASK where set_and_not_built_p == true can potentially
32377 include a builtin. */
32378 deferred_isa_values |= mask;
32379 ix86_builtins[(int) code] = NULL_TREE;
32380 ix86_builtins_isa[(int) code].tcode = tcode;
32381 ix86_builtins_isa[(int) code].name = name;
32382 ix86_builtins_isa[(int) code].leaf_p = false;
32383 ix86_builtins_isa[(int) code].nothrow_p = false;
32384 ix86_builtins_isa[(int) code].const_p = false;
32385 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
32392 /* Like def_builtin, but also marks the function decl "const". */
32395 def_builtin_const (HOST_WIDE_INT mask, const char *name,
32396 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
32398 tree decl = def_builtin (mask, name, tcode, code)
      /* Mark the decl pure-functional (no side effects) for the optimizers.  */
32400 TREE_READONLY (decl) = 1;
      /* Also record const-ness in the deferred table, so a decl built later
	 by ix86_add_new_builtins gets TREE_READONLY as well.  */
32402 ix86_builtins_isa[(int) code].const_p = true;
32407 /* Add any new builtin functions for a given ISA that may not have been
32408 declared. This saves a bit of space compared to adding all of the
32409 declarations to the tree, even if we didn't use them. */
32412 ix86_add_new_builtins (HOST_WIDE_INT isa)
      /* Fast exit: no deferred builtin depends on any bit of ISA.  */
32414 if ((isa & deferred_isa_values) == 0)
32417 /* Bits in ISA value can be removed from potential isa values. */
32418 deferred_isa_values &= ~isa;
      /* Clear any pending target pragma while the decls are created, and
	 restore it afterwards, so the new decls do not pick up the pragma's
	 target attributes.  */
32421 tree saved_current_target_pragma = current_target_pragma;
32422 current_target_pragma = NULL_TREE;
32424 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
      /* Only deferred builtins whose recorded ISA intersects the newly
	 enabled bits are materialized here.  */
32426 if ((ix86_builtins_isa[i].isa & isa) != 0
32427 && ix86_builtins_isa[i].set_and_not_built_p)
32431 /* Don't define the builtin again. */
32432 ix86_builtins_isa[i].set_and_not_built_p = false;
32434 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
32435 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
32436 type, i, BUILT_IN_MD, NULL,
      /* Re-apply the attributes recorded when the builtin was deferred.  */
32439 ix86_builtins[i] = decl;
32440 if (ix86_builtins_isa[i].const_p)
32441 TREE_READONLY (decl) = 1;
32442 if (ix86_builtins_isa[i].leaf_p)
32443 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
32445 if (ix86_builtins_isa[i].nothrow_p)
32446 TREE_NOTHROW (decl) = 1;
32450 current_target_pragma = saved_current_target_pragma;
32453 /* Bits for builtin_description.flag. */
32455 /* Set when we don't support the comparison natively, and should
32456 swap_comparison in order to support it. */
32457 #define BUILTIN_DESC_SWAP_OPERANDS 1
/* One row of the builtin tables below: the ISA mask that enables the
   builtin, the insn pattern used to expand it, its user-visible name,
   its ix86 builtin code, and the rtx comparison code (UNKNOWN when the
   builtin is not a comparison).  */
32459 struct builtin_description
32461 const HOST_WIDE_INT mask;
32462 const enum insn_code icode;
32463 const char *const name;
32464 const enum ix86_builtins code;
32465 const enum rtx_code comparison;
/* Builtins expanding to the SSE/SSE2 comis[sd]/ucomis[sd] scalar FP
   compares.  NOTE(review): the eq/lt/le/neq rows carry the unordered rtx
   codes (UNEQ/UNLT/UNLE/LTGT) while gt/ge carry the ordered ones --
   presumably the comi expander compensates; confirm against it.  */
32469 static const struct builtin_description bdesc_comi[] =
32471 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
32472 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
32473 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
32474 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
32475 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
32476 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
32477 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
32478 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
32479 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
32480 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
32481 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
32482 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
32483 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
32484 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
32485 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
32486 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
32487 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
32488 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
32489 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
32490 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
32491 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
32492 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
32493 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
32494 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
/* SSE4.2 pcmpestr (explicit-length string compare) builtins.  The last
   field is 0 for the index/mask-producing forms, or (int) CC*mode for the
   forms that extract a single flag from the comparison result.  */
32497 static const struct builtin_description bdesc_pcmpestr[] =
32500 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
32501 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
32502 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
32503 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
32504 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
32505 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
32506 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
/* SSE4.2 pcmpistr (implicit-length string compare) builtins; same field
   layout as bdesc_pcmpestr above.  */
32509 static const struct builtin_description bdesc_pcmpistr[] =
32512 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
32513 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
32514 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
32515 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
32516 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
32517 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
32518 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
32521 /* Special builtins with variable number of arguments. */
32522 static const struct builtin_description bdesc_special_args[] =
32524 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
32525 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
32526 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },
32528 /* 80387 (for use internally for atomic compound assignment). */
32529 { 0, CODE_FOR_fnstenv, "__builtin_ia32_fnstenv", IX86_BUILTIN_FNSTENV, UNKNOWN, (int) VOID_FTYPE_PVOID },
32530 { 0, CODE_FOR_fldenv, "__builtin_ia32_fldenv", IX86_BUILTIN_FLDENV, UNKNOWN, (int) VOID_FTYPE_PCVOID },
32531 { 0, CODE_FOR_fnstsw, "__builtin_ia32_fnstsw", IX86_BUILTIN_FNSTSW, UNKNOWN, (int) USHORT_FTYPE_VOID },
32532 { 0, CODE_FOR_fnclex, "__builtin_ia32_fnclex", IX86_BUILTIN_FNCLEX, UNKNOWN, (int) VOID_FTYPE_VOID },
32535 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
32538 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
32540 /* FXSR, XSAVE, XSAVEOPT, XSAVEC and XSAVES. */
32541 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxsave", IX86_BUILTIN_FXSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID },
32542 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxrstor", IX86_BUILTIN_FXRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID },
32543 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xsave", IX86_BUILTIN_XSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
32544 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xrstor", IX86_BUILTIN_XRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
32545 { OPTION_MASK_ISA_XSAVEOPT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt", IX86_BUILTIN_XSAVEOPT, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
32546 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xsaves", IX86_BUILTIN_XSAVES, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
32547 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xrstors", IX86_BUILTIN_XRSTORS, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
32548 { OPTION_MASK_ISA_XSAVEC, CODE_FOR_nothing, "__builtin_ia32_xsavec", IX86_BUILTIN_XSAVEC, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
32550 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxsave64", IX86_BUILTIN_FXSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID },
32551 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxrstor64", IX86_BUILTIN_FXRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID },
32552 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsave64", IX86_BUILTIN_XSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
32553 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstor64", IX86_BUILTIN_XRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
32554 { OPTION_MASK_ISA_XSAVEOPT | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt64", IX86_BUILTIN_XSAVEOPT64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
32555 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaves64", IX86_BUILTIN_XSAVES64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
32556 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstors64", IX86_BUILTIN_XRSTORS64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
32557 { OPTION_MASK_ISA_XSAVEC | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsavec64", IX86_BUILTIN_XSAVEC64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
32560 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storeups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
32561 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
32562 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
32564 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
32565 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
32566 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
32567 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
32569 /* SSE or 3DNow!A */
32570 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
32571 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntq, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
32574 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
32575 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
32576 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storeupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
32577 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedquv16qi, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
32578 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
32579 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
32580 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
32581 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
32582 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
32583 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddquv16qi, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
32585 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
32586 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
32589 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
32592 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
32595 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
32596 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
32599 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
32600 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
32602 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
32603 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
32604 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
32605 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
32606 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
32608 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
32609 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
32610 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
32611 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
32612 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddquv32qi, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
32613 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedquv32qi, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
32614 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
32616 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
32617 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
32618 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
32620 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
32621 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
32622 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
32623 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
32624 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
32625 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
32626 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
32627 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
32630 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
32631 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
32632 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
32633 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
32634 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
32635 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
32636 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
32637 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
32638 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },
32641 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16sf_mask, "__builtin_ia32_compressstoresf512_mask", IX86_BUILTIN_COMPRESSPSSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_UHI },
32642 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16si_mask, "__builtin_ia32_compressstoresi512_mask", IX86_BUILTIN_PCOMPRESSDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_UHI },
32643 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8df_mask, "__builtin_ia32_compressstoredf512_mask", IX86_BUILTIN_COMPRESSPDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_UQI },
32644 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8di_mask, "__builtin_ia32_compressstoredi512_mask", IX86_BUILTIN_PCOMPRESSQSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_UQI },
32645 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandloadsf512_mask", IX86_BUILTIN_EXPANDPSLOAD512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_UHI },
32646 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandloadsf512_maskz", IX86_BUILTIN_EXPANDPSLOAD512Z, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_UHI },
32647 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandloadsi512_mask", IX86_BUILTIN_PEXPANDDLOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_UHI },
32648 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandloadsi512_maskz", IX86_BUILTIN_PEXPANDDLOAD512Z, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_UHI },
32649 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expandloaddf512_mask", IX86_BUILTIN_EXPANDPDLOAD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_UQI },
32650 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expandloaddf512_maskz", IX86_BUILTIN_EXPANDPDLOAD512Z, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_UQI },
32651 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expandloaddi512_mask", IX86_BUILTIN_PEXPANDQLOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_UQI },
32652 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expandloaddi512_maskz", IX86_BUILTIN_PEXPANDQLOAD512Z, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_UQI },
32653 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv16si_mask, "__builtin_ia32_loaddqusi512_mask", IX86_BUILTIN_LOADDQUSI512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_UHI },
32654 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv8di_mask, "__builtin_ia32_loaddqudi512_mask", IX86_BUILTIN_LOADDQUDI512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_UQI },
32655 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadupd512_mask, "__builtin_ia32_loadupd512_mask", IX86_BUILTIN_LOADUPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_UQI },
32656 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadups512_mask, "__builtin_ia32_loadups512_mask", IX86_BUILTIN_LOADUPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_UHI },
32657 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadaps512_mask", IX86_BUILTIN_LOADAPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_UHI },
32658 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32load512_mask", IX86_BUILTIN_MOVDQA32LOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_UHI },
32659 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadapd512_mask", IX86_BUILTIN_LOADAPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_UQI },
32660 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64load512_mask", IX86_BUILTIN_MOVDQA64LOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_UQI },
32661 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv16sf, "__builtin_ia32_movntps512", IX86_BUILTIN_MOVNTPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF },
32662 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8df, "__builtin_ia32_movntpd512", IX86_BUILTIN_MOVNTPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF },
32663 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8di, "__builtin_ia32_movntdq512", IX86_BUILTIN_MOVNTDQ512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI },
32664 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntdqa, "__builtin_ia32_movntdqa512", IX86_BUILTIN_MOVNTDQA512, UNKNOWN, (int) V8DI_FTYPE_PV8DI },
32665 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv16si_mask, "__builtin_ia32_storedqusi512_mask", IX86_BUILTIN_STOREDQUSI512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_UHI },
32666 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv8di_mask, "__builtin_ia32_storedqudi512_mask", IX86_BUILTIN_STOREDQUDI512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_UQI },
32667 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeupd512_mask, "__builtin_ia32_storeupd512_mask", IX86_BUILTIN_STOREUPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_UQI },
32668 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask_store, "__builtin_ia32_pmovusqd512mem_mask", IX86_BUILTIN_PMOVUSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_UQI },
32669 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask_store, "__builtin_ia32_pmovsqd512mem_mask", IX86_BUILTIN_PMOVSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_UQI },
32670 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask_store, "__builtin_ia32_pmovqd512mem_mask", IX86_BUILTIN_PMOVQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_UQI },
32671 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovusqw512mem_mask", IX86_BUILTIN_PMOVUSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_UQI },
32672 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovsqw512mem_mask", IX86_BUILTIN_PMOVSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_UQI },
32673 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovqw512mem_mask", IX86_BUILTIN_PMOVQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_UQI },
32674 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovusdw512mem_mask", IX86_BUILTIN_PMOVUSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_UHI },
32675 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovsdw512mem_mask", IX86_BUILTIN_PMOVSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_UHI },
32676 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovdw512mem_mask", IX86_BUILTIN_PMOVDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_UHI },
32677 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovqb512mem_mask", IX86_BUILTIN_PMOVQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_UQI },
32678 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovusqb512mem_mask", IX86_BUILTIN_PMOVUSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_UQI },
32679 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovsqb512mem_mask", IX86_BUILTIN_PMOVSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_UQI },
32680 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovusdb512mem_mask", IX86_BUILTIN_PMOVUSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_UHI },
32681 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovsdb512mem_mask", IX86_BUILTIN_PMOVSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_UHI },
32682 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovdb512mem_mask", IX86_BUILTIN_PMOVDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_UHI },
32683 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeups512_mask, "__builtin_ia32_storeups512_mask", IX86_BUILTIN_STOREUPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_UHI },
32684 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeaps512_mask", IX86_BUILTIN_STOREAPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_UHI },
32685 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_movdqa32store512_mask", IX86_BUILTIN_MOVDQA32STORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_UHI },
32686 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeapd512_mask", IX86_BUILTIN_STOREAPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_UQI },
32687 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_movdqa64store512_mask", IX86_BUILTIN_MOVDQA64STORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_UQI },
32689 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
32690 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
32691 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
32692 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
32693 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
32694 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
32697 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
32698 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
32699 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
32700 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
32701 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
32702 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
32703 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
32704 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
32707 { OPTION_MASK_ISA_RTM, CODE_FOR_xbegin, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
32708 { OPTION_MASK_ISA_RTM, CODE_FOR_xend, "__builtin_ia32_xend", IX86_BUILTIN_XEND, UNKNOWN, (int) VOID_FTYPE_VOID },
32709 { OPTION_MASK_ISA_RTM, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID },
32712 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loaddquv32hi_mask, "__builtin_ia32_loaddquhi512_mask", IX86_BUILTIN_LOADDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_PCV32HI_V32HI_USI },
32713 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_loaddquv64qi_mask, "__builtin_ia32_loaddquqi512_mask", IX86_BUILTIN_LOADDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_PCV64QI_V64QI_UDI },
32714 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storedquv32hi_mask, "__builtin_ia32_storedquhi512_mask", IX86_BUILTIN_STOREDQUHI512_MASK, UNKNOWN, (int) VOID_FTYPE_PV32HI_V32HI_USI },
32715 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storedquv64qi_mask, "__builtin_ia32_storedquqi512_mask", IX86_BUILTIN_STOREDQUQI512_MASK, UNKNOWN, (int) VOID_FTYPE_PV64QI_V64QI_UDI },
32718 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv16hi_mask, "__builtin_ia32_loaddquhi256_mask", IX86_BUILTIN_LOADDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_PCV16HI_V16HI_UHI },
32719 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv8hi_mask, "__builtin_ia32_loaddquhi128_mask", IX86_BUILTIN_LOADDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_PCV8HI_V8HI_UQI },
32720 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv32qi_mask, "__builtin_ia32_loaddquqi256_mask", IX86_BUILTIN_LOADDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_PCV32QI_V32QI_USI },
32721 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv16qi_mask, "__builtin_ia32_loaddquqi128_mask", IX86_BUILTIN_LOADDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_PCV16QI_V16QI_UHI },
32722 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4di_mask, "__builtin_ia32_movdqa64load256_mask", IX86_BUILTIN_MOVDQA64LOAD256_MASK, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_UQI },
32723 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2di_mask, "__builtin_ia32_movdqa64load128_mask", IX86_BUILTIN_MOVDQA64LOAD128_MASK, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_UQI },
32724 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8si_mask, "__builtin_ia32_movdqa32load256_mask", IX86_BUILTIN_MOVDQA32LOAD256_MASK, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_UQI },
32725 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4si_mask, "__builtin_ia32_movdqa32load128_mask", IX86_BUILTIN_MOVDQA32LOAD128_MASK, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_UQI },
32726 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4di_mask, "__builtin_ia32_movdqa64store256_mask", IX86_BUILTIN_MOVDQA64STORE256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_UQI },
32727 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2di_mask, "__builtin_ia32_movdqa64store128_mask", IX86_BUILTIN_MOVDQA64STORE128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_UQI },
32728 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8si_mask, "__builtin_ia32_movdqa32store256_mask", IX86_BUILTIN_MOVDQA32STORE256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_UQI },
32729 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4si_mask, "__builtin_ia32_movdqa32store128_mask", IX86_BUILTIN_MOVDQA32STORE128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_UQI },
32730 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4df_mask, "__builtin_ia32_loadapd256_mask", IX86_BUILTIN_LOADAPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_UQI },
32731 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2df_mask, "__builtin_ia32_loadapd128_mask", IX86_BUILTIN_LOADAPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_UQI },
32732 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8sf_mask, "__builtin_ia32_loadaps256_mask", IX86_BUILTIN_LOADAPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_UQI },
32733 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4sf_mask, "__builtin_ia32_loadaps128_mask", IX86_BUILTIN_LOADAPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_UQI },
32734 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4df_mask, "__builtin_ia32_storeapd256_mask", IX86_BUILTIN_STOREAPD256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_UQI },
32735 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2df_mask, "__builtin_ia32_storeapd128_mask", IX86_BUILTIN_STOREAPD128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_UQI },
32736 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8sf_mask, "__builtin_ia32_storeaps256_mask", IX86_BUILTIN_STOREAPS256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_UQI },
32737 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4sf_mask, "__builtin_ia32_storeaps128_mask", IX86_BUILTIN_STOREAPS128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_UQI },
32738 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loadupd256_mask, "__builtin_ia32_loadupd256_mask", IX86_BUILTIN_LOADUPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_UQI },
32739 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loadupd_mask, "__builtin_ia32_loadupd128_mask", IX86_BUILTIN_LOADUPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_UQI },
32740 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loadups256_mask, "__builtin_ia32_loadups256_mask", IX86_BUILTIN_LOADUPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_UQI },
32741 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_loadups_mask, "__builtin_ia32_loadups128_mask", IX86_BUILTIN_LOADUPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_UQI },
32742 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeupd256_mask, "__builtin_ia32_storeupd256_mask", IX86_BUILTIN_STOREUPD256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_UQI },
32743 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeupd_mask, "__builtin_ia32_storeupd128_mask", IX86_BUILTIN_STOREUPD128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_UQI },
32744 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeups256_mask, "__builtin_ia32_storeups256_mask", IX86_BUILTIN_STOREUPS256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_UQI },
32745 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeups_mask, "__builtin_ia32_storeups128_mask", IX86_BUILTIN_STOREUPS128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_UQI },
32746 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv4di_mask, "__builtin_ia32_loaddqudi256_mask", IX86_BUILTIN_LOADDQUDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_UQI },
32747 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv2di_mask, "__builtin_ia32_loaddqudi128_mask", IX86_BUILTIN_LOADDQUDI128_MASK, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_UQI },
32748 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv8si_mask, "__builtin_ia32_loaddqusi256_mask", IX86_BUILTIN_LOADDQUSI256_MASK, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_UQI },
32749 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv4si_mask, "__builtin_ia32_loaddqusi128_mask", IX86_BUILTIN_LOADDQUSI128_MASK, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_UQI },
32750 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv4di_mask, "__builtin_ia32_storedqudi256_mask", IX86_BUILTIN_STOREDQUDI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_UQI },
32751 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv2di_mask, "__builtin_ia32_storedqudi128_mask", IX86_BUILTIN_STOREDQUDI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_UQI },
32752 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv8si_mask, "__builtin_ia32_storedqusi256_mask", IX86_BUILTIN_STOREDQUSI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_UQI },
32753 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv4si_mask, "__builtin_ia32_storedqusi128_mask", IX86_BUILTIN_STOREDQUSI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_UQI },
32754 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv16hi_mask, "__builtin_ia32_storedquhi256_mask", IX86_BUILTIN_STOREDQUHI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16HI_UHI },
32755 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv8hi_mask, "__builtin_ia32_storedquhi128_mask", IX86_BUILTIN_STOREDQUHI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8HI_UQI },
32756 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv32qi_mask, "__builtin_ia32_storedquqi256_mask", IX86_BUILTIN_STOREDQUQI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32QI_USI },
32757 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv16qi_mask, "__builtin_ia32_storedquqi128_mask", IX86_BUILTIN_STOREDQUQI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16QI_UHI },
32758 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4df_mask, "__builtin_ia32_compressstoredf256_mask", IX86_BUILTIN_COMPRESSPDSTORE256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_UQI },
32759 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev2df_mask, "__builtin_ia32_compressstoredf128_mask", IX86_BUILTIN_COMPRESSPDSTORE128, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_UQI },
32760 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev8sf_mask, "__builtin_ia32_compressstoresf256_mask", IX86_BUILTIN_COMPRESSPSSTORE256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_UQI },
32761 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4sf_mask, "__builtin_ia32_compressstoresf128_mask", IX86_BUILTIN_COMPRESSPSSTORE128, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_UQI },
32762 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4di_mask, "__builtin_ia32_compressstoredi256_mask", IX86_BUILTIN_PCOMPRESSQSTORE256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_UQI },
32763 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev2di_mask, "__builtin_ia32_compressstoredi128_mask", IX86_BUILTIN_PCOMPRESSQSTORE128, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_UQI },
32764 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev8si_mask, "__builtin_ia32_compressstoresi256_mask", IX86_BUILTIN_PCOMPRESSDSTORE256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_UQI },
32765 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4si_mask, "__builtin_ia32_compressstoresi128_mask", IX86_BUILTIN_PCOMPRESSDSTORE128, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_UQI },
32766 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_mask, "__builtin_ia32_expandloaddf256_mask", IX86_BUILTIN_EXPANDPDLOAD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_UQI },
32767 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_mask, "__builtin_ia32_expandloaddf128_mask", IX86_BUILTIN_EXPANDPDLOAD128, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_UQI },
32768 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_mask, "__builtin_ia32_expandloadsf256_mask", IX86_BUILTIN_EXPANDPSLOAD256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_UQI },
32769 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_mask, "__builtin_ia32_expandloadsf128_mask", IX86_BUILTIN_EXPANDPSLOAD128, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_UQI },
32770 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_mask, "__builtin_ia32_expandloaddi256_mask", IX86_BUILTIN_PEXPANDQLOAD256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_UQI },
32771 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_mask, "__builtin_ia32_expandloaddi128_mask", IX86_BUILTIN_PEXPANDQLOAD128, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_UQI },
32772 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_mask, "__builtin_ia32_expandloadsi256_mask", IX86_BUILTIN_PEXPANDDLOAD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_UQI },
32773 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_mask, "__builtin_ia32_expandloadsi128_mask", IX86_BUILTIN_PEXPANDDLOAD128, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_UQI },
32774 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_maskz, "__builtin_ia32_expandloaddf256_maskz", IX86_BUILTIN_EXPANDPDLOAD256Z, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_UQI },
32775 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_maskz, "__builtin_ia32_expandloaddf128_maskz", IX86_BUILTIN_EXPANDPDLOAD128Z, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_UQI },
32776 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_maskz, "__builtin_ia32_expandloadsf256_maskz", IX86_BUILTIN_EXPANDPSLOAD256Z, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_UQI },
32777 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_maskz, "__builtin_ia32_expandloadsf128_maskz", IX86_BUILTIN_EXPANDPSLOAD128Z, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_UQI },
32778 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_maskz, "__builtin_ia32_expandloaddi256_maskz", IX86_BUILTIN_PEXPANDQLOAD256Z, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_UQI },
32779 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_maskz, "__builtin_ia32_expandloaddi128_maskz", IX86_BUILTIN_PEXPANDQLOAD128Z, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_UQI },
32780 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_maskz, "__builtin_ia32_expandloadsi256_maskz", IX86_BUILTIN_PEXPANDDLOAD256Z, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_UQI },
32781 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_maskz, "__builtin_ia32_expandloadsi128_maskz", IX86_BUILTIN_PEXPANDDLOAD128Z, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_UQI },
32782 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4si2_mask_store, "__builtin_ia32_pmovqd256mem_mask", IX86_BUILTIN_PMOVQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_UQI },
32783 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2si2_mask_store, "__builtin_ia32_pmovqd128mem_mask", IX86_BUILTIN_PMOVQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_UQI },
32784 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4si2_mask_store, "__builtin_ia32_pmovsqd256mem_mask", IX86_BUILTIN_PMOVSQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_UQI },
32785 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2si2_mask_store, "__builtin_ia32_pmovsqd128mem_mask", IX86_BUILTIN_PMOVSQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_UQI },
32786 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4si2_mask_store, "__builtin_ia32_pmovusqd256mem_mask", IX86_BUILTIN_PMOVUSQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_UQI },
32787 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2si2_mask_store, "__builtin_ia32_pmovusqd128mem_mask", IX86_BUILTIN_PMOVUSQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_UQI },
32788 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovqw256mem_mask", IX86_BUILTIN_PMOVQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_UQI },
32789 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovqw128mem_mask", IX86_BUILTIN_PMOVQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_UQI },
32790 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovsqw256mem_mask", IX86_BUILTIN_PMOVSQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_UQI },
32791 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovsqw128mem_mask", IX86_BUILTIN_PMOVSQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_UQI },
32792 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovusqw256mem_mask", IX86_BUILTIN_PMOVUSQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_UQI },
32793 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovusqw128mem_mask", IX86_BUILTIN_PMOVUSQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_UQI },
32794 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovqb256mem_mask", IX86_BUILTIN_PMOVQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_UQI },
32795 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovqb128mem_mask", IX86_BUILTIN_PMOVQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_UQI },
32796 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovsqb256mem_mask", IX86_BUILTIN_PMOVSQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_UQI },
32797 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovsqb128mem_mask", IX86_BUILTIN_PMOVSQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_UQI },
32798 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovusqb256mem_mask", IX86_BUILTIN_PMOVUSQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_UQI },
32799 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovusqb128mem_mask", IX86_BUILTIN_PMOVUSQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_UQI },
32800 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovdb256mem_mask", IX86_BUILTIN_PMOVDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_UQI },
32801 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovdb128mem_mask", IX86_BUILTIN_PMOVDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_UQI },
32802 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovsdb256mem_mask", IX86_BUILTIN_PMOVSDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_UQI },
32803 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovsdb128mem_mask", IX86_BUILTIN_PMOVSDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_UQI },
32804 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovusdb256mem_mask", IX86_BUILTIN_PMOVUSDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_UQI },
32805 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovusdb128mem_mask", IX86_BUILTIN_PMOVUSDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_UQI },
32806 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovdw256mem_mask", IX86_BUILTIN_PMOVDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_UQI },
32807 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovdw128mem_mask", IX86_BUILTIN_PMOVDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_UQI },
32808 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovsdw256mem_mask", IX86_BUILTIN_PMOVSDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_UQI },
32809 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovsdw128mem_mask", IX86_BUILTIN_PMOVSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_UQI },
32810 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovusdw256mem_mask", IX86_BUILTIN_PMOVUSDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_UQI },
32811 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovusdw128mem_mask", IX86_BUILTIN_PMOVUSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_UQI },
32814 { OPTION_MASK_ISA_PCOMMIT, CODE_FOR_pcommit, "__builtin_ia32_pcommit", IX86_BUILTIN_PCOMMIT, UNKNOWN, (int) VOID_FTYPE_VOID },
32816 /* RDPKRU and WRPKRU. */
32817 { OPTION_MASK_ISA_PKU, CODE_FOR_rdpkru, "__builtin_ia32_rdpkru", IX86_BUILTIN_RDPKRU, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
32818 { OPTION_MASK_ISA_PKU, CODE_FOR_wrpkru, "__builtin_ia32_wrpkru", IX86_BUILTIN_WRPKRU, UNKNOWN, (int) VOID_FTYPE_UNSIGNED }
32821 /* Builtins with variable number of arguments. */
32822 static const struct builtin_description bdesc_args[] =
32824 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
32825 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
32826 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
32827 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
32828 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
32829 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
32830 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
32833 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
32834 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32835 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
32836 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
32837 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32838 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
32840 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
32841 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32842 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
32843 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32844 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
32845 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32846 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
32847 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32849 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32850 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32852 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
32853 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
32854 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
32855 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
32857 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
32858 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32859 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
32860 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
32861 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32862 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
32864 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
32865 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32866 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
32867 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
32868 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI},
32869 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI},
32871 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
32872 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
32873 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
32875 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
32877 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
32878 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
32879 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
32880 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
32881 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
32882 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
32884 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
32885 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
32886 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
32887 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
32888 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
32889 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
32891 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
32892 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
32893 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
32894 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
32897 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
32898 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
32899 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
32900 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
32902 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
32903 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
32904 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
32905 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
32906 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
32907 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
32908 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
32909 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
32910 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
32911 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
32912 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
32913 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
32914 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
32915 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
32916 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32919 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
32920 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
32921 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
32922 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
32923 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
32924 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  /* SSE */
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  /* The "_nr" variants expand through the generic (non-sse_-prefixed)
     named patterns.  NOTE(review): presumably the forms that permit the
     reciprocal/Newton-Raphson expansions — confirm against sse.md.  */
  { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  /* Conversions; the *si64 variants additionally require 64-bit mode.  */
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },

  /* Packed and scalar (vm*) arithmetic.  */
  { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  /* Comparisons.  cmpgt/cmpge (and their cmpngt/cmpnge negations) reuse
     the LT/LE (resp. UNGE/UNGT) codes with the *_SWAP function types,
     which exchange the two vector operands.  */
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP},
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  /* Scalar (vm*) comparison forms.  */
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  /* Bitwise logic on float vectors.  */
  { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  /* Element moves / interleaves.  */
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
32992 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, V4SF_FTYPE_V4SF_DI },
  { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },

  /* Scalar sqrt/rsqrt/rcp merged into the destination vector.  */
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },

  /* __float128 helpers; registered with a null (0) public name.  */
  { OPTION_MASK_ISA_SSE, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },

  /* SSE MMX or 3Dnow!A */
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },

  /* SSE2 */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_floatv4siv4sf2, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_fix_notruncv4sfv4si, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },

  /* Packed and scalar (vm*) double arithmetic.  */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  /* Comparisons; same LT/LE + *_SWAP scheme as the SSE float entries.  */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP},
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },

  /* SSE2 128-bit integer arithmetic.  */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  /* Saturating add/sub (ss* = signed-saturating, us* = unsigned).  */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN,(int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_umult_even_v4si, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },

  /* Shifts.  The whole-register byte shifts (pslldq/psrldq) go through
     the V1TI shift patterns with an INT_CONVERT function type; the *i
     forms take an immediate count (SI_COUNT), the others a vector count.  */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
33186 { OPTION_MASK_ISA_SSE, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
  /* SSE2 MMX (64-bit vector forms).  */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },

  /* SSE3 */
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF},
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  /* SSSE3.  Each operation is listed in its 128-bit (XMM) and 64-bit
     (MMX) forms, back to back.  */
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },

  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
33237 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
33238 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
33241 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33242 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33243 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
33244 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
33245 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33246 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33247 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33248 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
33249 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
33250 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
33252 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
33253 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
33254 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
33255 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
33256 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
33257 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
33258 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
33259 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
33260 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
33261 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
33262 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
33263 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
33264 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
33266 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
33267 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
33268 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
33269 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33270 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33271 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33272 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
33273 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33274 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33275 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33276 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
33277 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33280 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
33281 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
33282 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33283 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33285 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
33286 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
33287 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
33288 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },
33290 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
33291 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
33293 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
33294 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
33296 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
33297 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
33298 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
33299 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
33301 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND },
33302 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND },
33304 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
33305 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF },
33307 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptestv2di, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
33308 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptestv2di, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
33309 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptestv2di, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
33312 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
33313 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
33314 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
33315 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
33316 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
33319 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
33320 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
33321 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
33322 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
33325 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
33326 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
33328 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
33329 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
33330 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
33331 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
33334 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
33337 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33338 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33339 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33340 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33341 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33342 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33343 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33344 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33345 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33346 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33347 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33348 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33349 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33350 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33351 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33352 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33353 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33354 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33355 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33356 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33357 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33358 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33359 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33360 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33361 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33362 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33364 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
33365 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
33366 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
33367 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
33369 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
33370 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
33371 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
33372 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
33373 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
33374 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
33375 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
33376 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33377 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33378 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33379 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33380 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
33381 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
33382 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
33383 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
33384 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
33385 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
33386 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
33387 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
33388 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_fix_notruncv8sfv8si, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
33389 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
33390 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
33391 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
33392 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
33393 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
33394 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
33395 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
33396 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
33397 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
33398 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
33399 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
33400 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
33401 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
33402 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
33404 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
33405 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
33406 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
33408 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
33409 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
33410 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
33411 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
33412 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
33414 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
33416 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
33417 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
33419 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
33420 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
33421 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
33422 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },
33424 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
33425 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
33427 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
33428 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
33430 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
33431 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
33432 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
33433 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },
33435 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND },
33436 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND },
33438 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
33439 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
33441 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33442 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33443 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33444 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33446 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
33447 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
33448 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
33449 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
33450 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
33451 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
33453 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
33454 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
33455 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
33456 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
33457 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
33458 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
33459 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
33460 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
33461 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
33462 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
33463 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
33464 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
33465 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptestv4di, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
33466 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptestv4di, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
33467 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptestv4di, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
33469 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
33470 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
33472 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33473 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33475 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256 ", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
33478 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
33479 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
33480 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
33481 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
33482 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
33483 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
33484 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
33485 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
33486 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33487 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33488 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33489 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
33490 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33491 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33492 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33493 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33494 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv2ti, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
33495 { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
33496 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
33497 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33498 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33499 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
33500 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
33501 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33502 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33503 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33504 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
33505 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33506 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33507 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33508 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
33509 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33510 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33511 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33512 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33513 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33514 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33515 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
33516 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
33517 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33518 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33519 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3 , "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33520 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33521 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33522 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3 , "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33523 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33524 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33525 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3 , "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33526 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33527 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33528 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3 , "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33529 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
33530 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
33531 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2 , "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
33532 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2 , "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
33533 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2 , "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
33534 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2 , "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
33535 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2 , "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
33536 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
33537 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2 , "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
33538 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2 , "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
33539 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2 , "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
33540 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2 , "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
33541 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2 , "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
33542 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_smult_even_v8si, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
33543 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmulhrswv16hi3 , "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33544 { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256" , IX86_BUILTIN_PMULHUW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33545 { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256" , IX86_BUILTIN_PMULHW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33546 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256" , IX86_BUILTIN_PMULLW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33547 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256" , IX86_BUILTIN_PMULLD256 , UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33548 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_umult_even_v8si, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
33549 { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
33550 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
33551 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33552 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
33553 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
33554 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
33555 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33556 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33557 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3 , "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33558 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
33559 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
33560 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
33561 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
33562 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
33563 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
33564 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
33565 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
33566 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
33567 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
33568 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
33569 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
33570 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
33571 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
33572 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
33573 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
33574 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
33575 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
33576 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33577 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33578 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33579 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
33580 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33581 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33582 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33583 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33584 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33585 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33586 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33587 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
33588 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33589 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33590 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33591 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
33592 { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
33593 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
33594 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
33595 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
33596 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
33597 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
33598 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
33599 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
33600 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
33601 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
33602 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
33603 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
33604 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
33605 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
33606 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
33607 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33608 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
33609 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
33610 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
33611 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
33612 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vextractf128v4di, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
33613 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vinsertf128v4di, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
33614 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
33615 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
33616 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33617 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33618 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33619 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33620 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
33621 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
33622 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33623 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33625 { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
33628 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
33629 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
33630 { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
33633 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
33634 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
33637 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
33638 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
33639 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
33640 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
33643 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
33644 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
33645 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
33646 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
33647 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
33648 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
33651 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_256si, "__builtin_ia32_si512_256si", IX86_BUILTIN_SI512_SI256, UNKNOWN, (int) V16SI_FTYPE_V8SI },
33652 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_256ps, "__builtin_ia32_ps512_256ps", IX86_BUILTIN_PS512_PS256, UNKNOWN, (int) V16SF_FTYPE_V8SF },
33653 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_256pd, "__builtin_ia32_pd512_256pd", IX86_BUILTIN_PD512_PD256, UNKNOWN, (int) V8DF_FTYPE_V4DF },
33654 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_si, "__builtin_ia32_si512_si", IX86_BUILTIN_SI512_SI, UNKNOWN, (int) V16SI_FTYPE_V4SI },
33655 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_ps, "__builtin_ia32_ps512_ps", IX86_BUILTIN_PS512_PS, UNKNOWN, (int) V16SF_FTYPE_V4SF },
33656 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_pd, "__builtin_ia32_pd512_pd", IX86_BUILTIN_PD512_PD, UNKNOWN, (int) V8DF_FTYPE_V2DF },
33657 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv16si_mask, "__builtin_ia32_alignd512_mask", IX86_BUILTIN_ALIGND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_UHI },
33658 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv8di_mask, "__builtin_ia32_alignq512_mask", IX86_BUILTIN_ALIGNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UQI },
33659 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16si, "__builtin_ia32_blendmd_512_mask", IX86_BUILTIN_BLENDMD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI },
33660 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8df, "__builtin_ia32_blendmpd_512_mask", IX86_BUILTIN_BLENDMPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI },
33661 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16sf, "__builtin_ia32_blendmps_512_mask", IX86_BUILTIN_BLENDMPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI },
33662 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8di, "__builtin_ia32_blendmq_512_mask", IX86_BUILTIN_BLENDMQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI },
33663 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x4_512", IX86_BUILTIN_BROADCASTF32X4_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_UHI },
33664 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8df_mask, "__builtin_ia32_broadcastf64x4_512", IX86_BUILTIN_BROADCASTF64X4_512, UNKNOWN, (int) V8DF_FTYPE_V4DF_V8DF_UQI },
33665 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16si_mask, "__builtin_ia32_broadcasti32x4_512", IX86_BUILTIN_BROADCASTI32X4_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_UHI },
33666 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8di_mask, "__builtin_ia32_broadcasti64x4_512", IX86_BUILTIN_BROADCASTI64X4_512, UNKNOWN, (int) V8DI_FTYPE_V4DI_V8DI_UQI },
33667 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8df_mask, "__builtin_ia32_broadcastsd512", IX86_BUILTIN_BROADCASTSD512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_UQI },
33668 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16sf_mask, "__builtin_ia32_broadcastss512", IX86_BUILTIN_BROADCASTSS512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_UHI },
33669 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16si3_mask, "__builtin_ia32_cmpd512_mask", IX86_BUILTIN_CMPD512, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_INT_UHI },
33670 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8di3_mask, "__builtin_ia32_cmpq512_mask", IX86_BUILTIN_CMPQ512, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_INT_UQI },
33671 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8df_mask, "__builtin_ia32_compressdf512_mask", IX86_BUILTIN_COMPRESSPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI },
33672 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16sf_mask, "__builtin_ia32_compresssf512_mask", IX86_BUILTIN_COMPRESSPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI },
33673 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv8siv8df2_mask, "__builtin_ia32_cvtdq2pd512_mask", IX86_BUILTIN_CVTDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_UQI },
33674 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtps2ph512_mask, "__builtin_ia32_vcvtps2ph512_mask", IX86_BUILTIN_CVTPS2PH512, UNKNOWN, (int) V16HI_FTYPE_V16SF_INT_V16HI_UHI },
33675 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv8siv8df2_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_UQI },
33676 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2sd32, "__builtin_ia32_cvtusi2sd32", IX86_BUILTIN_CVTUSI2SD32, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT },
33677 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expanddf512_mask", IX86_BUILTIN_EXPANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI },
33678 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expanddf512_maskz", IX86_BUILTIN_EXPANDPD512Z, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI },
33679 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandsf512_mask", IX86_BUILTIN_EXPANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI },
33680 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandsf512_maskz", IX86_BUILTIN_EXPANDPS512Z, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI },
33681 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf32x4_mask, "__builtin_ia32_extractf32x4_mask", IX86_BUILTIN_EXTRACTF32X4, UNKNOWN, (int) V4SF_FTYPE_V16SF_INT_V4SF_UQI },
33682 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf64x4_mask, "__builtin_ia32_extractf64x4_mask", IX86_BUILTIN_EXTRACTF64X4, UNKNOWN, (int) V4DF_FTYPE_V8DF_INT_V4DF_UQI },
33683 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti32x4_mask, "__builtin_ia32_extracti32x4_mask", IX86_BUILTIN_EXTRACTI32X4, UNKNOWN, (int) V4SI_FTYPE_V16SI_INT_V4SI_UQI },
33684 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti64x4_mask, "__builtin_ia32_extracti64x4_mask", IX86_BUILTIN_EXTRACTI64X4, UNKNOWN, (int) V4DI_FTYPE_V8DI_INT_V4DI_UQI },
33685 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf32x4_mask, "__builtin_ia32_insertf32x4_mask", IX86_BUILTIN_INSERTF32X4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V4SF_INT_V16SF_UHI },
33686 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf64x4_mask, "__builtin_ia32_insertf64x4_mask", IX86_BUILTIN_INSERTF64X4, UNKNOWN, (int) V8DF_FTYPE_V8DF_V4DF_INT_V8DF_UQI },
33687 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti32x4_mask, "__builtin_ia32_inserti32x4_mask", IX86_BUILTIN_INSERTI32X4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_INT_V16SI_UHI },
33688 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti64x4_mask, "__builtin_ia32_inserti64x4_mask", IX86_BUILTIN_INSERTI64X4, UNKNOWN, (int) V8DI_FTYPE_V8DI_V4DI_INT_V8DI_UQI },
33689 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_movapd512_mask", IX86_BUILTIN_MOVAPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI },
33690 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_movaps512_mask", IX86_BUILTIN_MOVAPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI },
33691 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movddup512_mask, "__builtin_ia32_movddup512_mask", IX86_BUILTIN_MOVDDUP512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI },
33692 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32_512_mask", IX86_BUILTIN_MOVDQA32_512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI },
33693 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64_512_mask", IX86_BUILTIN_MOVDQA64_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI },
33694 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movshdup512_mask, "__builtin_ia32_movshdup512_mask", IX86_BUILTIN_MOVSHDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI },
33695 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movsldup512_mask, "__builtin_ia32_movsldup512_mask", IX86_BUILTIN_MOVSLDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI },
33696 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv16si2_mask, "__builtin_ia32_pabsd512_mask", IX86_BUILTIN_PABSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI },
33697 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv8di2_mask, "__builtin_ia32_pabsq512_mask", IX86_BUILTIN_PABSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI },
33698 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16si3_mask, "__builtin_ia32_paddd512_mask", IX86_BUILTIN_PADDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33699 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8di3_mask, "__builtin_ia32_paddq512_mask", IX86_BUILTIN_PADDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33700 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv16si3_mask, "__builtin_ia32_pandd512_mask", IX86_BUILTIN_PANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33701 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv16si3_mask, "__builtin_ia32_pandnd512_mask", IX86_BUILTIN_PANDND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33702 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv8di3_mask, "__builtin_ia32_pandnq512_mask", IX86_BUILTIN_PANDNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33703 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv8di3_mask, "__builtin_ia32_pandq512_mask", IX86_BUILTIN_PANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33704 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16si_mask, "__builtin_ia32_pbroadcastd512", IX86_BUILTIN_PBROADCASTD512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_UHI },
33705 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv16si_mask, "__builtin_ia32_pbroadcastd512_gpr_mask", IX86_BUILTIN_PBROADCASTD512_GPR, UNKNOWN, (int) V16SI_FTYPE_SI_V16SI_UHI },
33706 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_broadcastmb512", IX86_BUILTIN_PBROADCASTMB512, UNKNOWN, (int) V8DI_FTYPE_UQI },
33707 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv16si, "__builtin_ia32_broadcastmw512", IX86_BUILTIN_PBROADCASTMW512, UNKNOWN, (int) V16SI_FTYPE_UHI },
33708 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8di_mask, "__builtin_ia32_pbroadcastq512", IX86_BUILTIN_PBROADCASTQ512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_UQI },
33709 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_UQI },
33710 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv16si3_mask, "__builtin_ia32_pcmpeqd512_mask", IX86_BUILTIN_PCMPEQD512_MASK, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_UHI },
33711 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv8di3_mask, "__builtin_ia32_pcmpeqq512_mask", IX86_BUILTIN_PCMPEQQ512_MASK, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_UQI },
33712 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv16si3_mask, "__builtin_ia32_pcmpgtd512_mask", IX86_BUILTIN_PCMPGTD512_MASK, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_UHI },
33713 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv8di3_mask, "__builtin_ia32_pcmpgtq512_mask", IX86_BUILTIN_PCMPGTQ512_MASK, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_UQI },
33714 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16si_mask, "__builtin_ia32_compresssi512_mask", IX86_BUILTIN_PCOMPRESSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI },
33715 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8di_mask, "__builtin_ia32_compressdi512_mask", IX86_BUILTIN_PCOMPRESSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI },
33716 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandsi512_mask", IX86_BUILTIN_PEXPANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI },
33717 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandsi512_maskz", IX86_BUILTIN_PEXPANDD512Z, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI },
33718 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expanddi512_mask", IX86_BUILTIN_PEXPANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI },
33719 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expanddi512_maskz", IX86_BUILTIN_PEXPANDQ512Z, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI },
33720 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16si3_mask, "__builtin_ia32_pmaxsd512_mask", IX86_BUILTIN_PMAXSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33721 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8di3_mask, "__builtin_ia32_pmaxsq512_mask", IX86_BUILTIN_PMAXSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33722 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv16si3_mask, "__builtin_ia32_pmaxud512_mask", IX86_BUILTIN_PMAXUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33723 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv8di3_mask, "__builtin_ia32_pmaxuq512_mask", IX86_BUILTIN_PMAXUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33724 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16si3_mask, "__builtin_ia32_pminsd512_mask", IX86_BUILTIN_PMINSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33725 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8di3_mask, "__builtin_ia32_pminsq512_mask", IX86_BUILTIN_PMINSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33726 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv16si3_mask, "__builtin_ia32_pminud512_mask", IX86_BUILTIN_PMINUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33727 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv8di3_mask, "__builtin_ia32_pminuq512_mask", IX86_BUILTIN_PMINUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33728 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask, "__builtin_ia32_pmovdb512_mask", IX86_BUILTIN_PMOVDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_UHI },
33729 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask, "__builtin_ia32_pmovdw512_mask", IX86_BUILTIN_PMOVDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_UHI },
33730 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask, "__builtin_ia32_pmovqb512_mask", IX86_BUILTIN_PMOVQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_UQI },
33731 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask, "__builtin_ia32_pmovqd512_mask", IX86_BUILTIN_PMOVQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_UQI },
33732 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask, "__builtin_ia32_pmovqw512_mask", IX86_BUILTIN_PMOVQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_UQI },
33733 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask, "__builtin_ia32_pmovsdb512_mask", IX86_BUILTIN_PMOVSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_UHI },
33734 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask, "__builtin_ia32_pmovsdw512_mask", IX86_BUILTIN_PMOVSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_UHI },
33735 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask, "__builtin_ia32_pmovsqb512_mask", IX86_BUILTIN_PMOVSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_UQI },
33736 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask, "__builtin_ia32_pmovsqd512_mask", IX86_BUILTIN_PMOVSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_UQI },
33737 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask, "__builtin_ia32_pmovsqw512_mask", IX86_BUILTIN_PMOVSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_UQI },
33738 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16qiv16si2_mask, "__builtin_ia32_pmovsxbd512_mask", IX86_BUILTIN_PMOVSXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_UHI },
33739 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8qiv8di2_mask, "__builtin_ia32_pmovsxbq512_mask", IX86_BUILTIN_PMOVSXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_UQI },
33740 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8siv8di2_mask, "__builtin_ia32_pmovsxdq512_mask", IX86_BUILTIN_PMOVSXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_UQI },
33741 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16hiv16si2_mask, "__builtin_ia32_pmovsxwd512_mask", IX86_BUILTIN_PMOVSXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_UHI },
33742 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8hiv8di2_mask, "__builtin_ia32_pmovsxwq512_mask", IX86_BUILTIN_PMOVSXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_UQI },
33743 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask, "__builtin_ia32_pmovusdb512_mask", IX86_BUILTIN_PMOVUSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_UHI },
33744 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask, "__builtin_ia32_pmovusdw512_mask", IX86_BUILTIN_PMOVUSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_UHI },
33745 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask, "__builtin_ia32_pmovusqb512_mask", IX86_BUILTIN_PMOVUSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_UQI },
33746 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask, "__builtin_ia32_pmovusqd512_mask", IX86_BUILTIN_PMOVUSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_UQI },
33747 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask, "__builtin_ia32_pmovusqw512_mask", IX86_BUILTIN_PMOVUSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_UQI },
33748 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16qiv16si2_mask, "__builtin_ia32_pmovzxbd512_mask", IX86_BUILTIN_PMOVZXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_UHI },
33749 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8qiv8di2_mask, "__builtin_ia32_pmovzxbq512_mask", IX86_BUILTIN_PMOVZXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_UQI },
33750 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8siv8di2_mask, "__builtin_ia32_pmovzxdq512_mask", IX86_BUILTIN_PMOVZXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_UQI },
33751 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16hiv16si2_mask, "__builtin_ia32_pmovzxwd512_mask", IX86_BUILTIN_PMOVZXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_UHI },
33752 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8hiv8di2_mask, "__builtin_ia32_pmovzxwq512_mask", IX86_BUILTIN_PMOVZXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_UQI },
33753 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_smult_even_v16si_mask, "__builtin_ia32_pmuldq512_mask", IX86_BUILTIN_PMULDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_UQI },
33754 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16si3_mask, "__builtin_ia32_pmulld512_mask" , IX86_BUILTIN_PMULLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33755 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_umult_even_v16si_mask, "__builtin_ia32_pmuludq512_mask", IX86_BUILTIN_PMULUDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_UQI },
33756 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv16si3_mask, "__builtin_ia32_pord512_mask", IX86_BUILTIN_PORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33757 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv8di3_mask, "__builtin_ia32_porq512_mask", IX86_BUILTIN_PORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33758 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv16si_mask, "__builtin_ia32_prold512_mask", IX86_BUILTIN_PROLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI },
33759 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv8di_mask, "__builtin_ia32_prolq512_mask", IX86_BUILTIN_PROLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI },
33760 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv16si_mask, "__builtin_ia32_prolvd512_mask", IX86_BUILTIN_PROLVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33761 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv8di_mask, "__builtin_ia32_prolvq512_mask", IX86_BUILTIN_PROLVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33762 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv16si_mask, "__builtin_ia32_prord512_mask", IX86_BUILTIN_PRORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI },
33763 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv8di_mask, "__builtin_ia32_prorq512_mask", IX86_BUILTIN_PRORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI },
33764 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv16si_mask, "__builtin_ia32_prorvd512_mask", IX86_BUILTIN_PRORVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33765 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv8di_mask, "__builtin_ia32_prorvq512_mask", IX86_BUILTIN_PRORVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33766 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pshufdv3_mask, "__builtin_ia32_pshufd512_mask", IX86_BUILTIN_PSHUFD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI },
33767 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslld512_mask", IX86_BUILTIN_PSLLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_UHI },
33768 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslldi512_mask", IX86_BUILTIN_PSLLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI },
33769 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllq512_mask", IX86_BUILTIN_PSLLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_UQI },
33770 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllqi512_mask", IX86_BUILTIN_PSLLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI },
33771 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv16si_mask, "__builtin_ia32_psllv16si_mask", IX86_BUILTIN_PSLLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33772 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv8di_mask, "__builtin_ia32_psllv8di_mask", IX86_BUILTIN_PSLLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33773 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psrad512_mask", IX86_BUILTIN_PSRAD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_UHI },
33774 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psradi512_mask", IX86_BUILTIN_PSRADI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI },
33775 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraq512_mask", IX86_BUILTIN_PSRAQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_UQI },
33776 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraqi512_mask", IX86_BUILTIN_PSRAQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI },
33777 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv16si_mask, "__builtin_ia32_psrav16si_mask", IX86_BUILTIN_PSRAVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33778 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv8di_mask, "__builtin_ia32_psrav8di_mask", IX86_BUILTIN_PSRAVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33779 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrld512_mask", IX86_BUILTIN_PSRLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_UHI },
33780 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrldi512_mask", IX86_BUILTIN_PSRLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI },
33781 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlq512_mask", IX86_BUILTIN_PSRLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_UQI },
33782 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlqi512_mask", IX86_BUILTIN_PSRLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI },
33783 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv16si_mask, "__builtin_ia32_psrlv16si_mask", IX86_BUILTIN_PSRLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33784 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv8di_mask, "__builtin_ia32_psrlv8di_mask", IX86_BUILTIN_PSRLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33785 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16si3_mask, "__builtin_ia32_psubd512_mask", IX86_BUILTIN_PSUBD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33786 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8di3_mask, "__builtin_ia32_psubq512_mask", IX86_BUILTIN_PSUBQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33787 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv16si3_mask, "__builtin_ia32_ptestmd512", IX86_BUILTIN_PTESTMD512, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_UHI },
33788 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv8di3_mask, "__builtin_ia32_ptestmq512", IX86_BUILTIN_PTESTMQ512, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_UQI },
33789 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv16si3_mask, "__builtin_ia32_ptestnmd512", IX86_BUILTIN_PTESTNMD512, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_UHI },
33790 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv8di3_mask, "__builtin_ia32_ptestnmq512", IX86_BUILTIN_PTESTNMQ512, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_UQI },
33791 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv16si_mask, "__builtin_ia32_punpckhdq512_mask", IX86_BUILTIN_PUNPCKHDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33792 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv8di_mask, "__builtin_ia32_punpckhqdq512_mask", IX86_BUILTIN_PUNPCKHQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33793 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv16si_mask, "__builtin_ia32_punpckldq512_mask", IX86_BUILTIN_PUNPCKLDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33794 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv8di_mask, "__builtin_ia32_punpcklqdq512_mask", IX86_BUILTIN_PUNPCKLQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33795 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv16si3_mask, "__builtin_ia32_pxord512_mask", IX86_BUILTIN_PXORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33796 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv8di3_mask, "__builtin_ia32_pxorq512_mask", IX86_BUILTIN_PXORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33797 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v8df_mask, "__builtin_ia32_rcp14pd512_mask", IX86_BUILTIN_RCP14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI },
33798 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v16sf_mask, "__builtin_ia32_rcp14ps512_mask", IX86_BUILTIN_RCP14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI },
33799 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v2df, "__builtin_ia32_rcp14sd", IX86_BUILTIN_RCP14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33800 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v4sf, "__builtin_ia32_rcp14ss", IX86_BUILTIN_RCP14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33801 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v8df_mask, "__builtin_ia32_rsqrt14pd512_mask", IX86_BUILTIN_RSQRT14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI },
33802 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v16sf_mask, "__builtin_ia32_rsqrt14ps512_mask", IX86_BUILTIN_RSQRT14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI },
33803 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v2df, "__builtin_ia32_rsqrt14sd", IX86_BUILTIN_RSQRT14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33804 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v4sf, "__builtin_ia32_rsqrt14ss", IX86_BUILTIN_RSQRT14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33805 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufpd512_mask, "__builtin_ia32_shufpd512_mask", IX86_BUILTIN_SHUFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_UQI },
33806 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufps512_mask, "__builtin_ia32_shufps512_mask", IX86_BUILTIN_SHUFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_UHI },
33807 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_mask", IX86_BUILTIN_SHUF_F32x4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_UHI },
33808 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_mask", IX86_BUILTIN_SHUF_F64x2, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_UQI },
33809 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_mask", IX86_BUILTIN_SHUF_I32x4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_UHI },
33810 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_mask", IX86_BUILTIN_SHUF_I64x2, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UQI },
33811 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv16si3_mask, "__builtin_ia32_ucmpd512_mask", IX86_BUILTIN_UCMPD512, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_INT_UHI },
33812 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv8di3_mask, "__builtin_ia32_ucmpq512_mask", IX86_BUILTIN_UCMPQ512, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_INT_UQI },
33813 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhpd512_mask, "__builtin_ia32_unpckhpd512_mask", IX86_BUILTIN_UNPCKHPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI },
33814 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhps512_mask, "__builtin_ia32_unpckhps512_mask", IX86_BUILTIN_UNPCKHPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI },
33815 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklpd512_mask, "__builtin_ia32_unpcklpd512_mask", IX86_BUILTIN_UNPCKLPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI },
33816 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklps512_mask, "__builtin_ia32_unpcklps512_mask", IX86_BUILTIN_UNPCKLPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI },
33817 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv16si2_mask, "__builtin_ia32_vplzcntd_512_mask", IX86_BUILTIN_VPCLZCNTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI },
33818 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv8di2_mask, "__builtin_ia32_vplzcntq_512_mask", IX86_BUILTIN_VPCLZCNTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI },
33819 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv16si_mask, "__builtin_ia32_vpconflictsi_512_mask", IX86_BUILTIN_VPCONFLICTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI },
33820 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv8di_mask, "__builtin_ia32_vpconflictdi_512_mask", IX86_BUILTIN_VPCONFLICTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI },
33821 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8df_mask, "__builtin_ia32_permdf512_mask", IX86_BUILTIN_VPERMDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_UQI },
33822 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8di_mask, "__builtin_ia32_permdi512_mask", IX86_BUILTIN_VPERMDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI },
33823 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16si3_mask, "__builtin_ia32_vpermi2vard512_mask", IX86_BUILTIN_VPERMI2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33824 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8df3_mask, "__builtin_ia32_vpermi2varpd512_mask", IX86_BUILTIN_VPERMI2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_UQI },
33825 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16sf3_mask, "__builtin_ia32_vpermi2varps512_mask", IX86_BUILTIN_VPERMI2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_UHI },
33826 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8di3_mask, "__builtin_ia32_vpermi2varq512_mask", IX86_BUILTIN_VPERMI2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33827 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv8df_mask, "__builtin_ia32_vpermilpd512_mask", IX86_BUILTIN_VPERMILPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_UQI },
33828 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv16sf_mask, "__builtin_ia32_vpermilps512_mask", IX86_BUILTIN_VPERMILPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_UHI },
33829 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv8df3_mask, "__builtin_ia32_vpermilvarpd512_mask", IX86_BUILTIN_VPERMILVARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_UQI },
33830 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv16sf3_mask, "__builtin_ia32_vpermilvarps512_mask", IX86_BUILTIN_VPERMILVARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_UHI },
33831 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_mask, "__builtin_ia32_vpermt2vard512_mask", IX86_BUILTIN_VPERMT2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33832 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_maskz, "__builtin_ia32_vpermt2vard512_maskz", IX86_BUILTIN_VPERMT2VARD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33833 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_mask, "__builtin_ia32_vpermt2varpd512_mask", IX86_BUILTIN_VPERMT2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_UQI },
33834 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_maskz, "__builtin_ia32_vpermt2varpd512_maskz", IX86_BUILTIN_VPERMT2VARPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_UQI },
33835 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_mask, "__builtin_ia32_vpermt2varps512_mask", IX86_BUILTIN_VPERMT2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_UHI },
33836 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_maskz, "__builtin_ia32_vpermt2varps512_maskz", IX86_BUILTIN_VPERMT2VARPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_UHI },
33837 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_mask, "__builtin_ia32_vpermt2varq512_mask", IX86_BUILTIN_VPERMT2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33838 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_maskz, "__builtin_ia32_vpermt2varq512_maskz", IX86_BUILTIN_VPERMT2VARQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33839 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8df_mask, "__builtin_ia32_permvardf512_mask", IX86_BUILTIN_VPERMVARDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_UQI },
33840 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8di_mask, "__builtin_ia32_permvardi512_mask", IX86_BUILTIN_VPERMVARDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33841 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16sf_mask, "__builtin_ia32_permvarsf512_mask", IX86_BUILTIN_VPERMVARSF512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_UHI },
33842 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16si_mask, "__builtin_ia32_permvarsi512_mask", IX86_BUILTIN_VPERMVARSI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33843 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_mask, "__builtin_ia32_pternlogd512_mask", IX86_BUILTIN_VTERNLOGD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_UHI },
33844 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_maskz, "__builtin_ia32_pternlogd512_maskz", IX86_BUILTIN_VTERNLOGD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_UHI },
33845 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_mask, "__builtin_ia32_pternlogq512_mask", IX86_BUILTIN_VTERNLOGQ512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_UQI },
33846 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_maskz, "__builtin_ia32_pternlogq512_maskz", IX86_BUILTIN_VTERNLOGQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_UQI },
33848 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv16sf3, "__builtin_ia32_copysignps512", IX86_BUILTIN_CPYSGNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF },
33849 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv8df3, "__builtin_ia32_copysignpd512", IX86_BUILTIN_CPYSGNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF },
33850 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2, "__builtin_ia32_sqrtpd512", IX86_BUILTIN_SQRTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF },
33851 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sqrtv16sf2, "__builtin_ia32_sqrtps512", IX86_BUILTIN_SQRTPS_NR512, UNKNOWN, (int) V16SF_FTYPE_V16SF },
33852 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf, "__builtin_ia32_exp2ps", IX86_BUILTIN_EXP2PS, UNKNOWN, (int) V16SF_FTYPE_V16SF },
33853 { OPTION_MASK_ISA_AVX512F, CODE_FOR_roundv8df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix512", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF },
33854 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_floorpd_vec_pack_sfix512", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
33855 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_ceilpd_vec_pack_sfix512", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
33857 /* Mask arithmetic operations.  */
33858 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andhi3, "__builtin_ia32_kandhi", IX86_BUILTIN_KAND16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI },
33859 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kandnhi, "__builtin_ia32_kandnhi", IX86_BUILTIN_KANDN16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI },
33860 { OPTION_MASK_ISA_AVX512F, CODE_FOR_one_cmplhi2, "__builtin_ia32_knothi", IX86_BUILTIN_KNOT16, UNKNOWN, (int) UHI_FTYPE_UHI },
33861 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorhi3, "__builtin_ia32_korhi", IX86_BUILTIN_KOR16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI },
33862 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestchi, "__builtin_ia32_kortestchi", IX86_BUILTIN_KORTESTC16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI },
33863 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestzhi, "__builtin_ia32_kortestzhi", IX86_BUILTIN_KORTESTZ16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI },
33864 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kunpckhi, "__builtin_ia32_kunpckhi", IX86_BUILTIN_KUNPCKBW, UNKNOWN, (int) UHI_FTYPE_UHI_UHI },
33865 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kxnorhi, "__builtin_ia32_kxnorhi", IX86_BUILTIN_KXNOR16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI },
33866 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorhi3, "__builtin_ia32_kxorhi", IX86_BUILTIN_KXOR16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI },
33867 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kmovw, "__builtin_ia32_kmov16", IX86_BUILTIN_KMOV16, UNKNOWN, (int) UHI_FTYPE_UHI },
33870 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg1, 0, IX86_BUILTIN_SHA1MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33871 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg2, 0, IX86_BUILTIN_SHA1MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33872 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1nexte, 0, IX86_BUILTIN_SHA1NEXTE, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33873 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1rnds4, 0, IX86_BUILTIN_SHA1RNDS4, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
33874 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg1, 0, IX86_BUILTIN_SHA256MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33875 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg2, 0, IX86_BUILTIN_SHA256MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33876 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256rnds2, 0, IX86_BUILTIN_SHA256RNDS2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
33879 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_palignrv32qi_mask, "__builtin_ia32_palignr256_mask", IX86_BUILTIN_PALIGNR256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_USI_CONVERT },
33880 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_palignrv16qi_mask, "__builtin_ia32_palignr128_mask", IX86_BUILTIN_PALIGNR128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_UHI_CONVERT },
/* Masked register-to-register move builtins: aligned (movdqa32/64,
   movapd, movaps) and unaligned (movdqu8/16) forms for 128/256-bit
   vectors.  AVX512VL alone suffices for dword/qword/float/double
   elements; byte/word element forms additionally require AVX512BW.  */
33881 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4di_mask, "__builtin_ia32_movdqa64_256_mask", IX86_BUILTIN_MOVDQA64_256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI },
33882 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2di_mask, "__builtin_ia32_movdqa64_128_mask", IX86_BUILTIN_MOVDQA64_128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI },
33883 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8si_mask, "__builtin_ia32_movdqa32_256_mask", IX86_BUILTIN_MOVDQA32_256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UQI },
33884 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4si_mask, "__builtin_ia32_movdqa32_128_mask", IX86_BUILTIN_MOVDQA32_128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
33885 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4df_mask, "__builtin_ia32_movapd256_mask", IX86_BUILTIN_MOVAPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
33886 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2df_mask, "__builtin_ia32_movapd128_mask", IX86_BUILTIN_MOVAPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
33887 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8sf_mask, "__builtin_ia32_movaps256_mask", IX86_BUILTIN_MOVAPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
33888 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4sf_mask, "__builtin_ia32_movaps128_mask", IX86_BUILTIN_MOVAPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
/* Byte/word-element unaligned moves need AVX512BW in addition to VL.  */
33889 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv16hi_mask, "__builtin_ia32_movdquhi256_mask", IX86_BUILTIN_MOVDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_UHI },
33890 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv8hi_mask, "__builtin_ia32_movdquhi128_mask", IX86_BUILTIN_MOVDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_UQI },
33891 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv32qi_mask, "__builtin_ia32_movdquqi256_mask", IX86_BUILTIN_MOVDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_USI },
33892 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv16qi_mask, "__builtin_ia32_movdquqi128_mask", IX86_BUILTIN_MOVDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_UHI },
/* Masked floating-point arithmetic for 128/256-bit vectors (AVX512VL):
   min/max, mul/div, add/sub.  Each takes two source vectors, a
   pass-through vector and a write mask (the trailing _UQI operand).  */
33893 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4sf3_mask, "__builtin_ia32_minps_mask", IX86_BUILTIN_MINPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
33894 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4sf3_mask, "__builtin_ia32_maxps_mask", IX86_BUILTIN_MAXPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
33895 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv2df3_mask, "__builtin_ia32_minpd_mask", IX86_BUILTIN_MINPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
33896 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv2df3_mask, "__builtin_ia32_maxpd_mask", IX86_BUILTIN_MAXPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
33897 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4df3_mask, "__builtin_ia32_maxpd256_mask", IX86_BUILTIN_MAXPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
33898 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8sf3_mask, "__builtin_ia32_maxps256_mask", IX86_BUILTIN_MAXPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
33899 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4df3_mask, "__builtin_ia32_minpd256_mask", IX86_BUILTIN_MINPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
33900 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8sf3_mask, "__builtin_ia32_minps256_mask", IX86_BUILTIN_MINPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
33901 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4sf3_mask, "__builtin_ia32_mulps_mask", IX86_BUILTIN_MULPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
33902 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_divv4sf3_mask, "__builtin_ia32_divps_mask", IX86_BUILTIN_DIVPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
33903 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv2df3_mask, "__builtin_ia32_mulpd_mask", IX86_BUILTIN_MULPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
33904 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_divv2df3_mask, "__builtin_ia32_divpd_mask", IX86_BUILTIN_DIVPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
33905 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_divv4df3_mask, "__builtin_ia32_divpd256_mask", IX86_BUILTIN_DIVPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
33906 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_divv8sf3_mask, "__builtin_ia32_divps256_mask", IX86_BUILTIN_DIVPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
33907 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4df3_mask, "__builtin_ia32_mulpd256_mask", IX86_BUILTIN_MULPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
33908 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8sf3_mask, "__builtin_ia32_mulps256_mask", IX86_BUILTIN_MULPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
33909 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv2df3_mask, "__builtin_ia32_addpd128_mask", IX86_BUILTIN_ADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
33910 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4df3_mask, "__builtin_ia32_addpd256_mask", IX86_BUILTIN_ADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
33911 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4sf3_mask, "__builtin_ia32_addps128_mask", IX86_BUILTIN_ADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
33912 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8sf3_mask, "__builtin_ia32_addps256_mask", IX86_BUILTIN_ADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
33913 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv2df3_mask, "__builtin_ia32_subpd128_mask", IX86_BUILTIN_SUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
33914 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4df3_mask, "__builtin_ia32_subpd256_mask", IX86_BUILTIN_SUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
33915 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4sf3_mask, "__builtin_ia32_subps128_mask", IX86_BUILTIN_SUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
33916 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8sf3_mask, "__builtin_ia32_subps256_mask", IX86_BUILTIN_SUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
/* Masked floating-point bitwise logic (xorpd/ps, orpd/ps) for 128/256-bit
   vectors; these masked FP-logic forms require AVX512DQ plus AVX512VL.  */
33917 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4df3_mask, "__builtin_ia32_xorpd256_mask", IX86_BUILTIN_XORPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
33918 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv2df3_mask, "__builtin_ia32_xorpd128_mask", IX86_BUILTIN_XORPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
33919 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv8sf3_mask, "__builtin_ia32_xorps256_mask", IX86_BUILTIN_XORPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
33920 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4sf3_mask, "__builtin_ia32_xorps128_mask", IX86_BUILTIN_XORPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
33921 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4df3_mask, "__builtin_ia32_orpd256_mask", IX86_BUILTIN_ORPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
33922 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv2df3_mask, "__builtin_ia32_orpd128_mask", IX86_BUILTIN_ORPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
33923 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv8sf3_mask, "__builtin_ia32_orps256_mask", IX86_BUILTIN_ORPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
33924 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4sf3_mask, "__builtin_ia32_orps128_mask", IX86_BUILTIN_ORPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
/* Masked tuple broadcasts (32x2, 64x2, 32x4), 128-bit lane extraction
   from 256-bit vectors, and dbpsadbw (double-block packed sum of
   absolute differences, AVX512BW).  */
33925 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv8sf_mask, "__builtin_ia32_broadcastf32x2_256_mask", IX86_BUILTIN_BROADCASTF32x2_256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_UQI },
33926 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv8si_mask, "__builtin_ia32_broadcasti32x2_256_mask", IX86_BUILTIN_BROADCASTI32x2_256, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_UQI },
33927 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4si_mask, "__builtin_ia32_broadcasti32x2_128_mask", IX86_BUILTIN_BROADCASTI32x2_128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
33928 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4df_mask_1, "__builtin_ia32_broadcastf64x2_256_mask", IX86_BUILTIN_BROADCASTF64X2_256, UNKNOWN, (int) V4DF_FTYPE_V2DF_V4DF_UQI },
33929 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4di_mask_1, "__builtin_ia32_broadcasti64x2_256_mask", IX86_BUILTIN_BROADCASTI64X2_256, UNKNOWN, (int) V4DI_FTYPE_V2DI_V4DI_UQI },
33930 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_broadcastv8sf_mask_1, "__builtin_ia32_broadcastf32x4_256_mask", IX86_BUILTIN_BROADCASTF32X4_256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_UQI },
33931 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_broadcastv8si_mask_1, "__builtin_ia32_broadcasti32x4_256_mask", IX86_BUILTIN_BROADCASTI32X4_256, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_UQI },
33932 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v8sf, "__builtin_ia32_extractf32x4_256_mask", IX86_BUILTIN_EXTRACTF32X4_256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT_V4SF_UQI },
33933 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v8si, "__builtin_ia32_extracti32x4_256_mask", IX86_BUILTIN_EXTRACTI32X4_256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT_V4SI_UQI },
33934 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_dbpsadbwv16hi_mask, "__builtin_ia32_dbpsadbw256_mask", IX86_BUILTIN_DBPSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI_INT_V16HI_UHI },
33935 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_dbpsadbwv8hi_mask, "__builtin_ia32_dbpsadbw128_mask", IX86_BUILTIN_DBPSADBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI_INT_V8HI_UQI },
/* Masked floating-point <-> integer conversions, 128/256-bit.
   "tt" names are truncating (CODE_FOR_*fix_trunc*), plain "t" names
   round (CODE_FOR_*fix_notrunc* or the legacy sse2/avx cvt patterns);
   a leading "u" in the insn name marks the unsigned variants.
   QQ/UQQ (64-bit integer element) forms require AVX512DQ.  */
33936 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4dfv4di2_mask, "__builtin_ia32_cvttpd2qq256_mask", IX86_BUILTIN_CVTTPD2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_UQI },
33937 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv2dfv2di2_mask, "__builtin_ia32_cvttpd2qq128_mask", IX86_BUILTIN_CVTTPD2QQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_UQI },
33938 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4dfv4di2_mask, "__builtin_ia32_cvttpd2uqq256_mask", IX86_BUILTIN_CVTTPD2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_UQI },
33939 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2dfv2di2_mask, "__builtin_ia32_cvttpd2uqq128_mask", IX86_BUILTIN_CVTTPD2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_UQI },
33940 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_notruncv4dfv4di2_mask, "__builtin_ia32_cvtpd2qq256_mask", IX86_BUILTIN_CVTPD2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_UQI },
33941 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_notruncv2dfv2di2_mask, "__builtin_ia32_cvtpd2qq128_mask", IX86_BUILTIN_CVTPD2QQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_UQI },
33942 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv4dfv4di2_mask, "__builtin_ia32_cvtpd2uqq256_mask", IX86_BUILTIN_CVTPD2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_UQI },
33943 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv2dfv2di2_mask, "__builtin_ia32_cvtpd2uqq128_mask", IX86_BUILTIN_CVTPD2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_UQI },
33944 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv4dfv4si2_mask, "__builtin_ia32_cvtpd2udq256_mask", IX86_BUILTIN_CVTPD2UDQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_UQI },
33945 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv2dfv2si2_mask, "__builtin_ia32_cvtpd2udq128_mask", IX86_BUILTIN_CVTPD2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_UQI },
33946 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4sfv4di2_mask, "__builtin_ia32_cvttps2qq256_mask", IX86_BUILTIN_CVTTPS2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_UQI },
33947 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2qq128_mask", IX86_BUILTIN_CVTTPS2QQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_UQI },
33948 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4sfv4di2_mask, "__builtin_ia32_cvttps2uqq256_mask", IX86_BUILTIN_CVTTPS2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_UQI },
33949 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2uqq128_mask", IX86_BUILTIN_CVTTPS2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_UQI },
33950 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv8sfv8si2_mask, "__builtin_ia32_cvttps2dq256_mask", IX86_BUILTIN_CVTTPS2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_UQI },
33951 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4sfv4si2_mask, "__builtin_ia32_cvttps2dq128_mask", IX86_BUILTIN_CVTTPS2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_UQI },
33952 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv8sfv8si2_mask, "__builtin_ia32_cvttps2udq256_mask", IX86_BUILTIN_CVTTPS2UDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_UQI },
33953 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4sfv4si2_mask, "__builtin_ia32_cvttps2udq128_mask", IX86_BUILTIN_CVTTPS2UDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_UQI },
33954 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4dfv4si2_mask, "__builtin_ia32_cvttpd2dq256_mask", IX86_BUILTIN_CVTTPD2DQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_UQI },
33955 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvttpd2dq_mask, "__builtin_ia32_cvttpd2dq128_mask", IX86_BUILTIN_CVTTPD2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_UQI },
33956 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4dfv4si2_mask, "__builtin_ia32_cvttpd2udq256_mask", IX86_BUILTIN_CVTTPD2UDQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_UQI },
33957 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2dfv2si2_mask, "__builtin_ia32_cvttpd2udq128_mask", IX86_BUILTIN_CVTTPD2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_UQI },
33958 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtpd2dq256_mask, "__builtin_ia32_cvtpd2dq256_mask", IX86_BUILTIN_CVTPD2DQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_UQI },
33959 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtpd2dq_mask, "__builtin_ia32_cvtpd2dq128_mask", IX86_BUILTIN_CVTPD2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_UQI },
/* Integer -> FP and FP -> FP widening conversions.  */
33960 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4siv4df2_mask, "__builtin_ia32_cvtdq2pd256_mask", IX86_BUILTIN_CVTDQ2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SI_V4DF_UQI },
33961 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtdq2pd_mask, "__builtin_ia32_cvtdq2pd128_mask", IX86_BUILTIN_CVTDQ2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SI_V2DF_UQI },
33962 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4siv4df2_mask, "__builtin_ia32_cvtudq2pd256_mask", IX86_BUILTIN_CVTUDQ2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SI_V4DF_UQI },
33963 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2siv2df2_mask, "__builtin_ia32_cvtudq2pd128_mask", IX86_BUILTIN_CVTUDQ2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SI_V2DF_UQI },
33964 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv8siv8sf2_mask, "__builtin_ia32_cvtdq2ps256_mask", IX86_BUILTIN_CVTDQ2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_UQI },
33965 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4siv4sf2_mask, "__builtin_ia32_cvtdq2ps128_mask", IX86_BUILTIN_CVTDQ2PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_UQI },
33966 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv8siv8sf2_mask, "__builtin_ia32_cvtudq2ps256_mask", IX86_BUILTIN_CVTUDQ2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_UQI },
33967 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4siv4sf2_mask, "__builtin_ia32_cvtudq2ps128_mask", IX86_BUILTIN_CVTUDQ2PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_UQI },
33968 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtps2pd256_mask, "__builtin_ia32_cvtps2pd256_mask", IX86_BUILTIN_CVTPS2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SF_V4DF_UQI },
33969 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtps2pd_mask, "__builtin_ia32_cvtps2pd128_mask", IX86_BUILTIN_CVTPS2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SF_V2DF_UQI },
/* Masked element broadcasts.  Each element size has a vector-source form
   (vec_dup) and a general-register-source form (vec_dup_gpr, the
   "_gpr_mask" builtins).  Byte/word forms require AVX512BW.  */
33970 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv32qi_mask, "__builtin_ia32_pbroadcastb256_mask", IX86_BUILTIN_PBROADCASTB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16QI_V32QI_USI },
33971 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv32qi_mask, "__builtin_ia32_pbroadcastb256_gpr_mask", IX86_BUILTIN_PBROADCASTB256_GPR_MASK, UNKNOWN, (int) V32QI_FTYPE_QI_V32QI_USI },
33972 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv16qi_mask, "__builtin_ia32_pbroadcastb128_mask", IX86_BUILTIN_PBROADCASTB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_UHI },
33973 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv16qi_mask, "__builtin_ia32_pbroadcastb128_gpr_mask", IX86_BUILTIN_PBROADCASTB128_GPR_MASK, UNKNOWN, (int) V16QI_FTYPE_QI_V16QI_UHI },
33974 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv16hi_mask, "__builtin_ia32_pbroadcastw256_mask", IX86_BUILTIN_PBROADCASTW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8HI_V16HI_UHI },
33975 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv16hi_mask, "__builtin_ia32_pbroadcastw256_gpr_mask", IX86_BUILTIN_PBROADCASTW256_GPR_MASK, UNKNOWN, (int) V16HI_FTYPE_HI_V16HI_UHI },
33976 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8hi_mask, "__builtin_ia32_pbroadcastw128_mask", IX86_BUILTIN_PBROADCASTW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_UQI },
33977 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv8hi_mask, "__builtin_ia32_pbroadcastw128_gpr_mask", IX86_BUILTIN_PBROADCASTW128_GPR_MASK, UNKNOWN, (int) V8HI_FTYPE_HI_V8HI_UQI },
33978 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8si_mask, "__builtin_ia32_pbroadcastd256_mask", IX86_BUILTIN_PBROADCASTD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_UQI },
33979 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv8si_mask, "__builtin_ia32_pbroadcastd256_gpr_mask", IX86_BUILTIN_PBROADCASTD256_GPR_MASK, UNKNOWN, (int) V8SI_FTYPE_SI_V8SI_UQI },
33980 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4si_mask, "__builtin_ia32_pbroadcastd128_mask", IX86_BUILTIN_PBROADCASTD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
33981 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv4si_mask, "__builtin_ia32_pbroadcastd128_gpr_mask", IX86_BUILTIN_PBROADCASTD128_GPR_MASK, UNKNOWN, (int) V4SI_FTYPE_SI_V4SI_UQI },
33982 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4di_mask, "__builtin_ia32_pbroadcastq256_mask", IX86_BUILTIN_PBROADCASTQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V2DI_V4DI_UQI },
33983 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv4di_mask, "__builtin_ia32_pbroadcastq256_gpr_mask", IX86_BUILTIN_PBROADCASTQ256_GPR_MASK, UNKNOWN, (int) V4DI_FTYPE_DI_V4DI_UQI },
33984 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv2di_mask, "__builtin_ia32_pbroadcastq128_mask", IX86_BUILTIN_PBROADCASTQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI },
33985 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv2di_mask, "__builtin_ia32_pbroadcastq128_gpr_mask", IX86_BUILTIN_PBROADCASTQ128_GPR_MASK, UNKNOWN, (int) V2DI_FTYPE_DI_V2DI_UQI },
33986 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8sf_mask, "__builtin_ia32_broadcastss256_mask", IX86_BUILTIN_BROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_UQI },
33987 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4sf_mask, "__builtin_ia32_broadcastss128_mask", IX86_BUILTIN_BROADCASTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
33988 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4df_mask, "__builtin_ia32_broadcastsd256_mask", IX86_BUILTIN_BROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_V2DF_V4DF_UQI },
/* Masked 128-bit lane extract (64x2, AVX512DQ) and insert (32x4) into
   256-bit vectors; the INT operand selects the lane.  */
33989 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v4df, "__builtin_ia32_extractf64x2_256_mask", IX86_BUILTIN_EXTRACTF64X2_256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT_V2DF_UQI },
33990 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v4di, "__builtin_ia32_extracti64x2_256_mask", IX86_BUILTIN_EXTRACTI64X2_256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT_V2DI_UQI },
33991 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv8sf, "__builtin_ia32_insertf32x4_256_mask", IX86_BUILTIN_INSERTF32X4_256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT_V8SF_UQI },
33992 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv8si, "__builtin_ia32_inserti32x4_256_mask", IX86_BUILTIN_INSERTI32X4_256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT_V8SI_UQI },
/* Masked integer widening: pmovsx* (sign extend) and pmovzx* (zero
   extend) for every narrow->wide element combination, 128/256-bit.
   Word-result forms (bw) require AVX512BW.  */
33993 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv16qiv16hi2_mask, "__builtin_ia32_pmovsxbw256_mask", IX86_BUILTIN_PMOVSXBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16QI_V16HI_UHI },
33994 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv8qiv8hi2_mask, "__builtin_ia32_pmovsxbw128_mask", IX86_BUILTIN_PMOVSXBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V8HI_UQI },
33995 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv8qiv8si2_mask, "__builtin_ia32_pmovsxbd256_mask", IX86_BUILTIN_PMOVSXBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16QI_V8SI_UQI },
33996 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv4qiv4si2_mask, "__builtin_ia32_pmovsxbd128_mask", IX86_BUILTIN_PMOVSXBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V16QI_V4SI_UQI },
33997 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4qiv4di2_mask, "__builtin_ia32_pmovsxbq256_mask", IX86_BUILTIN_PMOVSXBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V16QI_V4DI_UQI },
33998 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2qiv2di2_mask, "__builtin_ia32_pmovsxbq128_mask", IX86_BUILTIN_PMOVSXBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V16QI_V2DI_UQI },
33999 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv8hiv8si2_mask, "__builtin_ia32_pmovsxwd256_mask", IX86_BUILTIN_PMOVSXWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HI_V8SI_UQI },
34000 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv4hiv4si2_mask, "__builtin_ia32_pmovsxwd128_mask", IX86_BUILTIN_PMOVSXWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V4SI_UQI },
34001 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4hiv4di2_mask, "__builtin_ia32_pmovsxwq256_mask", IX86_BUILTIN_PMOVSXWQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HI_V4DI_UQI },
34002 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2hiv2di2_mask, "__builtin_ia32_pmovsxwq128_mask", IX86_BUILTIN_PMOVSXWQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HI_V2DI_UQI },
34003 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4siv4di2_mask, "__builtin_ia32_pmovsxdq256_mask", IX86_BUILTIN_PMOVSXDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4SI_V4DI_UQI },
34004 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2siv2di2_mask, "__builtin_ia32_pmovsxdq128_mask", IX86_BUILTIN_PMOVSXDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V2DI_UQI },
/* Zero-extending counterparts, same element combinations.  */
34005 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv16qiv16hi2_mask, "__builtin_ia32_pmovzxbw256_mask", IX86_BUILTIN_PMOVZXBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16QI_V16HI_UHI },
34006 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv8qiv8hi2_mask, "__builtin_ia32_pmovzxbw128_mask", IX86_BUILTIN_PMOVZXBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V8HI_UQI },
34007 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv8qiv8si2_mask, "__builtin_ia32_pmovzxbd256_mask", IX86_BUILTIN_PMOVZXBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16QI_V8SI_UQI },
34008 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv4qiv4si2_mask, "__builtin_ia32_pmovzxbd128_mask", IX86_BUILTIN_PMOVZXBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V16QI_V4SI_UQI },
34009 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4qiv4di2_mask, "__builtin_ia32_pmovzxbq256_mask", IX86_BUILTIN_PMOVZXBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V16QI_V4DI_UQI },
34010 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2qiv2di2_mask, "__builtin_ia32_pmovzxbq128_mask", IX86_BUILTIN_PMOVZXBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V16QI_V2DI_UQI },
34011 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv8hiv8si2_mask, "__builtin_ia32_pmovzxwd256_mask", IX86_BUILTIN_PMOVZXWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HI_V8SI_UQI },
34012 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv4hiv4si2_mask, "__builtin_ia32_pmovzxwd128_mask", IX86_BUILTIN_PMOVZXWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V4SI_UQI },
34013 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4hiv4di2_mask, "__builtin_ia32_pmovzxwq256_mask", IX86_BUILTIN_PMOVZXWQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HI_V4DI_UQI },
34014 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2hiv2di2_mask, "__builtin_ia32_pmovzxwq128_mask", IX86_BUILTIN_PMOVZXWQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HI_V2DI_UQI },
34015 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4siv4di2_mask, "__builtin_ia32_pmovzxdq256_mask", IX86_BUILTIN_PMOVZXDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4SI_V4DI_UQI },
34016 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2siv2di2_mask, "__builtin_ia32_pmovzxdq128_mask", IX86_BUILTIN_PMOVZXDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V2DI_UQI },
/* AVX512DQ vreduce builtins: masked packed forms (128/256-bit, with VL)
   and scalar sd/ss forms; the INT operand is the immediate rounding
   control byte.  */
34017 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv4df_mask, "__builtin_ia32_reducepd256_mask", IX86_BUILTIN_REDUCEPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_UQI },
34018 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv2df_mask, "__builtin_ia32_reducepd128_mask", IX86_BUILTIN_REDUCEPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_UQI },
34019 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv8sf_mask, "__builtin_ia32_reduceps256_mask", IX86_BUILTIN_REDUCEPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_UQI },
34020 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv4sf_mask, "__builtin_ia32_reduceps128_mask", IX86_BUILTIN_REDUCEPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_UQI },
34021 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducesv2df, "__builtin_ia32_reducesd", IX86_BUILTIN_REDUCESD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
34022 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducesv4sf, "__builtin_ia32_reducess", IX86_BUILTIN_REDUCESS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
/* AVX512BW/VL word-element variable permutes: vpermw (permvar),
   vpermt2w (vpermt2var, plus zero-masking _maskz forms) and vpermi2w
   (vpermi2var), 128/256-bit.  */
34023 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv16hi_mask, "__builtin_ia32_permvarhi256_mask", IX86_BUILTIN_VPERMVARHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34024 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv8hi_mask, "__builtin_ia32_permvarhi128_mask", IX86_BUILTIN_VPERMVARHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34025 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16hi3_mask, "__builtin_ia32_vpermt2varhi256_mask", IX86_BUILTIN_VPERMT2VARHI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
/* The _maskz entries formerly OR'ed OPTION_MASK_ISA_AVX512BW in twice;
   the duplicate is dropped so they match their _mask siblings (the
   resulting bitmask value is unchanged).  */
34026 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16hi3_maskz, "__builtin_ia32_vpermt2varhi256_maskz", IX86_BUILTIN_VPERMT2VARHI256_MASKZ, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34027 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8hi3_mask, "__builtin_ia32_vpermt2varhi128_mask", IX86_BUILTIN_VPERMT2VARHI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34028 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8hi3_maskz, "__builtin_ia32_vpermt2varhi128_maskz", IX86_BUILTIN_VPERMT2VARHI128_MASKZ, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34029 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv16hi3_mask, "__builtin_ia32_vpermi2varhi256_mask", IX86_BUILTIN_VPERMI2VARHI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34030 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8hi3_mask, "__builtin_ia32_vpermi2varhi128_mask", IX86_BUILTIN_VPERMI2VARHI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
/* Masked approximate reciprocal (rcp14), approximate reciprocal square
   root (rsqrt14), and exact square root builtins, 128/256-bit.  */
34031 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v4df_mask, "__builtin_ia32_rcp14pd256_mask", IX86_BUILTIN_RCP14PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
34032 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v2df_mask, "__builtin_ia32_rcp14pd128_mask", IX86_BUILTIN_RCP14PD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
34033 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v8sf_mask, "__builtin_ia32_rcp14ps256_mask", IX86_BUILTIN_RCP14PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
34034 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v4sf_mask, "__builtin_ia32_rcp14ps128_mask", IX86_BUILTIN_RCP14PS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
34035 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v4df_mask, "__builtin_ia32_rsqrt14pd256_mask", IX86_BUILTIN_RSQRT14PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
34036 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v2df_mask, "__builtin_ia32_rsqrt14pd128_mask", IX86_BUILTIN_RSQRT14PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
34037 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v8sf_mask, "__builtin_ia32_rsqrt14ps256_mask", IX86_BUILTIN_RSQRT14PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
34038 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v4sf_mask, "__builtin_ia32_rsqrt14ps128_mask", IX86_BUILTIN_RSQRT14PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
34039 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_sqrtv4df2_mask, "__builtin_ia32_sqrtpd256_mask", IX86_BUILTIN_SQRTPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
34040 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sqrtv2df2_mask, "__builtin_ia32_sqrtpd128_mask", IX86_BUILTIN_SQRTPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
34041 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_sqrtv8sf2_mask, "__builtin_ia32_sqrtps256_mask", IX86_BUILTIN_SQRTPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
34042 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_sqrtv4sf2_mask, "__builtin_ia32_sqrtps128_mask", IX86_BUILTIN_SQRTPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
/* Masked integer add/sub, 128-bit then 256-bit: plain (padd/psub),
   signed-saturating (padds/psubs) and unsigned-saturating
   (paddus/psubus) forms.  Byte/word elements require AVX512BW;
   dword/qword need only AVX512VL.  */
34043 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv16qi3_mask, "__builtin_ia32_paddb128_mask", IX86_BUILTIN_PADDB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34044 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8hi3_mask, "__builtin_ia32_paddw128_mask", IX86_BUILTIN_PADDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34045 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4si3_mask, "__builtin_ia32_paddd128_mask", IX86_BUILTIN_PADDD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34046 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv2di3_mask, "__builtin_ia32_paddq128_mask", IX86_BUILTIN_PADDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34047 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv16qi3_mask, "__builtin_ia32_psubb128_mask", IX86_BUILTIN_PSUBB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34048 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8hi3_mask, "__builtin_ia32_psubw128_mask", IX86_BUILTIN_PSUBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34049 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4si3_mask, "__builtin_ia32_psubd128_mask", IX86_BUILTIN_PSUBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34050 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv2di3_mask, "__builtin_ia32_psubq128_mask", IX86_BUILTIN_PSUBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34051 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ssaddv16qi3_mask, "__builtin_ia32_paddsb128_mask", IX86_BUILTIN_PADDSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34052 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ssaddv8hi3_mask, "__builtin_ia32_paddsw128_mask", IX86_BUILTIN_PADDSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34053 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sssubv16qi3_mask, "__builtin_ia32_psubsb128_mask", IX86_BUILTIN_PSUBSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34054 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sssubv8hi3_mask, "__builtin_ia32_psubsw128_mask", IX86_BUILTIN_PSUBSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34055 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_usaddv16qi3_mask, "__builtin_ia32_paddusb128_mask", IX86_BUILTIN_PADDUSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34056 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_usaddv8hi3_mask, "__builtin_ia32_paddusw128_mask", IX86_BUILTIN_PADDUSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34057 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ussubv16qi3_mask, "__builtin_ia32_psubusb128_mask", IX86_BUILTIN_PSUBUSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34058 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ussubv8hi3_mask, "__builtin_ia32_psubusw128_mask", IX86_BUILTIN_PSUBUSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
/* 256-bit variants.  */
34059 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv32qi3_mask, "__builtin_ia32_paddb256_mask", IX86_BUILTIN_PADDB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34060 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv16hi3_mask, "__builtin_ia32_paddw256_mask", IX86_BUILTIN_PADDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34061 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8si3_mask, "__builtin_ia32_paddd256_mask", IX86_BUILTIN_PADDD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34062 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4di3_mask, "__builtin_ia32_paddq256_mask", IX86_BUILTIN_PADDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34063 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ssaddv32qi3_mask, "__builtin_ia32_paddsb256_mask", IX86_BUILTIN_PADDSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34064 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ssaddv16hi3_mask, "__builtin_ia32_paddsw256_mask", IX86_BUILTIN_PADDSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34065 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_usaddv32qi3_mask, "__builtin_ia32_paddusb256_mask", IX86_BUILTIN_PADDUSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34066 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_usaddv16hi3_mask, "__builtin_ia32_paddusw256_mask", IX86_BUILTIN_PADDUSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34067 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv32qi3_mask, "__builtin_ia32_psubb256_mask", IX86_BUILTIN_PSUBB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34068 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv16hi3_mask, "__builtin_ia32_psubw256_mask", IX86_BUILTIN_PSUBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34069 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8si3_mask, "__builtin_ia32_psubd256_mask", IX86_BUILTIN_PSUBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34070 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4di3_mask, "__builtin_ia32_psubq256_mask", IX86_BUILTIN_PSUBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34071 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sssubv32qi3_mask, "__builtin_ia32_psubsb256_mask", IX86_BUILTIN_PSUBSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34072 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sssubv16hi3_mask, "__builtin_ia32_psubsw256_mask", IX86_BUILTIN_PSUBSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34073 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ussubv32qi3_mask, "__builtin_ia32_psubusb256_mask", IX86_BUILTIN_PSUBUSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34074 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ussubv16hi3_mask, "__builtin_ia32_psubusw256_mask", IX86_BUILTIN_PSUBUSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34075 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_256_mask", IX86_BUILTIN_SHUF_F64x2_256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_UQI },
34076 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_256_mask", IX86_BUILTIN_SHUF_I64x2_256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_UQI },
34077 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_256_mask", IX86_BUILTIN_SHUF_I32x4_256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_UQI },
34078 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_256_mask", IX86_BUILTIN_SHUF_F32x4_256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_UQI },
34079 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovwb128_mask", IX86_BUILTIN_PMOVWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_UQI },
34080 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovwb256_mask", IX86_BUILTIN_PMOVWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_UHI },
34081 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovswb128_mask", IX86_BUILTIN_PMOVSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_UQI },
34082 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovswb256_mask", IX86_BUILTIN_PMOVSWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_UHI },
34083 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovuswb128_mask", IX86_BUILTIN_PMOVUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_UQI },
34084 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovuswb256_mask", IX86_BUILTIN_PMOVUSWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_UHI },
34085 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4qi2_mask, "__builtin_ia32_pmovdb128_mask", IX86_BUILTIN_PMOVDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_UQI },
34086 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8qi2_mask, "__builtin_ia32_pmovdb256_mask", IX86_BUILTIN_PMOVDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_UQI },
34087 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4qi2_mask, "__builtin_ia32_pmovsdb128_mask", IX86_BUILTIN_PMOVSDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_UQI },
34088 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8qi2_mask, "__builtin_ia32_pmovsdb256_mask", IX86_BUILTIN_PMOVSDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_UQI },
34089 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4qi2_mask, "__builtin_ia32_pmovusdb128_mask", IX86_BUILTIN_PMOVUSDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_UQI },
34090 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8qi2_mask, "__builtin_ia32_pmovusdb256_mask", IX86_BUILTIN_PMOVUSDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_UQI },
34091 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4hi2_mask, "__builtin_ia32_pmovdw128_mask", IX86_BUILTIN_PMOVDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_UQI },
34092 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8hi2_mask, "__builtin_ia32_pmovdw256_mask", IX86_BUILTIN_PMOVDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_UQI },
34093 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4hi2_mask, "__builtin_ia32_pmovsdw128_mask", IX86_BUILTIN_PMOVSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_UQI },
34094 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8hi2_mask, "__builtin_ia32_pmovsdw256_mask", IX86_BUILTIN_PMOVSDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_UQI },
34095 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4hi2_mask, "__builtin_ia32_pmovusdw128_mask", IX86_BUILTIN_PMOVUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_UQI },
34096 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8hi2_mask, "__builtin_ia32_pmovusdw256_mask", IX86_BUILTIN_PMOVUSDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_UQI },
34097 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2qi2_mask, "__builtin_ia32_pmovqb128_mask", IX86_BUILTIN_PMOVQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_UQI },
34098 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4qi2_mask, "__builtin_ia32_pmovqb256_mask", IX86_BUILTIN_PMOVQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_UQI },
34099 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2qi2_mask, "__builtin_ia32_pmovsqb128_mask", IX86_BUILTIN_PMOVSQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_UQI },
34100 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4qi2_mask, "__builtin_ia32_pmovsqb256_mask", IX86_BUILTIN_PMOVSQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_UQI },
34101 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2qi2_mask, "__builtin_ia32_pmovusqb128_mask", IX86_BUILTIN_PMOVUSQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_UQI },
34102 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4qi2_mask, "__builtin_ia32_pmovusqb256_mask", IX86_BUILTIN_PMOVUSQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_UQI },
34103 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2hi2_mask, "__builtin_ia32_pmovqw128_mask", IX86_BUILTIN_PMOVQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_UQI },
34104 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4hi2_mask, "__builtin_ia32_pmovqw256_mask", IX86_BUILTIN_PMOVQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_UQI },
34105 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2hi2_mask, "__builtin_ia32_pmovsqw128_mask", IX86_BUILTIN_PMOVSQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_UQI },
34106 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4hi2_mask, "__builtin_ia32_pmovsqw256_mask", IX86_BUILTIN_PMOVSQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_UQI },
34107 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2hi2_mask, "__builtin_ia32_pmovusqw128_mask", IX86_BUILTIN_PMOVUSQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_UQI },
34108 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4hi2_mask, "__builtin_ia32_pmovusqw256_mask", IX86_BUILTIN_PMOVUSQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_UQI },
34109 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2si2_mask, "__builtin_ia32_pmovqd128_mask", IX86_BUILTIN_PMOVQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_UQI },
34110 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4si2_mask, "__builtin_ia32_pmovqd256_mask", IX86_BUILTIN_PMOVQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_UQI },
34111 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2si2_mask, "__builtin_ia32_pmovsqd128_mask", IX86_BUILTIN_PMOVSQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_UQI },
34112 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4si2_mask, "__builtin_ia32_pmovsqd256_mask", IX86_BUILTIN_PMOVSQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_UQI },
34113 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2si2_mask, "__builtin_ia32_pmovusqd128_mask", IX86_BUILTIN_PMOVUSQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_UQI },
34114 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4si2_mask, "__builtin_ia32_pmovusqd256_mask", IX86_BUILTIN_PMOVUSQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_UQI },
34115 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv4df_mask, "__builtin_ia32_rangepd256_mask", IX86_BUILTIN_RANGEPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_UQI },
34116 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv2df_mask, "__builtin_ia32_rangepd128_mask", IX86_BUILTIN_RANGEPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI },
34117 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv8sf_mask, "__builtin_ia32_rangeps256_mask", IX86_BUILTIN_RANGEPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_UQI },
34118 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv4sf_mask, "__builtin_ia32_rangeps128_mask", IX86_BUILTIN_RANGEPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI },
34119 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv8sf_mask, "__builtin_ia32_getexpps256_mask", IX86_BUILTIN_GETEXPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
34120 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv4df_mask, "__builtin_ia32_getexppd256_mask", IX86_BUILTIN_GETEXPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
34121 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv4sf_mask, "__builtin_ia32_getexpps128_mask", IX86_BUILTIN_GETEXPPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
34122 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv2df_mask, "__builtin_ia32_getexppd128_mask", IX86_BUILTIN_GETEXPPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
34123 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4df_mask, "__builtin_ia32_fixupimmpd256_mask", IX86_BUILTIN_FIXUPIMMPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT_UQI },
34124 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4df_maskz, "__builtin_ia32_fixupimmpd256_maskz", IX86_BUILTIN_FIXUPIMMPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT_UQI },
34125 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv8sf_mask, "__builtin_ia32_fixupimmps256_mask", IX86_BUILTIN_FIXUPIMMPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT_UQI },
34126 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv8sf_maskz, "__builtin_ia32_fixupimmps256_maskz", IX86_BUILTIN_FIXUPIMMPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT_UQI },
34127 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv2df_mask, "__builtin_ia32_fixupimmpd128_mask", IX86_BUILTIN_FIXUPIMMPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_UQI },
34128 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv2df_maskz, "__builtin_ia32_fixupimmpd128_maskz", IX86_BUILTIN_FIXUPIMMPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_UQI },
34129 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4sf_mask, "__builtin_ia32_fixupimmps128_mask", IX86_BUILTIN_FIXUPIMMPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_UQI },
34130 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4sf_maskz, "__builtin_ia32_fixupimmps128_maskz", IX86_BUILTIN_FIXUPIMMPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_UQI },
34131 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv4di2_mask, "__builtin_ia32_pabsq256_mask", IX86_BUILTIN_PABSQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI },
34132 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv2di2_mask, "__builtin_ia32_pabsq128_mask", IX86_BUILTIN_PABSQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI },
34133 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv8si2_mask, "__builtin_ia32_pabsd256_mask", IX86_BUILTIN_PABSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UQI },
34134 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv4si2_mask, "__builtin_ia32_pabsd128_mask", IX86_BUILTIN_PABSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
34135 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_pmulhrswv16hi3_mask , "__builtin_ia32_pmulhrsw256_mask", IX86_BUILTIN_PMULHRSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34136 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_pmulhrswv8hi3_mask, "__builtin_ia32_pmulhrsw128_mask", IX86_BUILTIN_PMULHRSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34137 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umulv8hi3_highpart_mask, "__builtin_ia32_pmulhuw128_mask", IX86_BUILTIN_PMULHUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34138 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umulv16hi3_highpart_mask, "__builtin_ia32_pmulhuw256_mask" , IX86_BUILTIN_PMULHUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34139 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smulv16hi3_highpart_mask, "__builtin_ia32_pmulhw256_mask" , IX86_BUILTIN_PMULHW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34140 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smulv8hi3_highpart_mask, "__builtin_ia32_pmulhw128_mask", IX86_BUILTIN_PMULHW128_MASK, UNKNOWN,(int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34141 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv16hi3_mask, "__builtin_ia32_pmullw256_mask" , IX86_BUILTIN_PMULLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34142 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8hi3_mask, "__builtin_ia32_pmullw128_mask", IX86_BUILTIN_PMULLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34143 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_mulv4di3_mask, "__builtin_ia32_pmullq256_mask", IX86_BUILTIN_PMULLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34144 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_mulv2di3_mask, "__builtin_ia32_pmullq128_mask", IX86_BUILTIN_PMULLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34145 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4df3_mask, "__builtin_ia32_andpd256_mask", IX86_BUILTIN_ANDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34146 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv2df3_mask, "__builtin_ia32_andpd128_mask", IX86_BUILTIN_ANDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34147 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv8sf3_mask, "__builtin_ia32_andps256_mask", IX86_BUILTIN_ANDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34148 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4sf3_mask, "__builtin_ia32_andps128_mask", IX86_BUILTIN_ANDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34149 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_andnotv4df3_mask, "__builtin_ia32_andnpd256_mask", IX86_BUILTIN_ANDNPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34150 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv2df3_mask, "__builtin_ia32_andnpd128_mask", IX86_BUILTIN_ANDNPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34151 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_andnotv8sf3_mask, "__builtin_ia32_andnps256_mask", IX86_BUILTIN_ANDNPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34152 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_andnotv4sf3_mask, "__builtin_ia32_andnps128_mask", IX86_BUILTIN_ANDNPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34153 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8hi3_mask, "__builtin_ia32_psllwi128_mask", IX86_BUILTIN_PSLLWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_UQI },
34154 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4si3_mask, "__builtin_ia32_pslldi128_mask", IX86_BUILTIN_PSLLDI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_UQI },
34155 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv2di3_mask, "__builtin_ia32_psllqi128_mask", IX86_BUILTIN_PSLLQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_UQI },
34156 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8hi3_mask, "__builtin_ia32_psllw128_mask", IX86_BUILTIN_PSLLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34157 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4si3_mask, "__builtin_ia32_pslld128_mask", IX86_BUILTIN_PSLLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34158 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv2di3_mask, "__builtin_ia32_psllq128_mask", IX86_BUILTIN_PSLLQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34159 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv16hi3_mask, "__builtin_ia32_psllwi256_mask", IX86_BUILTIN_PSLLWI256_MASK , UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_UHI },
34160 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv16hi3_mask, "__builtin_ia32_psllw256_mask", IX86_BUILTIN_PSLLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_UHI },
34161 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8si3_mask, "__builtin_ia32_pslldi256_mask", IX86_BUILTIN_PSLLDI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_UQI },
34162 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8si3_mask, "__builtin_ia32_pslld256_mask", IX86_BUILTIN_PSLLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_UQI },
34163 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4di3_mask, "__builtin_ia32_psllqi256_mask", IX86_BUILTIN_PSLLQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_UQI },
34164 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4di3_mask, "__builtin_ia32_psllq256_mask", IX86_BUILTIN_PSLLQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_UQI },
34165 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4si3_mask, "__builtin_ia32_psradi128_mask", IX86_BUILTIN_PSRADI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_UQI },
34166 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4si3_mask, "__builtin_ia32_psrad128_mask", IX86_BUILTIN_PSRAD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34167 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8si3_mask, "__builtin_ia32_psradi256_mask", IX86_BUILTIN_PSRADI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_UQI },
34168 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8si3_mask, "__builtin_ia32_psrad256_mask", IX86_BUILTIN_PSRAD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_UQI },
34169 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv2di3_mask, "__builtin_ia32_psraqi128_mask", IX86_BUILTIN_PSRAQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_UQI },
34170 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv2di3_mask, "__builtin_ia32_psraq128_mask", IX86_BUILTIN_PSRAQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34171 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4di3_mask, "__builtin_ia32_psraqi256_mask", IX86_BUILTIN_PSRAQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_UQI },
34172 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4di3_mask, "__builtin_ia32_psraq256_mask", IX86_BUILTIN_PSRAQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_UQI },
34173 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv8si3_mask, "__builtin_ia32_pandd256_mask", IX86_BUILTIN_PANDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34174 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4si3_mask, "__builtin_ia32_pandd128_mask", IX86_BUILTIN_PANDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34175 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4si3_mask, "__builtin_ia32_psrldi128_mask", IX86_BUILTIN_PSRLDI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_UQI },
34176 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4si3_mask, "__builtin_ia32_psrld128_mask", IX86_BUILTIN_PSRLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34177 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8si3_mask, "__builtin_ia32_psrldi256_mask", IX86_BUILTIN_PSRLDI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_UQI },
34178 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8si3_mask, "__builtin_ia32_psrld256_mask", IX86_BUILTIN_PSRLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_UQI },
34179 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv2di3_mask, "__builtin_ia32_psrlqi128_mask", IX86_BUILTIN_PSRLQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_UQI },
34180 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv2di3_mask, "__builtin_ia32_psrlq128_mask", IX86_BUILTIN_PSRLQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34181 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4di3_mask, "__builtin_ia32_psrlqi256_mask", IX86_BUILTIN_PSRLQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_UQI },
34182 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4di3_mask, "__builtin_ia32_psrlq256_mask", IX86_BUILTIN_PSRLQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_UQI },
34183 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4di3_mask, "__builtin_ia32_pandq256_mask", IX86_BUILTIN_PANDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34184 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv2di3_mask, "__builtin_ia32_pandq128_mask", IX86_BUILTIN_PANDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34185 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_andnotv8si3_mask, "__builtin_ia32_pandnd256_mask", IX86_BUILTIN_PANDND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34186 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv4si3_mask, "__builtin_ia32_pandnd128_mask", IX86_BUILTIN_PANDND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34187 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_andnotv4di3_mask, "__builtin_ia32_pandnq256_mask", IX86_BUILTIN_PANDNQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34188 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv2di3_mask, "__builtin_ia32_pandnq128_mask", IX86_BUILTIN_PANDNQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34189 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv8si3_mask, "__builtin_ia32_pord256_mask", IX86_BUILTIN_PORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34190 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4si3_mask, "__builtin_ia32_pord128_mask", IX86_BUILTIN_PORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34191 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4di3_mask, "__builtin_ia32_porq256_mask", IX86_BUILTIN_PORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34192 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv2di3_mask, "__builtin_ia32_porq128_mask", IX86_BUILTIN_PORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34193 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv8si3_mask, "__builtin_ia32_pxord256_mask", IX86_BUILTIN_PXORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34194 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4si3_mask, "__builtin_ia32_pxord128_mask", IX86_BUILTIN_PXORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34195 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4di3_mask, "__builtin_ia32_pxorq256_mask", IX86_BUILTIN_PXORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34196 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv2di3_mask, "__builtin_ia32_pxorq128_mask", IX86_BUILTIN_PXORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34197 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packsswb_mask, "__builtin_ia32_packsswb256_mask", IX86_BUILTIN_PACKSSWB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI_V32QI_USI },
34198 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packsswb_mask, "__builtin_ia32_packsswb128_mask", IX86_BUILTIN_PACKSSWB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI_V16QI_UHI },
34199 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packuswb_mask, "__builtin_ia32_packuswb256_mask", IX86_BUILTIN_PACKUSWB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI_V32QI_USI },
34200 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packuswb_mask, "__builtin_ia32_packuswb128_mask", IX86_BUILTIN_PACKUSWB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI_V16QI_UHI },
34201 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev8sf_mask, "__builtin_ia32_rndscaleps_256_mask", IX86_BUILTIN_RNDSCALEPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_UQI },
34202 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev4df_mask, "__builtin_ia32_rndscalepd_256_mask", IX86_BUILTIN_RNDSCALEPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_UQI },
34203 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev4sf_mask, "__builtin_ia32_rndscaleps_128_mask", IX86_BUILTIN_RNDSCALEPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_UQI },
34204 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev2df_mask, "__builtin_ia32_rndscalepd_128_mask", IX86_BUILTIN_RNDSCALEPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_UQI },
34205 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4di_mask, "__builtin_ia32_pternlogq256_mask", IX86_BUILTIN_VTERNLOGQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT_UQI },
34206 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4di_maskz, "__builtin_ia32_pternlogq256_maskz", IX86_BUILTIN_VTERNLOGQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT_UQI },
34207 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv8si_mask, "__builtin_ia32_pternlogd256_mask", IX86_BUILTIN_VTERNLOGD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT_UQI },
34208 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv8si_maskz, "__builtin_ia32_pternlogd256_maskz", IX86_BUILTIN_VTERNLOGD256_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT_UQI },
34209 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv2di_mask, "__builtin_ia32_pternlogq128_mask", IX86_BUILTIN_VTERNLOGQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT_UQI },
34210 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv2di_maskz, "__builtin_ia32_pternlogq128_maskz", IX86_BUILTIN_VTERNLOGQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT_UQI },
34211 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4si_mask, "__builtin_ia32_pternlogd128_mask", IX86_BUILTIN_VTERNLOGD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT_UQI },
34212 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4si_maskz, "__builtin_ia32_pternlogd128_maskz", IX86_BUILTIN_VTERNLOGD128_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT_UQI },
34213 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv4df_mask, "__builtin_ia32_scalefpd256_mask", IX86_BUILTIN_SCALEFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34214 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv8sf_mask, "__builtin_ia32_scalefps256_mask", IX86_BUILTIN_SCALEFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34215 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv2df_mask, "__builtin_ia32_scalefpd128_mask", IX86_BUILTIN_SCALEFPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34216 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv4sf_mask, "__builtin_ia32_scalefps128_mask", IX86_BUILTIN_SCALEFPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34217 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_mask, "__builtin_ia32_vfmaddpd256_mask", IX86_BUILTIN_VFMADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34218 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_mask3, "__builtin_ia32_vfmaddpd256_mask3", IX86_BUILTIN_VFMADDPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34219 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_maskz, "__builtin_ia32_vfmaddpd256_maskz", IX86_BUILTIN_VFMADDPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34220 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_mask, "__builtin_ia32_vfmaddpd128_mask", IX86_BUILTIN_VFMADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34221 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_mask3, "__builtin_ia32_vfmaddpd128_mask3", IX86_BUILTIN_VFMADDPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34222 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_maskz, "__builtin_ia32_vfmaddpd128_maskz", IX86_BUILTIN_VFMADDPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34223 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_mask, "__builtin_ia32_vfmaddps256_mask", IX86_BUILTIN_VFMADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34224 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_mask3, "__builtin_ia32_vfmaddps256_mask3", IX86_BUILTIN_VFMADDPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34225 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_maskz, "__builtin_ia32_vfmaddps256_maskz", IX86_BUILTIN_VFMADDPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34226 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_mask, "__builtin_ia32_vfmaddps128_mask", IX86_BUILTIN_VFMADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34227 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_mask3, "__builtin_ia32_vfmaddps128_mask3", IX86_BUILTIN_VFMADDPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34228 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_maskz, "__builtin_ia32_vfmaddps128_maskz", IX86_BUILTIN_VFMADDPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34229 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4df_mask3, "__builtin_ia32_vfmsubpd256_mask3", IX86_BUILTIN_VFMSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34230 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v2df_mask3, "__builtin_ia32_vfmsubpd128_mask3", IX86_BUILTIN_VFMSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34231 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v8sf_mask3, "__builtin_ia32_vfmsubps256_mask3", IX86_BUILTIN_VFMSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34232 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4sf_mask3, "__builtin_ia32_vfmsubps128_mask3", IX86_BUILTIN_VFMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34233 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4df_mask, "__builtin_ia32_vfnmaddpd256_mask", IX86_BUILTIN_VFNMADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34234 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v2df_mask, "__builtin_ia32_vfnmaddpd128_mask", IX86_BUILTIN_VFNMADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34235 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v8sf_mask, "__builtin_ia32_vfnmaddps256_mask", IX86_BUILTIN_VFNMADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34236 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4sf_mask, "__builtin_ia32_vfnmaddps128_mask", IX86_BUILTIN_VFNMADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34237 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask, "__builtin_ia32_vfnmsubpd256_mask", IX86_BUILTIN_VFNMSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34238 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask3, "__builtin_ia32_vfnmsubpd256_mask3", IX86_BUILTIN_VFNMSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34239 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask, "__builtin_ia32_vfnmsubpd128_mask", IX86_BUILTIN_VFNMSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34240 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask3, "__builtin_ia32_vfnmsubpd128_mask3", IX86_BUILTIN_VFNMSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34241 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_mask, "__builtin_ia32_vfnmsubps256_mask", IX86_BUILTIN_VFNMSUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34242 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_mask3, "__builtin_ia32_vfnmsubps256_mask3", IX86_BUILTIN_VFNMSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34243 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_mask, "__builtin_ia32_vfnmsubps128_mask", IX86_BUILTIN_VFNMSUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34244 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_mask3, "__builtin_ia32_vfnmsubps128_mask3", IX86_BUILTIN_VFNMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34245 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_mask, "__builtin_ia32_vfmaddsubpd256_mask", IX86_BUILTIN_VFMADDSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34246 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_mask3, "__builtin_ia32_vfmaddsubpd256_mask3", IX86_BUILTIN_VFMADDSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34247 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_maskz, "__builtin_ia32_vfmaddsubpd256_maskz", IX86_BUILTIN_VFMADDSUBPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34248 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_mask, "__builtin_ia32_vfmaddsubpd128_mask", IX86_BUILTIN_VFMADDSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34249 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_mask3, "__builtin_ia32_vfmaddsubpd128_mask3", IX86_BUILTIN_VFMADDSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34250 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_maskz, "__builtin_ia32_vfmaddsubpd128_maskz", IX86_BUILTIN_VFMADDSUBPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34251 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_mask, "__builtin_ia32_vfmaddsubps256_mask", IX86_BUILTIN_VFMADDSUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34252 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_mask3, "__builtin_ia32_vfmaddsubps256_mask3", IX86_BUILTIN_VFMADDSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34253 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_maskz, "__builtin_ia32_vfmaddsubps256_maskz", IX86_BUILTIN_VFMADDSUBPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34254 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_mask, "__builtin_ia32_vfmaddsubps128_mask", IX86_BUILTIN_VFMADDSUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34255 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_mask3, "__builtin_ia32_vfmaddsubps128_mask3", IX86_BUILTIN_VFMADDSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34256 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_maskz, "__builtin_ia32_vfmaddsubps128_maskz", IX86_BUILTIN_VFMADDSUBPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34257 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v4df_mask3, "__builtin_ia32_vfmsubaddpd256_mask3", IX86_BUILTIN_VFMSUBADDPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34258 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v2df_mask3, "__builtin_ia32_vfmsubaddpd128_mask3", IX86_BUILTIN_VFMSUBADDPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34259 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v8sf_mask3, "__builtin_ia32_vfmsubaddps256_mask3", IX86_BUILTIN_VFMSUBADDPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34260 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v4sf_mask3, "__builtin_ia32_vfmsubaddps128_mask3", IX86_BUILTIN_VFMSUBADDPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34261 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv4df, "__builtin_ia32_insertf64x2_256_mask", IX86_BUILTIN_INSERTF64X2_256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT_V4DF_UQI },
34262 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv4di, "__builtin_ia32_inserti64x2_256_mask", IX86_BUILTIN_INSERTI64X2_256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT_V4DI_UQI },
34263 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashrvv16hi_mask, "__builtin_ia32_psrav16hi_mask", IX86_BUILTIN_PSRAVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34264 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashrvv8hi_mask, "__builtin_ia32_psrav8hi_mask", IX86_BUILTIN_PSRAVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34265 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddubsw512v16hi_mask, "__builtin_ia32_pmaddubsw256_mask", IX86_BUILTIN_PMADDUBSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI_V16HI_UHI },
34266 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddubsw512v8hi_mask, "__builtin_ia32_pmaddubsw128_mask", IX86_BUILTIN_PMADDUBSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI_V8HI_UQI },
34267 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddwd512v16hi_mask, "__builtin_ia32_pmaddwd256_mask", IX86_BUILTIN_PMADDWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI_V8SI_UQI },
34268 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddwd512v8hi_mask, "__builtin_ia32_pmaddwd128_mask", IX86_BUILTIN_PMADDWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI_V4SI_UQI },
34269 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_lshrvv16hi_mask, "__builtin_ia32_psrlv16hi_mask", IX86_BUILTIN_PSRLVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34270 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_lshrvv8hi_mask, "__builtin_ia32_psrlv8hi_mask", IX86_BUILTIN_PSRLVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34271 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_fix_notruncv8sfv8si_mask, "__builtin_ia32_cvtps2dq256_mask", IX86_BUILTIN_CVTPS2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_UQI },
34272 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_fix_notruncv4sfv4si_mask, "__builtin_ia32_cvtps2dq128_mask", IX86_BUILTIN_CVTPS2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_UQI },
34273 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ufix_notruncv8sfv8si_mask, "__builtin_ia32_cvtps2udq256_mask", IX86_BUILTIN_CVTPS2UDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_UQI },
34274 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ufix_notruncv4sfv4si_mask, "__builtin_ia32_cvtps2udq128_mask", IX86_BUILTIN_CVTPS2UDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_UQI },
34275 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2qqv4di_mask, "__builtin_ia32_cvtps2qq256_mask", IX86_BUILTIN_CVTPS2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_UQI },
34276 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2qqv2di_mask, "__builtin_ia32_cvtps2qq128_mask", IX86_BUILTIN_CVTPS2QQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_UQI },
34277 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2uqqv4di_mask, "__builtin_ia32_cvtps2uqq256_mask", IX86_BUILTIN_CVTPS2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_UQI },
34278 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2uqqv2di_mask, "__builtin_ia32_cvtps2uqq128_mask", IX86_BUILTIN_CVTPS2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_UQI },
34279 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv8sf_mask, "__builtin_ia32_getmantps256_mask", IX86_BUILTIN_GETMANTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_UQI },
34280 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv4sf_mask, "__builtin_ia32_getmantps128_mask", IX86_BUILTIN_GETMANTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_UQI },
34281 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv4df_mask, "__builtin_ia32_getmantpd256_mask", IX86_BUILTIN_GETMANTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_UQI },
34282 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv2df_mask, "__builtin_ia32_getmantpd128_mask", IX86_BUILTIN_GETMANTPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_UQI },
34283 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movddup256_mask, "__builtin_ia32_movddup256_mask", IX86_BUILTIN_MOVDDUP256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
34284 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_dupv2df_mask, "__builtin_ia32_movddup128_mask", IX86_BUILTIN_MOVDDUP128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
34285 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movshdup256_mask, "__builtin_ia32_movshdup256_mask", IX86_BUILTIN_MOVSHDUP256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
34286 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse3_movshdup_mask, "__builtin_ia32_movshdup128_mask", IX86_BUILTIN_MOVSHDUP128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
34287 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movsldup256_mask, "__builtin_ia32_movsldup256_mask", IX86_BUILTIN_MOVSLDUP256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
34288 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse3_movsldup_mask, "__builtin_ia32_movsldup128_mask", IX86_BUILTIN_MOVSLDUP128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
34289 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4div4sf2_mask, "__builtin_ia32_cvtqq2ps256_mask", IX86_BUILTIN_CVTQQ2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DI_V4SF_UQI },
34290 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv2div2sf2_mask, "__builtin_ia32_cvtqq2ps128_mask", IX86_BUILTIN_CVTQQ2PS128, UNKNOWN, (int) V4SF_FTYPE_V2DI_V4SF_UQI },
34291 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4div4sf2_mask, "__builtin_ia32_cvtuqq2ps256_mask", IX86_BUILTIN_CVTUQQ2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DI_V4SF_UQI },
34292 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2div2sf2_mask, "__builtin_ia32_cvtuqq2ps128_mask", IX86_BUILTIN_CVTUQQ2PS128, UNKNOWN, (int) V4SF_FTYPE_V2DI_V4SF_UQI },
34293 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4div4df2_mask, "__builtin_ia32_cvtqq2pd256_mask", IX86_BUILTIN_CVTQQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_UQI },
34294 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv2div2df2_mask, "__builtin_ia32_cvtqq2pd128_mask", IX86_BUILTIN_CVTQQ2PD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_UQI },
34295 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4div4df2_mask, "__builtin_ia32_cvtuqq2pd256_mask", IX86_BUILTIN_CVTUQQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_UQI },
34296 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2div2df2_mask, "__builtin_ia32_cvtuqq2pd128_mask", IX86_BUILTIN_CVTUQQ2PD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_UQI },
34297 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4di3_mask, "__builtin_ia32_vpermt2varq256_mask", IX86_BUILTIN_VPERMT2VARQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34298 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4di3_maskz, "__builtin_ia32_vpermt2varq256_maskz", IX86_BUILTIN_VPERMT2VARQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34299 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8si3_mask, "__builtin_ia32_vpermt2vard256_mask", IX86_BUILTIN_VPERMT2VARD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34300 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8si3_maskz, "__builtin_ia32_vpermt2vard256_maskz", IX86_BUILTIN_VPERMT2VARD256_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34301 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4di3_mask, "__builtin_ia32_vpermi2varq256_mask", IX86_BUILTIN_VPERMI2VARQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34302 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8si3_mask, "__builtin_ia32_vpermi2vard256_mask", IX86_BUILTIN_VPERMI2VARD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34303 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4df3_mask, "__builtin_ia32_vpermt2varpd256_mask", IX86_BUILTIN_VPERMT2VARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_V4DF_UQI },
34304 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4df3_maskz, "__builtin_ia32_vpermt2varpd256_maskz", IX86_BUILTIN_VPERMT2VARPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_V4DF_UQI },
34305 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8sf3_mask, "__builtin_ia32_vpermt2varps256_mask", IX86_BUILTIN_VPERMT2VARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_V8SF_UQI },
34306 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8sf3_maskz, "__builtin_ia32_vpermt2varps256_maskz", IX86_BUILTIN_VPERMT2VARPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_V8SF_UQI },
34307 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4df3_mask, "__builtin_ia32_vpermi2varpd256_mask", IX86_BUILTIN_VPERMI2VARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_UQI },
34308 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8sf3_mask, "__builtin_ia32_vpermi2varps256_mask", IX86_BUILTIN_VPERMI2VARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_UQI },
34309 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2di3_mask, "__builtin_ia32_vpermt2varq128_mask", IX86_BUILTIN_VPERMT2VARQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34310 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2di3_maskz, "__builtin_ia32_vpermt2varq128_maskz", IX86_BUILTIN_VPERMT2VARQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34311 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4si3_mask, "__builtin_ia32_vpermt2vard128_mask", IX86_BUILTIN_VPERMT2VARD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34312 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4si3_maskz, "__builtin_ia32_vpermt2vard128_maskz", IX86_BUILTIN_VPERMT2VARD128_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34313 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv2di3_mask, "__builtin_ia32_vpermi2varq128_mask", IX86_BUILTIN_VPERMI2VARQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34314 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4si3_mask, "__builtin_ia32_vpermi2vard128_mask", IX86_BUILTIN_VPERMI2VARD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34315 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2df3_mask, "__builtin_ia32_vpermt2varpd128_mask", IX86_BUILTIN_VPERMT2VARPD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_V2DF_UQI },
34316 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2df3_maskz, "__builtin_ia32_vpermt2varpd128_maskz", IX86_BUILTIN_VPERMT2VARPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_V2DF_UQI },
34317 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4sf3_mask, "__builtin_ia32_vpermt2varps128_mask", IX86_BUILTIN_VPERMT2VARPS128, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_V4SF_UQI },
34318 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4sf3_maskz, "__builtin_ia32_vpermt2varps128_maskz", IX86_BUILTIN_VPERMT2VARPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_V4SF_UQI },
34319 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv2df3_mask, "__builtin_ia32_vpermi2varpd128_mask", IX86_BUILTIN_VPERMI2VARPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI_V2DF_UQI },
34320 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4sf3_mask, "__builtin_ia32_vpermi2varps128_mask", IX86_BUILTIN_VPERMI2VARPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI_V4SF_UQI },
34321 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_pshufbv32qi3_mask, "__builtin_ia32_pshufb256_mask", IX86_BUILTIN_PSHUFB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34322 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_pshufbv16qi3_mask, "__builtin_ia32_pshufb128_mask", IX86_BUILTIN_PSHUFB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34323 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufhwv3_mask, "__builtin_ia32_pshufhw256_mask", IX86_BUILTIN_PSHUFHW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_UHI },
34324 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufhw_mask, "__builtin_ia32_pshufhw128_mask", IX86_BUILTIN_PSHUFHW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_UQI },
34325 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshuflwv3_mask, "__builtin_ia32_pshuflw256_mask", IX86_BUILTIN_PSHUFLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_UHI },
34326 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshuflw_mask, "__builtin_ia32_pshuflw128_mask", IX86_BUILTIN_PSHUFLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_UQI },
34327 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufdv3_mask, "__builtin_ia32_pshufd256_mask", IX86_BUILTIN_PSHUFD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_UQI },
34328 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufd_mask, "__builtin_ia32_pshufd128_mask", IX86_BUILTIN_PSHUFD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_UQI },
34329 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_shufpd256_mask, "__builtin_ia32_shufpd256_mask", IX86_BUILTIN_SHUFPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_UQI },
34330 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_shufpd_mask, "__builtin_ia32_shufpd128_mask", IX86_BUILTIN_SHUFPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI },
34331 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_shufps256_mask, "__builtin_ia32_shufps256_mask", IX86_BUILTIN_SHUFPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_UQI },
34332 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_shufps_mask, "__builtin_ia32_shufps128_mask", IX86_BUILTIN_SHUFPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI },
34333 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv4di_mask, "__builtin_ia32_prolvq256_mask", IX86_BUILTIN_PROLVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34334 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv2di_mask, "__builtin_ia32_prolvq128_mask", IX86_BUILTIN_PROLVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34335 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv4di_mask, "__builtin_ia32_prolq256_mask", IX86_BUILTIN_PROLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_UQI },
34336 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv2di_mask, "__builtin_ia32_prolq128_mask", IX86_BUILTIN_PROLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_UQI },
34337 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv4di_mask, "__builtin_ia32_prorvq256_mask", IX86_BUILTIN_PRORVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34338 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv2di_mask, "__builtin_ia32_prorvq128_mask", IX86_BUILTIN_PRORVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34339 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv4di_mask, "__builtin_ia32_prorq256_mask", IX86_BUILTIN_PRORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_UQI },
34340 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv2di_mask, "__builtin_ia32_prorq128_mask", IX86_BUILTIN_PRORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_UQI },
34341 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv2di_mask, "__builtin_ia32_psravq128_mask", IX86_BUILTIN_PSRAVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34342 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv4di_mask, "__builtin_ia32_psravq256_mask", IX86_BUILTIN_PSRAVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34343 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv4di_mask, "__builtin_ia32_psllv4di_mask", IX86_BUILTIN_PSLLVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34344 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv2di_mask, "__builtin_ia32_psllv2di_mask", IX86_BUILTIN_PSLLVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34345 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv8si_mask, "__builtin_ia32_psllv8si_mask", IX86_BUILTIN_PSLLVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34346 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv4si_mask, "__builtin_ia32_psllv4si_mask", IX86_BUILTIN_PSLLVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34347 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv8si_mask, "__builtin_ia32_psrav8si_mask", IX86_BUILTIN_PSRAVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34348 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv4si_mask, "__builtin_ia32_psrav4si_mask", IX86_BUILTIN_PSRAVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34349 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv4di_mask, "__builtin_ia32_psrlv4di_mask", IX86_BUILTIN_PSRLVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34350 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv2di_mask, "__builtin_ia32_psrlv2di_mask", IX86_BUILTIN_PSRLVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34351 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv8si_mask, "__builtin_ia32_psrlv8si_mask", IX86_BUILTIN_PSRLVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34352 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv4si_mask, "__builtin_ia32_psrlv4si_mask", IX86_BUILTIN_PSRLVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34353 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv16hi3_mask, "__builtin_ia32_psrawi256_mask", IX86_BUILTIN_PSRAWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_UHI },
34354 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv16hi3_mask, "__builtin_ia32_psraw256_mask", IX86_BUILTIN_PSRAW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_UHI },
34355 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8hi3_mask, "__builtin_ia32_psrawi128_mask", IX86_BUILTIN_PSRAWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_UQI },
34356 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8hi3_mask, "__builtin_ia32_psraw128_mask", IX86_BUILTIN_PSRAW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34357 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv16hi3_mask, "__builtin_ia32_psrlwi256_mask", IX86_BUILTIN_PSRLWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_UHI },
34358 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv16hi3_mask, "__builtin_ia32_psrlw256_mask", IX86_BUILTIN_PSRLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_UHI },
34359 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8hi3_mask, "__builtin_ia32_psrlwi128_mask", IX86_BUILTIN_PSRLWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_UQI },
34360 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8hi3_mask, "__builtin_ia32_psrlw128_mask", IX86_BUILTIN_PSRLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34361 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv8si_mask, "__builtin_ia32_prorvd256_mask", IX86_BUILTIN_PRORVD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34362 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv8si_mask, "__builtin_ia32_prolvd256_mask", IX86_BUILTIN_PROLVD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34363 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv8si_mask, "__builtin_ia32_prord256_mask", IX86_BUILTIN_PRORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_UQI },
34364 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv8si_mask, "__builtin_ia32_prold256_mask", IX86_BUILTIN_PROLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_UQI },
34365 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv4si_mask, "__builtin_ia32_prorvd128_mask", IX86_BUILTIN_PRORVD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34366 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv4si_mask, "__builtin_ia32_prolvd128_mask", IX86_BUILTIN_PROLVD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34367 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv4si_mask, "__builtin_ia32_prord128_mask", IX86_BUILTIN_PRORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_UQI },
34368 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv4si_mask, "__builtin_ia32_prold128_mask", IX86_BUILTIN_PROLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_UQI },
34369 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv4df_mask, "__builtin_ia32_fpclasspd256_mask", IX86_BUILTIN_FPCLASSPD256, UNKNOWN, (int) QI_FTYPE_V4DF_INT_UQI },
34370 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv2df_mask, "__builtin_ia32_fpclasspd128_mask", IX86_BUILTIN_FPCLASSPD128, UNKNOWN, (int) QI_FTYPE_V2DF_INT_UQI },
34371 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vmfpclassv2df, "__builtin_ia32_fpclasssd", IX86_BUILTIN_FPCLASSSD, UNKNOWN, (int) QI_FTYPE_V2DF_INT },
34372 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv8sf_mask, "__builtin_ia32_fpclassps256_mask", IX86_BUILTIN_FPCLASSPS256, UNKNOWN, (int) QI_FTYPE_V8SF_INT_UQI },
34373 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv4sf_mask, "__builtin_ia32_fpclassps128_mask", IX86_BUILTIN_FPCLASSPS128, UNKNOWN, (int) QI_FTYPE_V4SF_INT_UQI },
34374 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vmfpclassv4sf, "__builtin_ia32_fpclassss", IX86_BUILTIN_FPCLASSSS, UNKNOWN, (int) QI_FTYPE_V4SF_INT },
34375 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtb2maskv16qi, "__builtin_ia32_cvtb2mask128", IX86_BUILTIN_CVTB2MASK128, UNKNOWN, (int) UHI_FTYPE_V16QI },
34376 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtb2maskv32qi, "__builtin_ia32_cvtb2mask256", IX86_BUILTIN_CVTB2MASK256, UNKNOWN, (int) USI_FTYPE_V32QI },
34377 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtw2maskv8hi, "__builtin_ia32_cvtw2mask128", IX86_BUILTIN_CVTW2MASK128, UNKNOWN, (int) UQI_FTYPE_V8HI },
34378 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtw2maskv16hi, "__builtin_ia32_cvtw2mask256", IX86_BUILTIN_CVTW2MASK256, UNKNOWN, (int) UHI_FTYPE_V16HI },
34379 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtd2maskv4si, "__builtin_ia32_cvtd2mask128", IX86_BUILTIN_CVTD2MASK128, UNKNOWN, (int) UQI_FTYPE_V4SI },
34380 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtd2maskv8si, "__builtin_ia32_cvtd2mask256", IX86_BUILTIN_CVTD2MASK256, UNKNOWN, (int) UQI_FTYPE_V8SI },
34381 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtq2maskv2di, "__builtin_ia32_cvtq2mask128", IX86_BUILTIN_CVTQ2MASK128, UNKNOWN, (int) UQI_FTYPE_V2DI },
34382 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtq2maskv4di, "__builtin_ia32_cvtq2mask256", IX86_BUILTIN_CVTQ2MASK256, UNKNOWN, (int) UQI_FTYPE_V4DI },
34383 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2bv16qi, "__builtin_ia32_cvtmask2b128", IX86_BUILTIN_CVTMASK2B128, UNKNOWN, (int) V16QI_FTYPE_UHI },
34384 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2bv32qi, "__builtin_ia32_cvtmask2b256", IX86_BUILTIN_CVTMASK2B256, UNKNOWN, (int) V32QI_FTYPE_USI },
34385 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2wv8hi, "__builtin_ia32_cvtmask2w128", IX86_BUILTIN_CVTMASK2W128, UNKNOWN, (int) V8HI_FTYPE_UQI },
34386 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2wv16hi, "__builtin_ia32_cvtmask2w256", IX86_BUILTIN_CVTMASK2W256, UNKNOWN, (int) V16HI_FTYPE_UHI },
34387 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2dv4si, "__builtin_ia32_cvtmask2d128", IX86_BUILTIN_CVTMASK2D128, UNKNOWN, (int) V4SI_FTYPE_UQI },
34388 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2dv8si, "__builtin_ia32_cvtmask2d256", IX86_BUILTIN_CVTMASK2D256, UNKNOWN, (int) V8SI_FTYPE_UQI },
34389 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2qv2di, "__builtin_ia32_cvtmask2q128", IX86_BUILTIN_CVTMASK2Q128, UNKNOWN, (int) V2DI_FTYPE_UQI },
34390 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2qv4di, "__builtin_ia32_cvtmask2q256", IX86_BUILTIN_CVTMASK2Q256, UNKNOWN, (int) V4DI_FTYPE_UQI },
34391 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv16qi3_mask, "__builtin_ia32_pcmpeqb128_mask", IX86_BUILTIN_PCMPEQB128_MASK, UNKNOWN, (int) UHI_FTYPE_V16QI_V16QI_UHI },
34392 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv32qi3_mask, "__builtin_ia32_pcmpeqb256_mask", IX86_BUILTIN_PCMPEQB256_MASK, UNKNOWN, (int) USI_FTYPE_V32QI_V32QI_USI },
34393 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv8hi3_mask, "__builtin_ia32_pcmpeqw128_mask", IX86_BUILTIN_PCMPEQW128_MASK, UNKNOWN, (int) UQI_FTYPE_V8HI_V8HI_UQI },
34394 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv16hi3_mask, "__builtin_ia32_pcmpeqw256_mask", IX86_BUILTIN_PCMPEQW256_MASK, UNKNOWN, (int) UHI_FTYPE_V16HI_V16HI_UHI },
34395 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv4si3_mask, "__builtin_ia32_pcmpeqd128_mask", IX86_BUILTIN_PCMPEQD128_MASK, UNKNOWN, (int) UQI_FTYPE_V4SI_V4SI_UQI },
34396 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv8si3_mask, "__builtin_ia32_pcmpeqd256_mask", IX86_BUILTIN_PCMPEQD256_MASK, UNKNOWN, (int) UQI_FTYPE_V8SI_V8SI_UQI },
34397 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv2di3_mask, "__builtin_ia32_pcmpeqq128_mask", IX86_BUILTIN_PCMPEQQ128_MASK, UNKNOWN, (int) UQI_FTYPE_V2DI_V2DI_UQI },
34398 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv4di3_mask, "__builtin_ia32_pcmpeqq256_mask", IX86_BUILTIN_PCMPEQQ256_MASK, UNKNOWN, (int) UQI_FTYPE_V4DI_V4DI_UQI },
34399 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv16qi3_mask, "__builtin_ia32_pcmpgtb128_mask", IX86_BUILTIN_PCMPGTB128_MASK, UNKNOWN, (int) UHI_FTYPE_V16QI_V16QI_UHI },
34400 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv32qi3_mask, "__builtin_ia32_pcmpgtb256_mask", IX86_BUILTIN_PCMPGTB256_MASK, UNKNOWN, (int) USI_FTYPE_V32QI_V32QI_USI },
34401 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv8hi3_mask, "__builtin_ia32_pcmpgtw128_mask", IX86_BUILTIN_PCMPGTW128_MASK, UNKNOWN, (int) UQI_FTYPE_V8HI_V8HI_UQI },
34402 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv16hi3_mask, "__builtin_ia32_pcmpgtw256_mask", IX86_BUILTIN_PCMPGTW256_MASK, UNKNOWN, (int) UHI_FTYPE_V16HI_V16HI_UHI },
34403 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv4si3_mask, "__builtin_ia32_pcmpgtd128_mask", IX86_BUILTIN_PCMPGTD128_MASK, UNKNOWN, (int) UQI_FTYPE_V4SI_V4SI_UQI },
34404 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv8si3_mask, "__builtin_ia32_pcmpgtd256_mask", IX86_BUILTIN_PCMPGTD256_MASK, UNKNOWN, (int) UQI_FTYPE_V8SI_V8SI_UQI },
34405 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv2di3_mask, "__builtin_ia32_pcmpgtq128_mask", IX86_BUILTIN_PCMPGTQ128_MASK, UNKNOWN, (int) UQI_FTYPE_V2DI_V2DI_UQI },
34406 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv4di3_mask, "__builtin_ia32_pcmpgtq256_mask", IX86_BUILTIN_PCMPGTQ256_MASK, UNKNOWN, (int) UQI_FTYPE_V4DI_V4DI_UQI },
34407 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv16qi3_mask, "__builtin_ia32_ptestmb128", IX86_BUILTIN_PTESTMB128, UNKNOWN, (int) UHI_FTYPE_V16QI_V16QI_UHI },
34408 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv32qi3_mask, "__builtin_ia32_ptestmb256", IX86_BUILTIN_PTESTMB256, UNKNOWN, (int) USI_FTYPE_V32QI_V32QI_USI },
34409 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv8hi3_mask, "__builtin_ia32_ptestmw128", IX86_BUILTIN_PTESTMW128, UNKNOWN, (int) UQI_FTYPE_V8HI_V8HI_UQI },
34410 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv16hi3_mask, "__builtin_ia32_ptestmw256", IX86_BUILTIN_PTESTMW256, UNKNOWN, (int) UHI_FTYPE_V16HI_V16HI_UHI },
34411 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv4si3_mask, "__builtin_ia32_ptestmd128", IX86_BUILTIN_PTESTMD128, UNKNOWN, (int) UQI_FTYPE_V4SI_V4SI_UQI },
34412 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv8si3_mask, "__builtin_ia32_ptestmd256", IX86_BUILTIN_PTESTMD256, UNKNOWN, (int) UQI_FTYPE_V8SI_V8SI_UQI },
34413 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv2di3_mask, "__builtin_ia32_ptestmq128", IX86_BUILTIN_PTESTMQ128, UNKNOWN, (int) UQI_FTYPE_V2DI_V2DI_UQI },
34414 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv4di3_mask, "__builtin_ia32_ptestmq256", IX86_BUILTIN_PTESTMQ256, UNKNOWN, (int) UQI_FTYPE_V4DI_V4DI_UQI },
34415 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv16qi3_mask, "__builtin_ia32_ptestnmb128", IX86_BUILTIN_PTESTNMB128, UNKNOWN, (int) UHI_FTYPE_V16QI_V16QI_UHI },
34416 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv32qi3_mask, "__builtin_ia32_ptestnmb256", IX86_BUILTIN_PTESTNMB256, UNKNOWN, (int) USI_FTYPE_V32QI_V32QI_USI },
34417 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv8hi3_mask, "__builtin_ia32_ptestnmw128", IX86_BUILTIN_PTESTNMW128, UNKNOWN, (int) UQI_FTYPE_V8HI_V8HI_UQI },
34418 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv16hi3_mask, "__builtin_ia32_ptestnmw256", IX86_BUILTIN_PTESTNMW256, UNKNOWN, (int) UHI_FTYPE_V16HI_V16HI_UHI },
34419 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv4si3_mask, "__builtin_ia32_ptestnmd128", IX86_BUILTIN_PTESTNMD128, UNKNOWN, (int) UQI_FTYPE_V4SI_V4SI_UQI },
34420 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv8si3_mask, "__builtin_ia32_ptestnmd256", IX86_BUILTIN_PTESTNMD256, UNKNOWN, (int) UQI_FTYPE_V8SI_V8SI_UQI },
34421 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv2di3_mask, "__builtin_ia32_ptestnmq128", IX86_BUILTIN_PTESTNMQ128, UNKNOWN, (int) UQI_FTYPE_V2DI_V2DI_UQI },
34422 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv4di3_mask, "__builtin_ia32_ptestnmq256", IX86_BUILTIN_PTESTNMQ256, UNKNOWN, (int) UQI_FTYPE_V4DI_V4DI_UQI },
34423 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv2di, "__builtin_ia32_broadcastmb128", IX86_BUILTIN_PBROADCASTMB128, UNKNOWN, (int) V2DI_FTYPE_UQI },
34424 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv4di, "__builtin_ia32_broadcastmb256", IX86_BUILTIN_PBROADCASTMB256, UNKNOWN, (int) V4DI_FTYPE_UQI },
34425 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv4si, "__builtin_ia32_broadcastmw128", IX86_BUILTIN_PBROADCASTMW128, UNKNOWN, (int) V4SI_FTYPE_UHI },
34426 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv8si, "__builtin_ia32_broadcastmw256", IX86_BUILTIN_PBROADCASTMW256, UNKNOWN, (int) V8SI_FTYPE_UHI },
34427 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4df_mask, "__builtin_ia32_compressdf256_mask", IX86_BUILTIN_COMPRESSPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
34428 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv2df_mask, "__builtin_ia32_compressdf128_mask", IX86_BUILTIN_COMPRESSPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
34429 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv8sf_mask, "__builtin_ia32_compresssf256_mask", IX86_BUILTIN_COMPRESSPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
34430 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4sf_mask, "__builtin_ia32_compresssf128_mask", IX86_BUILTIN_COMPRESSPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
34431 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4di_mask, "__builtin_ia32_compressdi256_mask", IX86_BUILTIN_PCOMPRESSQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI },
34432 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv2di_mask, "__builtin_ia32_compressdi128_mask", IX86_BUILTIN_PCOMPRESSQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI },
34433 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv8si_mask, "__builtin_ia32_compresssi256_mask", IX86_BUILTIN_PCOMPRESSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UQI },
34434 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4si_mask, "__builtin_ia32_compresssi128_mask", IX86_BUILTIN_PCOMPRESSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
34435 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_mask, "__builtin_ia32_expanddf256_mask", IX86_BUILTIN_EXPANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
34436 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_mask, "__builtin_ia32_expanddf128_mask", IX86_BUILTIN_EXPANDPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
34437 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_mask, "__builtin_ia32_expandsf256_mask", IX86_BUILTIN_EXPANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
34438 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_mask, "__builtin_ia32_expandsf128_mask", IX86_BUILTIN_EXPANDPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
34439 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_mask, "__builtin_ia32_expanddi256_mask", IX86_BUILTIN_PEXPANDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI },
34440 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_mask, "__builtin_ia32_expanddi128_mask", IX86_BUILTIN_PEXPANDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI },
34441 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_mask, "__builtin_ia32_expandsi256_mask", IX86_BUILTIN_PEXPANDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UQI },
34442 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_mask, "__builtin_ia32_expandsi128_mask", IX86_BUILTIN_PEXPANDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
34443 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_maskz, "__builtin_ia32_expanddf256_maskz", IX86_BUILTIN_EXPANDPD256Z, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
34444 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_maskz, "__builtin_ia32_expanddf128_maskz", IX86_BUILTIN_EXPANDPD128Z, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
34445 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_maskz, "__builtin_ia32_expandsf256_maskz", IX86_BUILTIN_EXPANDPS256Z, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
34446 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_maskz, "__builtin_ia32_expandsf128_maskz", IX86_BUILTIN_EXPANDPS128Z, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
34447 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_maskz, "__builtin_ia32_expanddi256_maskz", IX86_BUILTIN_PEXPANDQ256Z, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI },
34448 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_maskz, "__builtin_ia32_expanddi128_maskz", IX86_BUILTIN_PEXPANDQ128Z, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI },
34449 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_maskz, "__builtin_ia32_expandsi256_maskz", IX86_BUILTIN_PEXPANDD256Z, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UQI },
34450 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_maskz, "__builtin_ia32_expandsi128_maskz", IX86_BUILTIN_PEXPANDD128Z, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
34451 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8si3_mask, "__builtin_ia32_pmaxsd256_mask", IX86_BUILTIN_PMAXSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34452 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8si3_mask, "__builtin_ia32_pminsd256_mask", IX86_BUILTIN_PMINSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34453 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv8si3_mask, "__builtin_ia32_pmaxud256_mask", IX86_BUILTIN_PMAXUD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34454 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv8si3_mask, "__builtin_ia32_pminud256_mask", IX86_BUILTIN_PMINUD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34455 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4si3_mask, "__builtin_ia32_pmaxsd128_mask", IX86_BUILTIN_PMAXSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34456 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4si3_mask, "__builtin_ia32_pminsd128_mask", IX86_BUILTIN_PMINSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34457 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv4si3_mask, "__builtin_ia32_pmaxud128_mask", IX86_BUILTIN_PMAXUD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34458 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv4si3_mask, "__builtin_ia32_pminud128_mask", IX86_BUILTIN_PMINUD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34459 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4di3_mask, "__builtin_ia32_pmaxsq256_mask", IX86_BUILTIN_PMAXSQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34460 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4di3_mask, "__builtin_ia32_pminsq256_mask", IX86_BUILTIN_PMINSQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34461 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv4di3_mask, "__builtin_ia32_pmaxuq256_mask", IX86_BUILTIN_PMAXUQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34462 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv4di3_mask, "__builtin_ia32_pminuq256_mask", IX86_BUILTIN_PMINUQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34463 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv2di3_mask, "__builtin_ia32_pmaxsq128_mask", IX86_BUILTIN_PMAXSQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34464 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv2di3_mask, "__builtin_ia32_pminsq128_mask", IX86_BUILTIN_PMINSQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34465 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv2di3_mask, "__builtin_ia32_pmaxuq128_mask", IX86_BUILTIN_PMAXUQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34466 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv2di3_mask, "__builtin_ia32_pminuq128_mask", IX86_BUILTIN_PMINUQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34467 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv32qi3_mask, "__builtin_ia32_pminsb256_mask", IX86_BUILTIN_PMINSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34468 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv32qi3_mask, "__builtin_ia32_pminub256_mask", IX86_BUILTIN_PMINUB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34469 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv32qi3_mask, "__builtin_ia32_pmaxsb256_mask", IX86_BUILTIN_PMAXSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34470 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv32qi3_mask, "__builtin_ia32_pmaxub256_mask", IX86_BUILTIN_PMAXUB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34471 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv16qi3_mask, "__builtin_ia32_pminsb128_mask", IX86_BUILTIN_PMINSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34472 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv16qi3_mask, "__builtin_ia32_pminub128_mask", IX86_BUILTIN_PMINUB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34473 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv16qi3_mask, "__builtin_ia32_pmaxsb128_mask", IX86_BUILTIN_PMAXSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34474 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv16qi3_mask, "__builtin_ia32_pmaxub128_mask", IX86_BUILTIN_PMAXUB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34475 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv16hi3_mask, "__builtin_ia32_pminsw256_mask", IX86_BUILTIN_PMINSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34476 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv16hi3_mask, "__builtin_ia32_pminuw256_mask", IX86_BUILTIN_PMINUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34477 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv16hi3_mask, "__builtin_ia32_pmaxsw256_mask", IX86_BUILTIN_PMAXSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34478 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv16hi3_mask, "__builtin_ia32_pmaxuw256_mask", IX86_BUILTIN_PMAXUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34479 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8hi3_mask, "__builtin_ia32_pminsw128_mask", IX86_BUILTIN_PMINSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34480 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv8hi3_mask, "__builtin_ia32_pminuw128_mask", IX86_BUILTIN_PMINUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34481 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8hi3_mask, "__builtin_ia32_pmaxsw128_mask", IX86_BUILTIN_PMAXSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34482 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv8hi3_mask, "__builtin_ia32_pmaxuw128_mask", IX86_BUILTIN_PMAXUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34483 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv4di_mask, "__builtin_ia32_vpconflictdi_256_mask", IX86_BUILTIN_VPCONFLICTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI },
34484 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv8si_mask, "__builtin_ia32_vpconflictsi_256_mask", IX86_BUILTIN_VPCONFLICTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UQI },
34485 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv4di2_mask, "__builtin_ia32_vplzcntq_256_mask", IX86_BUILTIN_VPCLZCNTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI },
34486 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv8si2_mask, "__builtin_ia32_vplzcntd_256_mask", IX86_BUILTIN_VPCLZCNTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UQI },
34487 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpckhpd256_mask, "__builtin_ia32_unpckhpd256_mask", IX86_BUILTIN_UNPCKHPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34488 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_unpckhpd128_mask, "__builtin_ia32_unpckhpd128_mask", IX86_BUILTIN_UNPCKHPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34489 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpckhps256_mask, "__builtin_ia32_unpckhps256_mask", IX86_BUILTIN_UNPCKHPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34490 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv4sf_mask, "__builtin_ia32_unpckhps128_mask", IX86_BUILTIN_UNPCKHPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34491 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpcklpd256_mask, "__builtin_ia32_unpcklpd256_mask", IX86_BUILTIN_UNPCKLPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34492 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_unpcklpd128_mask, "__builtin_ia32_unpcklpd128_mask", IX86_BUILTIN_UNPCKLPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34493 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpcklps256_mask, "__builtin_ia32_unpcklps256_mask", IX86_BUILTIN_UNPCKLPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34494 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv2di_mask, "__builtin_ia32_vpconflictdi_128_mask", IX86_BUILTIN_VPCONFLICTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI },
34495 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv4si_mask, "__builtin_ia32_vpconflictsi_128_mask", IX86_BUILTIN_VPCONFLICTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
34496 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv2di2_mask, "__builtin_ia32_vplzcntq_128_mask", IX86_BUILTIN_VPCLZCNTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI },
34497 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv4si2_mask, "__builtin_ia32_vplzcntd_128_mask", IX86_BUILTIN_VPCLZCNTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
34498 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_unpcklps128_mask, "__builtin_ia32_unpcklps128_mask", IX86_BUILTIN_UNPCKLPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34499 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv8si_mask, "__builtin_ia32_alignd256_mask", IX86_BUILTIN_ALIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_UQI },
34500 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv4di_mask, "__builtin_ia32_alignq256_mask", IX86_BUILTIN_ALIGNQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_UQI },
34501 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv4si_mask, "__builtin_ia32_alignd128_mask", IX86_BUILTIN_ALIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT_V4SI_UQI },
34502 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv2di_mask, "__builtin_ia32_alignq128_mask", IX86_BUILTIN_ALIGNQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_UQI },
34503 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtps2ph256_mask, "__builtin_ia32_vcvtps2ph256_mask", IX86_BUILTIN_CVTPS2PH256_MASK, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT_V8HI_UQI },
34504 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtps2ph_mask, "__builtin_ia32_vcvtps2ph_mask", IX86_BUILTIN_CVTPS2PH_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT_V8HI_UQI },
34505 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtph2ps_mask, "__builtin_ia32_vcvtph2ps_mask", IX86_BUILTIN_CVTPH2PS_MASK, UNKNOWN, (int) V4SF_FTYPE_V8HI_V4SF_UQI },
34506 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtph2ps256_mask, "__builtin_ia32_vcvtph2ps256_mask", IX86_BUILTIN_CVTPH2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8HI_V8SF_UQI },
34507 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv4si_mask, "__builtin_ia32_punpckhdq128_mask", IX86_BUILTIN_PUNPCKHDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34508 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv8si_mask, "__builtin_ia32_punpckhdq256_mask", IX86_BUILTIN_PUNPCKHDQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34509 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv2di_mask, "__builtin_ia32_punpckhqdq128_mask", IX86_BUILTIN_PUNPCKHQDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34510 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv4di_mask, "__builtin_ia32_punpckhqdq256_mask", IX86_BUILTIN_PUNPCKHQDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34511 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv4si_mask, "__builtin_ia32_punpckldq128_mask", IX86_BUILTIN_PUNPCKLDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34512 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv8si_mask, "__builtin_ia32_punpckldq256_mask", IX86_BUILTIN_PUNPCKLDQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34513 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv2di_mask, "__builtin_ia32_punpcklqdq128_mask", IX86_BUILTIN_PUNPCKLQDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34514 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv4di_mask, "__builtin_ia32_punpcklqdq256_mask", IX86_BUILTIN_PUNPCKLQDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34515 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv16qi_mask, "__builtin_ia32_punpckhbw128_mask", IX86_BUILTIN_PUNPCKHBW128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34516 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv32qi_mask, "__builtin_ia32_punpckhbw256_mask", IX86_BUILTIN_PUNPCKHBW256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34517 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv8hi_mask, "__builtin_ia32_punpckhwd128_mask", IX86_BUILTIN_PUNPCKHWD128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34518 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv16hi_mask, "__builtin_ia32_punpckhwd256_mask", IX86_BUILTIN_PUNPCKHWD256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34519 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv16qi_mask, "__builtin_ia32_punpcklbw128_mask", IX86_BUILTIN_PUNPCKLBW128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34520 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv32qi_mask, "__builtin_ia32_punpcklbw256_mask", IX86_BUILTIN_PUNPCKLBW256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34521 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv8hi_mask, "__builtin_ia32_punpcklwd128_mask", IX86_BUILTIN_PUNPCKLWD128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34522 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv16hi_mask, "__builtin_ia32_punpcklwd256_mask", IX86_BUILTIN_PUNPCKLWD256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34523 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashlvv16hi_mask, "__builtin_ia32_psllv16hi_mask", IX86_BUILTIN_PSLLVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34524 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashlvv8hi_mask, "__builtin_ia32_psllv8hi_mask", IX86_BUILTIN_PSLLVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34525 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packssdw_mask, "__builtin_ia32_packssdw256_mask", IX86_BUILTIN_PACKSSDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI_V16HI_UHI },
34526 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packssdw_mask, "__builtin_ia32_packssdw128_mask", IX86_BUILTIN_PACKSSDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI_V8HI_UQI },
34527 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packusdw_mask, "__builtin_ia32_packusdw256_mask", IX86_BUILTIN_PACKUSDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI_V16HI_UHI },
34528 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_packusdw_mask, "__builtin_ia32_packusdw128_mask", IX86_BUILTIN_PACKUSDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI_V8HI_UQI },
34529 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_uavgv32qi3_mask, "__builtin_ia32_pavgb256_mask", IX86_BUILTIN_PAVGB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34530 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_uavgv16hi3_mask, "__builtin_ia32_pavgw256_mask", IX86_BUILTIN_PAVGW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34531 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_uavgv16qi3_mask, "__builtin_ia32_pavgb128_mask", IX86_BUILTIN_PAVGB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34532 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_uavgv8hi3_mask, "__builtin_ia32_pavgw128_mask", IX86_BUILTIN_PAVGW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34533 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv8sf_mask, "__builtin_ia32_permvarsf256_mask", IX86_BUILTIN_VPERMVARSF256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_UQI },
34534 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv4df_mask, "__builtin_ia32_permvardf256_mask", IX86_BUILTIN_VPERMVARDF256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_UQI },
34535 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permv4df_mask, "__builtin_ia32_permdf256_mask", IX86_BUILTIN_VPERMDF256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_UQI },
34536 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv32qi2_mask, "__builtin_ia32_pabsb256_mask", IX86_BUILTIN_PABSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_USI },
34537 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv16qi2_mask, "__builtin_ia32_pabsb128_mask", IX86_BUILTIN_PABSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_UHI },
34538 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv16hi2_mask, "__builtin_ia32_pabsw256_mask", IX86_BUILTIN_PABSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_UHI },
34539 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv8hi2_mask, "__builtin_ia32_pabsw128_mask", IX86_BUILTIN_PABSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_UQI },
34540 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv2df3_mask, "__builtin_ia32_vpermilvarpd_mask", IX86_BUILTIN_VPERMILVARPD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI_V2DF_UQI },
34541 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv4sf3_mask, "__builtin_ia32_vpermilvarps_mask", IX86_BUILTIN_VPERMILVARPS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI_V4SF_UQI },
34542 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv4df3_mask, "__builtin_ia32_vpermilvarpd256_mask", IX86_BUILTIN_VPERMILVARPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_UQI },
34543 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv8sf3_mask, "__builtin_ia32_vpermilvarps256_mask", IX86_BUILTIN_VPERMILVARPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_UQI },
34544 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv2df_mask, "__builtin_ia32_vpermilpd_mask", IX86_BUILTIN_VPERMILPD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_UQI },
34545 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv4sf_mask, "__builtin_ia32_vpermilps_mask", IX86_BUILTIN_VPERMILPS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_UQI },
34546 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv4df_mask, "__builtin_ia32_vpermilpd256_mask", IX86_BUILTIN_VPERMILPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_UQI },
34547 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv8sf_mask, "__builtin_ia32_vpermilps256_mask", IX86_BUILTIN_VPERMILPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_UQI },
34548 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4di, "__builtin_ia32_blendmq_256_mask", IX86_BUILTIN_BLENDMQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI },
34549 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8si, "__builtin_ia32_blendmd_256_mask", IX86_BUILTIN_BLENDMD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UQI },
34550 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4df, "__builtin_ia32_blendmpd_256_mask", IX86_BUILTIN_BLENDMPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
34551 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8sf, "__builtin_ia32_blendmps_256_mask", IX86_BUILTIN_BLENDMPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
34552 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv2di, "__builtin_ia32_blendmq_128_mask", IX86_BUILTIN_BLENDMQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI },
34553 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4si, "__builtin_ia32_blendmd_128_mask", IX86_BUILTIN_BLENDMD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
34554 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv2df, "__builtin_ia32_blendmpd_128_mask", IX86_BUILTIN_BLENDMPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
34555 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4sf, "__builtin_ia32_blendmps_128_mask", IX86_BUILTIN_BLENDMPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
34556 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv16hi, "__builtin_ia32_blendmw_256_mask", IX86_BUILTIN_BLENDMW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_UHI },
34557 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv32qi, "__builtin_ia32_blendmb_256_mask", IX86_BUILTIN_BLENDMB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_USI },
34558 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8hi, "__builtin_ia32_blendmw_128_mask", IX86_BUILTIN_BLENDMW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_UQI },
34559 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv16qi, "__builtin_ia32_blendmb_128_mask", IX86_BUILTIN_BLENDMB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_UHI },
34560 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8si3_mask, "__builtin_ia32_pmulld256_mask", IX86_BUILTIN_PMULLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34561 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4si3_mask, "__builtin_ia32_pmulld128_mask", IX86_BUILTIN_PMULLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34562 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_umult_even_v8si_mask, "__builtin_ia32_pmuludq256_mask", IX86_BUILTIN_PMULUDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI_V4DI_UQI },
34563 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_smult_even_v8si_mask, "__builtin_ia32_pmuldq256_mask", IX86_BUILTIN_PMULDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI_V4DI_UQI },
34564 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_mulv2siv2di3_mask, "__builtin_ia32_pmuldq128_mask", IX86_BUILTIN_PMULDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI_V2DI_UQI },
34565 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_umult_even_v4si_mask, "__builtin_ia32_pmuludq128_mask", IX86_BUILTIN_PMULUDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI_V2DI_UQI },
34566 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtpd2ps256_mask, "__builtin_ia32_cvtpd2ps256_mask", IX86_BUILTIN_CVTPD2PS256_MASK, UNKNOWN, (int) V4SF_FTYPE_V4DF_V4SF_UQI },
34567 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtpd2ps_mask, "__builtin_ia32_cvtpd2ps_mask", IX86_BUILTIN_CVTPD2PS_MASK, UNKNOWN, (int) V4SF_FTYPE_V2DF_V4SF_UQI },
34568 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv8si_mask, "__builtin_ia32_permvarsi256_mask", IX86_BUILTIN_VPERMVARSI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34569 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv4di_mask, "__builtin_ia32_permvardi256_mask", IX86_BUILTIN_VPERMVARDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34570 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permv4di_mask, "__builtin_ia32_permdi256_mask", IX86_BUILTIN_VPERMDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_UQI },
34571 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4di3_mask, "__builtin_ia32_cmpq256_mask", IX86_BUILTIN_CMPQ256, UNKNOWN, (int) UQI_FTYPE_V4DI_V4DI_INT_UQI },
34572 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8si3_mask, "__builtin_ia32_cmpd256_mask", IX86_BUILTIN_CMPD256, UNKNOWN, (int) UQI_FTYPE_V8SI_V8SI_INT_UQI },
34573 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv4di3_mask, "__builtin_ia32_ucmpq256_mask", IX86_BUILTIN_UCMPQ256, UNKNOWN, (int) UQI_FTYPE_V4DI_V4DI_INT_UQI },
34574 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv8si3_mask, "__builtin_ia32_ucmpd256_mask", IX86_BUILTIN_UCMPD256, UNKNOWN, (int) UQI_FTYPE_V8SI_V8SI_INT_UQI },
34575 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv32qi3_mask, "__builtin_ia32_cmpb256_mask", IX86_BUILTIN_CMPB256, UNKNOWN, (int) USI_FTYPE_V32QI_V32QI_INT_USI },
34576 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv16hi3_mask, "__builtin_ia32_cmpw256_mask", IX86_BUILTIN_CMPW256, UNKNOWN, (int) UHI_FTYPE_V16HI_V16HI_INT_UHI },
34577 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv32qi3_mask, "__builtin_ia32_ucmpb256_mask", IX86_BUILTIN_UCMPB256, UNKNOWN, (int) USI_FTYPE_V32QI_V32QI_INT_USI },
34578 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv16hi3_mask, "__builtin_ia32_ucmpw256_mask", IX86_BUILTIN_UCMPW256, UNKNOWN, (int) UHI_FTYPE_V16HI_V16HI_INT_UHI },
34579 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4df3_mask, "__builtin_ia32_cmppd256_mask", IX86_BUILTIN_CMPPD256_MASK, UNKNOWN, (int) QI_FTYPE_V4DF_V4DF_INT_UQI },
34580 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8sf3_mask, "__builtin_ia32_cmpps256_mask", IX86_BUILTIN_CMPPS256_MASK, UNKNOWN, (int) QI_FTYPE_V8SF_V8SF_INT_UQI },
34581 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv2di3_mask, "__builtin_ia32_cmpq128_mask", IX86_BUILTIN_CMPQ128, UNKNOWN, (int) UQI_FTYPE_V2DI_V2DI_INT_UQI },
34582 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4si3_mask, "__builtin_ia32_cmpd128_mask", IX86_BUILTIN_CMPD128, UNKNOWN, (int) UQI_FTYPE_V4SI_V4SI_INT_UQI },
34583 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv2di3_mask, "__builtin_ia32_ucmpq128_mask", IX86_BUILTIN_UCMPQ128, UNKNOWN, (int) UQI_FTYPE_V2DI_V2DI_INT_UQI },
34584 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv4si3_mask, "__builtin_ia32_ucmpd128_mask", IX86_BUILTIN_UCMPD128, UNKNOWN, (int) UQI_FTYPE_V4SI_V4SI_INT_UQI },
34585 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv16qi3_mask, "__builtin_ia32_cmpb128_mask", IX86_BUILTIN_CMPB128, UNKNOWN, (int) UHI_FTYPE_V16QI_V16QI_INT_UHI },
34586 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8hi3_mask, "__builtin_ia32_cmpw128_mask", IX86_BUILTIN_CMPW128, UNKNOWN, (int) UQI_FTYPE_V8HI_V8HI_INT_UQI },
34587 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv16qi3_mask, "__builtin_ia32_ucmpb128_mask", IX86_BUILTIN_UCMPB128, UNKNOWN, (int) UHI_FTYPE_V16QI_V16QI_INT_UHI },
34588 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv8hi3_mask, "__builtin_ia32_ucmpw128_mask", IX86_BUILTIN_UCMPW128, UNKNOWN, (int) UQI_FTYPE_V8HI_V8HI_INT_UQI },
34589 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv2df3_mask, "__builtin_ia32_cmppd128_mask", IX86_BUILTIN_CMPPD128_MASK, UNKNOWN, (int) UQI_FTYPE_V2DF_V2DF_INT_UQI },
34590 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4sf3_mask, "__builtin_ia32_cmpps128_mask", IX86_BUILTIN_CMPPS128_MASK, UNKNOWN, (int) UQI_FTYPE_V4SF_V4SF_INT_UQI },
34593 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x2_512_mask", IX86_BUILTIN_BROADCASTF32x2_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_UHI },
34594 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16si_mask, "__builtin_ia32_broadcasti32x2_512_mask", IX86_BUILTIN_BROADCASTI32x2_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_UHI },
34595 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv8df_mask_1, "__builtin_ia32_broadcastf64x2_512_mask", IX86_BUILTIN_BROADCASTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_UQI },
34596 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv8di_mask_1, "__builtin_ia32_broadcasti64x2_512_mask", IX86_BUILTIN_BROADCASTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_UQI },
34597 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16sf_mask_1, "__builtin_ia32_broadcastf32x8_512_mask", IX86_BUILTIN_BROADCASTF32X8_512, UNKNOWN, (int) V16SF_FTYPE_V8SF_V16SF_UHI },
34598 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16si_mask_1, "__builtin_ia32_broadcasti32x8_512_mask", IX86_BUILTIN_BROADCASTI32X8_512, UNKNOWN, (int) V16SI_FTYPE_V8SI_V16SI_UHI },
34599 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextractf64x2_mask, "__builtin_ia32_extractf64x2_512_mask", IX86_BUILTIN_EXTRACTF64X2_512, UNKNOWN, (int) V2DF_FTYPE_V8DF_INT_V2DF_UQI },
34600 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextractf32x8_mask, "__builtin_ia32_extractf32x8_mask", IX86_BUILTIN_EXTRACTF32X8, UNKNOWN, (int) V8SF_FTYPE_V16SF_INT_V8SF_UQI },
34601 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextracti64x2_mask, "__builtin_ia32_extracti64x2_512_mask", IX86_BUILTIN_EXTRACTI64X2_512, UNKNOWN, (int) V2DI_FTYPE_V8DI_INT_V2DI_UQI },
34602 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextracti32x8_mask, "__builtin_ia32_extracti32x8_mask", IX86_BUILTIN_EXTRACTI32X8, UNKNOWN, (int) V8SI_FTYPE_V16SI_INT_V8SI_UQI },
34603 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducepv8df_mask, "__builtin_ia32_reducepd512_mask", IX86_BUILTIN_REDUCEPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_UQI },
34604 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducepv16sf_mask, "__builtin_ia32_reduceps512_mask", IX86_BUILTIN_REDUCEPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_UHI },
34605 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_mulv8di3_mask, "__builtin_ia32_pmullq512_mask", IX86_BUILTIN_PMULLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
34606 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_xorv8df3_mask, "__builtin_ia32_xorpd512_mask", IX86_BUILTIN_XORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI },
34607 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_xorv16sf3_mask, "__builtin_ia32_xorps512_mask", IX86_BUILTIN_XORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI },
34608 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_iorv8df3_mask, "__builtin_ia32_orpd512_mask", IX86_BUILTIN_ORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI },
34609 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_iorv16sf3_mask, "__builtin_ia32_orps512_mask", IX86_BUILTIN_ORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI },
34610 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_andv8df3_mask, "__builtin_ia32_andpd512_mask", IX86_BUILTIN_ANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI },
34611 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_andv16sf3_mask, "__builtin_ia32_andps512_mask", IX86_BUILTIN_ANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI },
34612 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_andnotv8df3_mask, "__builtin_ia32_andnpd512_mask", IX86_BUILTIN_ANDNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI},
34613 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_andnotv16sf3_mask, "__builtin_ia32_andnps512_mask", IX86_BUILTIN_ANDNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI },
34614 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinsertf32x8_mask, "__builtin_ia32_insertf32x8_mask", IX86_BUILTIN_INSERTF32X8, UNKNOWN, (int) V16SF_FTYPE_V16SF_V8SF_INT_V16SF_UHI },
34615 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinserti32x8_mask, "__builtin_ia32_inserti32x8_mask", IX86_BUILTIN_INSERTI32X8, UNKNOWN, (int) V16SI_FTYPE_V16SI_V8SI_INT_V16SI_UHI },
34616 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinsertf64x2_mask, "__builtin_ia32_insertf64x2_512_mask", IX86_BUILTIN_INSERTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V2DF_INT_V8DF_UQI },
34617 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinserti64x2_mask, "__builtin_ia32_inserti64x2_512_mask", IX86_BUILTIN_INSERTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_INT_V8DI_UQI },
34618 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_fpclassv8df_mask, "__builtin_ia32_fpclasspd512_mask", IX86_BUILTIN_FPCLASSPD512, UNKNOWN, (int) QI_FTYPE_V8DF_INT_UQI },
34619 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_fpclassv16sf_mask, "__builtin_ia32_fpclassps512_mask", IX86_BUILTIN_FPCLASSPS512, UNKNOWN, (int) HI_FTYPE_V16SF_INT_UHI },
34620 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtd2maskv16si, "__builtin_ia32_cvtd2mask512", IX86_BUILTIN_CVTD2MASK512, UNKNOWN, (int) UHI_FTYPE_V16SI },
34621 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtq2maskv8di, "__builtin_ia32_cvtq2mask512", IX86_BUILTIN_CVTQ2MASK512, UNKNOWN, (int) UQI_FTYPE_V8DI },
34622 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtmask2dv16si, "__builtin_ia32_cvtmask2d512", IX86_BUILTIN_CVTMASK2D512, UNKNOWN, (int) V16SI_FTYPE_UHI },
34623 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtmask2qv8di, "__builtin_ia32_cvtmask2q512", IX86_BUILTIN_CVTMASK2Q512, UNKNOWN, (int) V8DI_FTYPE_UQI },
34626 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_kunpcksi, "__builtin_ia32_kunpcksi", IX86_BUILTIN_KUNPCKWD, UNKNOWN, (int) USI_FTYPE_USI_USI },
34627 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_kunpckdi, "__builtin_ia32_kunpckdi", IX86_BUILTIN_KUNPCKDQ, UNKNOWN, (int) UDI_FTYPE_UDI_UDI },
34628 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packusdw_mask, "__builtin_ia32_packusdw512_mask", IX86_BUILTIN_PACKUSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_USI },
34629 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashlv4ti3, "__builtin_ia32_pslldq512", IX86_BUILTIN_PSLLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT },
34630 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_lshrv4ti3, "__builtin_ia32_psrldq512", IX86_BUILTIN_PSRLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT },
34631 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packssdw_mask, "__builtin_ia32_packssdw512_mask", IX86_BUILTIN_PACKSSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_USI },
34632 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_palignrv4ti, "__builtin_ia32_palignr512", IX86_BUILTIN_PALIGNR512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_CONVERT },
34633 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_palignrv64qi_mask, "__builtin_ia32_palignr512_mask", IX86_BUILTIN_PALIGNR512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UDI_CONVERT },
34634 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loaddquv32hi_mask, "__builtin_ia32_movdquhi512_mask", IX86_BUILTIN_MOVDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI },
34635 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_loaddquv64qi_mask, "__builtin_ia32_movdquqi512_mask", IX86_BUILTIN_MOVDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI },
34636 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_psadbw, "__builtin_ia32_psadbw512", IX86_BUILTIN_PSADBW512, UNKNOWN, (int) V8DI_FTYPE_V64QI_V64QI },
34637 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_dbpsadbwv32hi_mask, "__builtin_ia32_dbpsadbw512_mask", IX86_BUILTIN_DBPSADBW512, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_INT_V32HI_USI },
34638 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dupv64qi_mask, "__builtin_ia32_pbroadcastb512_mask", IX86_BUILTIN_PBROADCASTB512, UNKNOWN, (int) V64QI_FTYPE_V16QI_V64QI_UDI },
34639 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dup_gprv64qi_mask, "__builtin_ia32_pbroadcastb512_gpr_mask", IX86_BUILTIN_PBROADCASTB512_GPR, UNKNOWN, (int) V64QI_FTYPE_QI_V64QI_UDI },
34640 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dupv32hi_mask, "__builtin_ia32_pbroadcastw512_mask", IX86_BUILTIN_PBROADCASTW512, UNKNOWN, (int) V32HI_FTYPE_V8HI_V32HI_USI },
34641 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dup_gprv32hi_mask, "__builtin_ia32_pbroadcastw512_gpr_mask", IX86_BUILTIN_PBROADCASTW512_GPR, UNKNOWN, (int) V32HI_FTYPE_HI_V32HI_USI },
34642 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sign_extendv32qiv32hi2_mask, "__builtin_ia32_pmovsxbw512_mask", IX86_BUILTIN_PMOVSXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_USI },
34643 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_zero_extendv32qiv32hi2_mask, "__builtin_ia32_pmovzxbw512_mask", IX86_BUILTIN_PMOVZXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_USI },
34644 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_permvarv32hi_mask, "__builtin_ia32_permvarhi512_mask", IX86_BUILTIN_VPERMVARHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34645 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermt2varv32hi3_mask, "__builtin_ia32_vpermt2varhi512_mask", IX86_BUILTIN_VPERMT2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34646 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermt2varv32hi3_maskz, "__builtin_ia32_vpermt2varhi512_maskz", IX86_BUILTIN_VPERMT2VARHI512_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34647 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermi2varv32hi3_mask, "__builtin_ia32_vpermi2varhi512_mask", IX86_BUILTIN_VPERMI2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34648 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_uavgv64qi3_mask, "__builtin_ia32_pavgb512_mask", IX86_BUILTIN_PAVGB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34649 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_uavgv32hi3_mask, "__builtin_ia32_pavgw512_mask", IX86_BUILTIN_PAVGW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34650 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_addv64qi3_mask, "__builtin_ia32_paddb512_mask", IX86_BUILTIN_PADDB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34651 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_subv64qi3_mask, "__builtin_ia32_psubb512_mask", IX86_BUILTIN_PSUBB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34652 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sssubv64qi3_mask, "__builtin_ia32_psubsb512_mask", IX86_BUILTIN_PSUBSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34653 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ssaddv64qi3_mask, "__builtin_ia32_paddsb512_mask", IX86_BUILTIN_PADDSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34654 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ussubv64qi3_mask, "__builtin_ia32_psubusb512_mask", IX86_BUILTIN_PSUBUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34655 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_usaddv64qi3_mask, "__builtin_ia32_paddusb512_mask", IX86_BUILTIN_PADDUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34656 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_subv32hi3_mask, "__builtin_ia32_psubw512_mask", IX86_BUILTIN_PSUBW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34657 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_addv32hi3_mask, "__builtin_ia32_paddw512_mask", IX86_BUILTIN_PADDW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34658 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sssubv32hi3_mask, "__builtin_ia32_psubsw512_mask", IX86_BUILTIN_PSUBSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34659 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ssaddv32hi3_mask, "__builtin_ia32_paddsw512_mask", IX86_BUILTIN_PADDSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34660 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ussubv32hi3_mask, "__builtin_ia32_psubusw512_mask", IX86_BUILTIN_PSUBUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34661 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_usaddv32hi3_mask, "__builtin_ia32_paddusw512_mask", IX86_BUILTIN_PADDUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34662 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umaxv32hi3_mask, "__builtin_ia32_pmaxuw512_mask", IX86_BUILTIN_PMAXUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34663 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smaxv32hi3_mask, "__builtin_ia32_pmaxsw512_mask", IX86_BUILTIN_PMAXSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34664 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_uminv32hi3_mask, "__builtin_ia32_pminuw512_mask", IX86_BUILTIN_PMINUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34665 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_sminv32hi3_mask, "__builtin_ia32_pminsw512_mask", IX86_BUILTIN_PMINSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34666 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umaxv64qi3_mask, "__builtin_ia32_pmaxub512_mask", IX86_BUILTIN_PMAXUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34667 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smaxv64qi3_mask, "__builtin_ia32_pmaxsb512_mask", IX86_BUILTIN_PMAXSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34668 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_uminv64qi3_mask, "__builtin_ia32_pminub512_mask", IX86_BUILTIN_PMINUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34669 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_sminv64qi3_mask, "__builtin_ia32_pminsb512_mask", IX86_BUILTIN_PMINSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34670 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovwb512_mask", IX86_BUILTIN_PMOVWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_USI },
34671 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ss_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovswb512_mask", IX86_BUILTIN_PMOVSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_USI },
34672 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_us_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovuswb512_mask", IX86_BUILTIN_PMOVUSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_USI },
34673 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_umulhrswv32hi3_mask, "__builtin_ia32_pmulhrsw512_mask", IX86_BUILTIN_PMULHRSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34674 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umulv32hi3_highpart_mask, "__builtin_ia32_pmulhuw512_mask" , IX86_BUILTIN_PMULHUW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34675 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smulv32hi3_highpart_mask, "__builtin_ia32_pmulhw512_mask" , IX86_BUILTIN_PMULHW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34676 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_mulv32hi3_mask, "__builtin_ia32_pmullw512_mask", IX86_BUILTIN_PMULLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34677 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllwi512_mask", IX86_BUILTIN_PSLLWI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI },
34678 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllw512_mask", IX86_BUILTIN_PSLLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_USI },
34679 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packsswb_mask, "__builtin_ia32_packsswb512_mask", IX86_BUILTIN_PACKSSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_UDI },
34680 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packuswb_mask, "__builtin_ia32_packuswb512_mask", IX86_BUILTIN_PACKUSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_UDI },
34681 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashrvv32hi_mask, "__builtin_ia32_psrav32hi_mask", IX86_BUILTIN_PSRAVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34682 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pmaddubsw512v32hi_mask, "__builtin_ia32_pmaddubsw512_mask", IX86_BUILTIN_PMADDUBSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_V32HI_USI },
34683 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pmaddwd512v32hi_mask, "__builtin_ia32_pmaddwd512_mask", IX86_BUILTIN_PMADDWD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V32HI_V32HI_V16SI_UHI },
34684 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_lshrvv32hi_mask, "__builtin_ia32_psrlv32hi_mask", IX86_BUILTIN_PSRLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34685 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_highv64qi_mask, "__builtin_ia32_punpckhbw512_mask", IX86_BUILTIN_PUNPCKHBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34686 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_highv32hi_mask, "__builtin_ia32_punpckhwd512_mask", IX86_BUILTIN_PUNPCKHWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34687 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_lowv64qi_mask, "__builtin_ia32_punpcklbw512_mask", IX86_BUILTIN_PUNPCKLBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34688 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_lowv32hi_mask, "__builtin_ia32_punpcklwd512_mask", IX86_BUILTIN_PUNPCKLWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34689 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshufbv64qi3_mask, "__builtin_ia32_pshufb512_mask", IX86_BUILTIN_PSHUFB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34690 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshufhwv32hi_mask, "__builtin_ia32_pshufhw512_mask", IX86_BUILTIN_PSHUFHW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI },
34691 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshuflwv32hi_mask, "__builtin_ia32_pshuflw512_mask", IX86_BUILTIN_PSHUFLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI },
34692 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psrawi512_mask", IX86_BUILTIN_PSRAWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI },
34693 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psraw512_mask", IX86_BUILTIN_PSRAW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_USI },
34694 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlwi512_mask", IX86_BUILTIN_PSRLWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI },
34695 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlw512_mask", IX86_BUILTIN_PSRLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_USI },
34696 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtb2maskv64qi, "__builtin_ia32_cvtb2mask512", IX86_BUILTIN_CVTB2MASK512, UNKNOWN, (int) UDI_FTYPE_V64QI },
34697 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtw2maskv32hi, "__builtin_ia32_cvtw2mask512", IX86_BUILTIN_CVTW2MASK512, UNKNOWN, (int) USI_FTYPE_V32HI },
34698 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtmask2bv64qi, "__builtin_ia32_cvtmask2b512", IX86_BUILTIN_CVTMASK2B512, UNKNOWN, (int) V64QI_FTYPE_UDI },
34699 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtmask2wv32hi, "__builtin_ia32_cvtmask2w512", IX86_BUILTIN_CVTMASK2W512, UNKNOWN, (int) V32HI_FTYPE_USI },
34700 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_eqv64qi3_mask, "__builtin_ia32_pcmpeqb512_mask", IX86_BUILTIN_PCMPEQB512_MASK, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI },
34701 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_eqv32hi3_mask, "__builtin_ia32_pcmpeqw512_mask", IX86_BUILTIN_PCMPEQW512_MASK, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_USI },
34702 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_gtv64qi3_mask, "__builtin_ia32_pcmpgtb512_mask", IX86_BUILTIN_PCMPGTB512_MASK, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI },
34703 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_gtv32hi3_mask, "__builtin_ia32_pcmpgtw512_mask", IX86_BUILTIN_PCMPGTW512_MASK, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_USI },
34704 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testmv64qi3_mask, "__builtin_ia32_ptestmb512", IX86_BUILTIN_PTESTMB512, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI },
34705 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testmv32hi3_mask, "__builtin_ia32_ptestmw512", IX86_BUILTIN_PTESTMW512, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_USI },
34706 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testnmv64qi3_mask, "__builtin_ia32_ptestnmb512", IX86_BUILTIN_PTESTNMB512, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI },
34707 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testnmv32hi3_mask, "__builtin_ia32_ptestnmw512", IX86_BUILTIN_PTESTNMW512, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_USI },
34708 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashlvv32hi_mask, "__builtin_ia32_psllv32hi_mask", IX86_BUILTIN_PSLLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34709 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_absv64qi2_mask, "__builtin_ia32_pabsb512_mask", IX86_BUILTIN_PABSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI },
34710 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_absv32hi2_mask, "__builtin_ia32_pabsw512_mask", IX86_BUILTIN_PABSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI },
34711 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_blendmv32hi, "__builtin_ia32_blendmw_512_mask", IX86_BUILTIN_BLENDMW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI },
34712 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_blendmv64qi, "__builtin_ia32_blendmb_512_mask", IX86_BUILTIN_BLENDMB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI },
34713 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cmpv64qi3_mask, "__builtin_ia32_cmpb512_mask", IX86_BUILTIN_CMPB512, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_INT_UDI },
34714 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cmpv32hi3_mask, "__builtin_ia32_cmpw512_mask", IX86_BUILTIN_CMPW512, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_INT_USI },
34715 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ucmpv64qi3_mask, "__builtin_ia32_ucmpb512_mask", IX86_BUILTIN_UCMPB512, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_INT_UDI },
34716 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ucmpv32hi3_mask, "__builtin_ia32_ucmpw512_mask", IX86_BUILTIN_UCMPW512, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_INT_USI },
34719 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52luqv8di_mask, "__builtin_ia32_vpmadd52luq512_mask", IX86_BUILTIN_VPMADD52LUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
34720 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52luqv8di_maskz, "__builtin_ia32_vpmadd52luq512_maskz", IX86_BUILTIN_VPMADD52LUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
34721 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52huqv8di_mask, "__builtin_ia32_vpmadd52huq512_mask", IX86_BUILTIN_VPMADD52HUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
34722 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52huqv8di_maskz, "__builtin_ia32_vpmadd52huq512_maskz", IX86_BUILTIN_VPMADD52HUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
34723 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv4di_mask, "__builtin_ia32_vpmadd52luq256_mask", IX86_BUILTIN_VPMADD52LUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34724 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv4di_maskz, "__builtin_ia32_vpmadd52luq256_maskz", IX86_BUILTIN_VPMADD52LUQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34725 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv4di_mask, "__builtin_ia32_vpmadd52huq256_mask", IX86_BUILTIN_VPMADD52HUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34726 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv4di_maskz, "__builtin_ia32_vpmadd52huq256_maskz", IX86_BUILTIN_VPMADD52HUQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34727 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv2di_mask, "__builtin_ia32_vpmadd52luq128_mask", IX86_BUILTIN_VPMADD52LUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34728 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv2di_maskz, "__builtin_ia32_vpmadd52luq128_maskz", IX86_BUILTIN_VPMADD52LUQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34729 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv2di_mask, "__builtin_ia32_vpmadd52huq128_mask", IX86_BUILTIN_VPMADD52HUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34730 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv2di_maskz, "__builtin_ia32_vpmadd52huq128_maskz", IX86_BUILTIN_VPMADD52HUQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34733 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_vpmultishiftqbv64qi_mask, "__builtin_ia32_vpmultishiftqb512_mask", IX86_BUILTIN_VPMULTISHIFTQB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34734 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpmultishiftqbv32qi_mask, "__builtin_ia32_vpmultishiftqb256_mask", IX86_BUILTIN_VPMULTISHIFTQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34735 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpmultishiftqbv16qi_mask, "__builtin_ia32_vpmultishiftqb128_mask", IX86_BUILTIN_VPMULTISHIFTQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34736 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_permvarv64qi_mask, "__builtin_ia32_permvarqi512_mask", IX86_BUILTIN_VPERMVARQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34737 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermt2varv64qi3_mask, "__builtin_ia32_vpermt2varqi512_mask", IX86_BUILTIN_VPERMT2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34738 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermt2varv64qi3_maskz, "__builtin_ia32_vpermt2varqi512_maskz", IX86_BUILTIN_VPERMT2VARQI512_MASKZ, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34739 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermi2varv64qi3_mask, "__builtin_ia32_vpermi2varqi512_mask", IX86_BUILTIN_VPERMI2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34740 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv32qi_mask, "__builtin_ia32_permvarqi256_mask", IX86_BUILTIN_VPERMVARQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34741 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv16qi_mask, "__builtin_ia32_permvarqi128_mask", IX86_BUILTIN_VPERMVARQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34742 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv32qi3_mask, "__builtin_ia32_vpermt2varqi256_mask", IX86_BUILTIN_VPERMT2VARQI256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34743 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv32qi3_maskz, "__builtin_ia32_vpermt2varqi256_maskz", IX86_BUILTIN_VPERMT2VARQI256_MASKZ, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34744 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16qi3_mask, "__builtin_ia32_vpermt2varqi128_mask", IX86_BUILTIN_VPERMT2VARQI128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34745 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16qi3_maskz, "__builtin_ia32_vpermt2varqi128_maskz", IX86_BUILTIN_VPERMT2VARQI128_MASKZ, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34746 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv32qi3_mask, "__builtin_ia32_vpermi2varqi256_mask", IX86_BUILTIN_VPERMI2VARQI256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34747 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv16qi3_mask, "__builtin_ia32_vpermi2varqi128_mask", IX86_BUILTIN_VPERMI2VARQI128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34750 /* Builtins with rounding support. */
34751 static const struct builtin_description bdesc_round_args[] =
34754 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8df3_mask_round, "__builtin_ia32_addpd512_mask", IX86_BUILTIN_ADDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34755 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16sf3_mask_round, "__builtin_ia32_addps512_mask", IX86_BUILTIN_ADDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34756 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmaddv2df3_round, "__builtin_ia32_addsd_round", IX86_BUILTIN_ADDSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
34757 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmaddv4sf3_round, "__builtin_ia32_addss_round", IX86_BUILTIN_ADDSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
34758 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8df3_mask_round, "__builtin_ia32_cmppd512_mask", IX86_BUILTIN_CMPPD512, UNKNOWN, (int) UQI_FTYPE_V8DF_V8DF_INT_UQI_INT },
34759 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16sf3_mask_round, "__builtin_ia32_cmpps512_mask", IX86_BUILTIN_CMPPS512, UNKNOWN, (int) UHI_FTYPE_V16SF_V16SF_INT_UHI_INT },
34760 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv2df3_mask_round, "__builtin_ia32_cmpsd_mask", IX86_BUILTIN_CMPSD_MASK, UNKNOWN, (int) UQI_FTYPE_V2DF_V2DF_INT_UQI_INT },
34761 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv4sf3_mask_round, "__builtin_ia32_cmpss_mask", IX86_BUILTIN_CMPSS_MASK, UNKNOWN, (int) UQI_FTYPE_V4SF_V4SF_INT_UQI_INT },
34762 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_comi_round, "__builtin_ia32_vcomisd", IX86_BUILTIN_COMIDF, UNKNOWN, (int) INT_FTYPE_V2DF_V2DF_INT_INT },
34763 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_comi_round, "__builtin_ia32_vcomiss", IX86_BUILTIN_COMISF, UNKNOWN, (int) INT_FTYPE_V4SF_V4SF_INT_INT },
34764 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv16siv16sf2_mask_round, "__builtin_ia32_cvtdq2ps512_mask", IX86_BUILTIN_CVTDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
34765 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2dq512_mask_round, "__builtin_ia32_cvtpd2dq512_mask", IX86_BUILTIN_CVTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
34766 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2ps512_mask_round, "__builtin_ia32_cvtpd2ps512_mask", IX86_BUILTIN_CVTPD2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DF_V8SF_QI_INT },
34767 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_notruncv8dfv8si2_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
34768 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtph2ps512_mask_round, "__builtin_ia32_vcvtph2ps512_mask", IX86_BUILTIN_CVTPH2PS512, UNKNOWN, (int) V16SF_FTYPE_V16HI_V16SF_HI_INT },
34769 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
34770 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT },
34771 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2udq512_mask", IX86_BUILTIN_CVTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
34772 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2ss_round, "__builtin_ia32_cvtsd2ss_round", IX86_BUILTIN_CVTSD2SS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF_INT },
34773 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq_round, "__builtin_ia32_cvtsi2sd64", IX86_BUILTIN_CVTSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT64_INT },
34774 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtsi2ss_round, "__builtin_ia32_cvtsi2ss32", IX86_BUILTIN_CVTSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_INT },
34775 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq_round, "__builtin_ia32_cvtsi2ss64", IX86_BUILTIN_CVTSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT64_INT },
34776 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtss2sd_round, "__builtin_ia32_cvtss2sd_round", IX86_BUILTIN_CVTSS2SD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_INT },
34777 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
34778 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
34779 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
34780 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
34781 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
34782 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2sd64_round, "__builtin_ia32_cvtusi2sd64", IX86_BUILTIN_CVTUSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT64_INT },
34783 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2ss32_round, "__builtin_ia32_cvtusi2ss32", IX86_BUILTIN_CVTUSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT_INT },
34784 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2ss64_round, "__builtin_ia32_cvtusi2ss64", IX86_BUILTIN_CVTUSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT64_INT },
34785 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv8df3_mask_round, "__builtin_ia32_divpd512_mask", IX86_BUILTIN_DIVPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34786 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv16sf3_mask_round, "__builtin_ia32_divps512_mask", IX86_BUILTIN_DIVPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34787 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmdivv2df3_round, "__builtin_ia32_divsd_round", IX86_BUILTIN_DIVSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
34788 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmdivv4sf3_round, "__builtin_ia32_divss_round", IX86_BUILTIN_DIVSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
34789 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_mask_round, "__builtin_ia32_fixupimmpd512_mask", IX86_BUILTIN_FIXUPIMMPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
34790 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_maskz_round, "__builtin_ia32_fixupimmpd512_maskz", IX86_BUILTIN_FIXUPIMMPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
34791 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_mask_round, "__builtin_ia32_fixupimmps512_mask", IX86_BUILTIN_FIXUPIMMPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
34792 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_maskz_round, "__builtin_ia32_fixupimmps512_maskz", IX86_BUILTIN_FIXUPIMMPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
34793 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_mask_round, "__builtin_ia32_fixupimmsd_mask", IX86_BUILTIN_FIXUPIMMSD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
34794 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_maskz_round, "__builtin_ia32_fixupimmsd_maskz", IX86_BUILTIN_FIXUPIMMSD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
34795 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_mask_round, "__builtin_ia32_fixupimmss_mask", IX86_BUILTIN_FIXUPIMMSS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
34796 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_maskz_round, "__builtin_ia32_fixupimmss_maskz", IX86_BUILTIN_FIXUPIMMSS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
34797 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv8df_mask_round, "__builtin_ia32_getexppd512_mask", IX86_BUILTIN_GETEXPPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
34798 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv16sf_mask_round, "__builtin_ia32_getexpps512_mask", IX86_BUILTIN_GETEXPPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
34799 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv2df_round, "__builtin_ia32_getexpsd128_round", IX86_BUILTIN_GETEXPSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
34800 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv4sf_round, "__builtin_ia32_getexpss128_round", IX86_BUILTIN_GETEXPSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
34801 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv8df_mask_round, "__builtin_ia32_getmantpd512_mask", IX86_BUILTIN_GETMANTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
34802 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv16sf_mask_round, "__builtin_ia32_getmantps512_mask", IX86_BUILTIN_GETMANTPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
34803 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv2df_round, "__builtin_ia32_getmantsd_round", IX86_BUILTIN_GETMANTSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
34804 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv4sf_round, "__builtin_ia32_getmantss_round", IX86_BUILTIN_GETMANTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
34805 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8df3_mask_round, "__builtin_ia32_maxpd512_mask", IX86_BUILTIN_MAXPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34806 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16sf3_mask_round, "__builtin_ia32_maxps512_mask", IX86_BUILTIN_MAXPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34807 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsmaxv2df3_round, "__builtin_ia32_maxsd_round", IX86_BUILTIN_MAXSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
34808 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsmaxv4sf3_round, "__builtin_ia32_maxss_round", IX86_BUILTIN_MAXSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
34809 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8df3_mask_round, "__builtin_ia32_minpd512_mask", IX86_BUILTIN_MINPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34810 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16sf3_mask_round, "__builtin_ia32_minps512_mask", IX86_BUILTIN_MINPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34811 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsminv2df3_round, "__builtin_ia32_minsd_round", IX86_BUILTIN_MINSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
34812 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsminv4sf3_round, "__builtin_ia32_minss_round", IX86_BUILTIN_MINSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
34813 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv8df3_mask_round, "__builtin_ia32_mulpd512_mask", IX86_BUILTIN_MULPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34814 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16sf3_mask_round, "__builtin_ia32_mulps512_mask", IX86_BUILTIN_MULPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34815 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmmulv2df3_round, "__builtin_ia32_mulsd_round", IX86_BUILTIN_MULSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
34816 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmmulv4sf3_round, "__builtin_ia32_mulss_round", IX86_BUILTIN_MULSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
34817 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev8df_mask_round, "__builtin_ia32_rndscalepd_mask", IX86_BUILTIN_RNDSCALEPD, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
34818 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev16sf_mask_round, "__builtin_ia32_rndscaleps_mask", IX86_BUILTIN_RNDSCALEPS, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
34819 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev2df_round, "__builtin_ia32_rndscalesd_round", IX86_BUILTIN_RNDSCALESD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
34820 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev4sf_round, "__builtin_ia32_rndscaless_round", IX86_BUILTIN_RNDSCALESS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
34821 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv8df_mask_round, "__builtin_ia32_scalefpd512_mask", IX86_BUILTIN_SCALEFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34822 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv16sf_mask_round, "__builtin_ia32_scalefps512_mask", IX86_BUILTIN_SCALEFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34823 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv2df_round, "__builtin_ia32_scalefsd_round", IX86_BUILTIN_SCALEFSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
34824 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv4sf_round, "__builtin_ia32_scalefss_round", IX86_BUILTIN_SCALEFSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
34825 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2_mask_round, "__builtin_ia32_sqrtpd512_mask", IX86_BUILTIN_SQRTPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
34826 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv16sf2_mask_round, "__builtin_ia32_sqrtps512_mask", IX86_BUILTIN_SQRTPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
34827 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsqrtv2df2_round, "__builtin_ia32_sqrtsd_round", IX86_BUILTIN_SQRTSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
34828 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsqrtv4sf2_round, "__builtin_ia32_sqrtss_round", IX86_BUILTIN_SQRTSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
34829 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8df3_mask_round, "__builtin_ia32_subpd512_mask", IX86_BUILTIN_SUBPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34830 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16sf3_mask_round, "__builtin_ia32_subps512_mask", IX86_BUILTIN_SUBPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34831 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsubv2df3_round, "__builtin_ia32_subsd_round", IX86_BUILTIN_SUBSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
34832 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsubv4sf3_round, "__builtin_ia32_subss_round", IX86_BUILTIN_SUBSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
34833 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2si_round, "__builtin_ia32_vcvtsd2si32", IX86_BUILTIN_VCVTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
34834 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq_round, "__builtin_ia32_vcvtsd2si64", IX86_BUILTIN_VCVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
34835 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtsd2usi_round, "__builtin_ia32_vcvtsd2usi32", IX86_BUILTIN_VCVTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
34836 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtsd2usiq_round, "__builtin_ia32_vcvtsd2usi64", IX86_BUILTIN_VCVTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
34837 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtss2si_round, "__builtin_ia32_vcvtss2si32", IX86_BUILTIN_VCVTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
34838 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq_round, "__builtin_ia32_vcvtss2si64", IX86_BUILTIN_VCVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
34839 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtss2usi_round, "__builtin_ia32_vcvtss2usi32", IX86_BUILTIN_VCVTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
34840 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtss2usiq_round, "__builtin_ia32_vcvtss2usi64", IX86_BUILTIN_VCVTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
34841 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvttsd2si_round, "__builtin_ia32_vcvttsd2si32", IX86_BUILTIN_VCVTTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
34842 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq_round, "__builtin_ia32_vcvttsd2si64", IX86_BUILTIN_VCVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
34843 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttsd2usi_round, "__builtin_ia32_vcvttsd2usi32", IX86_BUILTIN_VCVTTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
34844 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttsd2usiq_round, "__builtin_ia32_vcvttsd2usi64", IX86_BUILTIN_VCVTTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
34845 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvttss2si_round, "__builtin_ia32_vcvttss2si32", IX86_BUILTIN_VCVTTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
34846 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq_round, "__builtin_ia32_vcvttss2si64", IX86_BUILTIN_VCVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
34847 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttss2usi_round, "__builtin_ia32_vcvttss2usi32", IX86_BUILTIN_VCVTTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
34848 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttss2usiq_round, "__builtin_ia32_vcvttss2usi64", IX86_BUILTIN_VCVTTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
34849 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask_round, "__builtin_ia32_vfmaddpd512_mask", IX86_BUILTIN_VFMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34850 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask3_round, "__builtin_ia32_vfmaddpd512_mask3", IX86_BUILTIN_VFMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34851 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_maskz_round, "__builtin_ia32_vfmaddpd512_maskz", IX86_BUILTIN_VFMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34852 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask_round, "__builtin_ia32_vfmaddps512_mask", IX86_BUILTIN_VFMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34853 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask3_round, "__builtin_ia32_vfmaddps512_mask3", IX86_BUILTIN_VFMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34854 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_maskz_round, "__builtin_ia32_vfmaddps512_maskz", IX86_BUILTIN_VFMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34855 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v2df_round, "__builtin_ia32_vfmaddsd3_round", IX86_BUILTIN_VFMADDSD3_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_INT },
34856 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v4sf_round, "__builtin_ia32_vfmaddss3_round", IX86_BUILTIN_VFMADDSS3_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_INT },
34857 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask_round, "__builtin_ia32_vfmaddsubpd512_mask", IX86_BUILTIN_VFMADDSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34858 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask3_round, "__builtin_ia32_vfmaddsubpd512_mask3", IX86_BUILTIN_VFMADDSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34859 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_maskz_round, "__builtin_ia32_vfmaddsubpd512_maskz", IX86_BUILTIN_VFMADDSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34860 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask_round, "__builtin_ia32_vfmaddsubps512_mask", IX86_BUILTIN_VFMADDSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34861 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask3_round, "__builtin_ia32_vfmaddsubps512_mask3", IX86_BUILTIN_VFMADDSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34862 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_maskz_round, "__builtin_ia32_vfmaddsubps512_maskz", IX86_BUILTIN_VFMADDSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34863 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v8df_mask3_round, "__builtin_ia32_vfmsubaddpd512_mask3", IX86_BUILTIN_VFMSUBADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34864 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v16sf_mask3_round, "__builtin_ia32_vfmsubaddps512_mask3", IX86_BUILTIN_VFMSUBADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34865 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v8df_mask3_round, "__builtin_ia32_vfmsubpd512_mask3", IX86_BUILTIN_VFMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34866 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v16sf_mask3_round, "__builtin_ia32_vfmsubps512_mask3", IX86_BUILTIN_VFMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34867 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v8df_mask_round, "__builtin_ia32_vfnmaddpd512_mask", IX86_BUILTIN_VFNMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34868 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v16sf_mask_round, "__builtin_ia32_vfnmaddps512_mask", IX86_BUILTIN_VFNMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34869 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask_round, "__builtin_ia32_vfnmsubpd512_mask", IX86_BUILTIN_VFNMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34870 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask3_round, "__builtin_ia32_vfnmsubpd512_mask3", IX86_BUILTIN_VFNMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34871 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask_round, "__builtin_ia32_vfnmsubps512_mask", IX86_BUILTIN_VFNMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34872 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask3_round, "__builtin_ia32_vfnmsubps512_mask3", IX86_BUILTIN_VFNMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34875 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v8df_mask_round, "__builtin_ia32_exp2pd_mask", IX86_BUILTIN_EXP2PD_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
34876 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf_mask_round, "__builtin_ia32_exp2ps_mask", IX86_BUILTIN_EXP2PS_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
34877 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v8df_mask_round, "__builtin_ia32_rcp28pd_mask", IX86_BUILTIN_RCP28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
34878 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v16sf_mask_round, "__builtin_ia32_rcp28ps_mask", IX86_BUILTIN_RCP28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
34879 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v2df_round, "__builtin_ia32_rcp28sd_round", IX86_BUILTIN_RCP28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
34880 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v4sf_round, "__builtin_ia32_rcp28ss_round", IX86_BUILTIN_RCP28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
34881 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v8df_mask_round, "__builtin_ia32_rsqrt28pd_mask", IX86_BUILTIN_RSQRT28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
34882 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v16sf_mask_round, "__builtin_ia32_rsqrt28ps_mask", IX86_BUILTIN_RSQRT28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
34883 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v2df_round, "__builtin_ia32_rsqrt28sd_round", IX86_BUILTIN_RSQRT28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
34884 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v4sf_round, "__builtin_ia32_rsqrt28ss_round", IX86_BUILTIN_RSQRT28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
34887 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangesv2df_round, "__builtin_ia32_rangesd128_round", IX86_BUILTIN_RANGESD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
34888 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangesv4sf_round, "__builtin_ia32_rangess128_round", IX86_BUILTIN_RANGESS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
34889 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2qq512_mask", IX86_BUILTIN_CVTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
34890 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_cvtps2qqv8di_mask_round, "__builtin_ia32_cvtps2qq512_mask", IX86_BUILTIN_CVTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
34891 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2uqq512_mask", IX86_BUILTIN_CVTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
34892 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_cvtps2uqqv8di_mask_round, "__builtin_ia32_cvtps2uqq512_mask", IX86_BUILTIN_CVTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
34893 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_floatv8div8sf2_mask_round, "__builtin_ia32_cvtqq2ps512_mask", IX86_BUILTIN_CVTQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT },
34894 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufloatv8div8sf2_mask_round, "__builtin_ia32_cvtuqq2ps512_mask", IX86_BUILTIN_CVTUQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT },
34895 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_floatv8div8df2_mask_round, "__builtin_ia32_cvtqq2pd512_mask", IX86_BUILTIN_CVTQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT },
34896 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufloatv8div8df2_mask_round, "__builtin_ia32_cvtuqq2pd512_mask", IX86_BUILTIN_CVTUQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT },
34897 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2qq512_mask", IX86_BUILTIN_CVTTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
34898 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2uqq512_mask", IX86_BUILTIN_CVTTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
34899 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2qq512_mask", IX86_BUILTIN_CVTTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
34900 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2uqq512_mask", IX86_BUILTIN_CVTTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
34901 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangepv16sf_mask_round, "__builtin_ia32_rangeps512_mask", IX86_BUILTIN_RANGEPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT },
34902 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangepv8df_mask_round, "__builtin_ia32_rangepd512_mask", IX86_BUILTIN_RANGEPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT },
34905 /* Builtins for MPX (Intel Memory Protection Extensions).  Each entry
   uses a placeholder zero insn_code, so the insn-pattern field is unused
   here — NOTE(review): presumably these builtins are expanded by
   dedicated MPX code rather than through the generic insn table;
   confirm in the builtin expander.  All take pointer/bound operands and
   return void (see the VOID_FTYPE_* codes).  */
34906 static const struct builtin_description bdesc_mpx[] =
34908 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndstx", IX86_BUILTIN_BNDSTX, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND_PCVOID },
34909 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcl", IX86_BUILTIN_BNDCL, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
34910 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcu", IX86_BUILTIN_BNDCU, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
34913 /* Const builtins for MPX.  Same descriptor layout as bdesc_mpx above,
   but these builtins produce a value (BND/PVOID/ULONG function types)
   rather than acting purely for their side effects — hence "const".
   The insn_code field is again a placeholder zero.  */
34914 static const struct builtin_description bdesc_mpx_const[] =
34916 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndmk", IX86_BUILTIN_BNDMK, UNKNOWN, (int) BND_FTYPE_PCVOID_ULONG },
34917 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndldx", IX86_BUILTIN_BNDLDX, UNKNOWN, (int) BND_FTYPE_PCVOID_PCVOID },
34918 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_narrow_bounds", IX86_BUILTIN_BNDNARROW, UNKNOWN, (int) PVOID_FTYPE_PCVOID_BND_ULONG },
34919 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndint", IX86_BUILTIN_BNDINT, UNKNOWN, (int) BND_FTYPE_BND_BND },
34920 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_sizeof", IX86_BUILTIN_SIZEOF, UNKNOWN, (int) ULONG_FTYPE_VOID },
34921 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndlower", IX86_BUILTIN_BNDLOWER, UNKNOWN, (int) PVOID_FTYPE_BND },
34922 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndupper", IX86_BUILTIN_BNDUPPER, UNKNOWN, (int) PVOID_FTYPE_BND },
34923 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndret", IX86_BUILTIN_BNDRET, UNKNOWN, (int) BND_FTYPE_PCVOID },
34926 /* FMA4 and XOP.  Shorthand aliases mapping the MULTI_ARG_* names used
   by the bdesc_multi_arg table below onto ix86_builtin_func_type codes.
   Naming scheme: MULTI_ARG_<nargs>_<elem-mode>[2 = 256-bit][_<suffix>],
   where the optional suffix encodes an extra operand (IMM = scalar
   shift/rotate count, CMP / TF = trailing comparison-code operand) or,
   for the 1-arg forms, a widening destination element mode.  */
/* Four-operand forms: two data vectors, a selector vector, and an
   immediate.  */
34927 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
34928 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
34929 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
34930 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
/* Three-operand forms; the _SI_DI / _HI_SI variants take a third operand
   of a different (wider) element mode than the first two.  */
34931 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
34932 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
34933 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
34934 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
34935 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
34936 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
34937 #define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
34938 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
34939 #define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
34940 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
34941 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
34942 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
34943 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
34944 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
/* Plain two-operand forms.  */
34945 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
34946 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
34947 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
34948 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
34949 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
34950 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
/* Two-operand forms whose second operand is a scalar immediate (SI).  */
34951 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
34952 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
34953 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
34954 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
/* Two-operand compares: trailing CMP / TF comparison-code operand.  */
34955 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
34956 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
34957 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
34958 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
34959 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
34960 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
34961 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
34962 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
34963 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
34964 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
/* One-operand forms (unary ops).  */
34965 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
34966 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
34967 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
34968 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
34969 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
34970 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
34971 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
34972 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
/* One-operand widening forms: the suffix pair is <src-elem>_<dst-elem>,
   e.g. _QI_DI maps a V16QI source onto a V2DI result.  */
34973 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
34974 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
34975 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
34976 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
34977 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
34978 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
34980 static const struct builtin_description bdesc_multi_arg[] =
34982 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
34983 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
34984 UNKNOWN, (int)MULTI_ARG_3_SF },
34985 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
34986 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
34987 UNKNOWN, (int)MULTI_ARG_3_DF },
34989 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf,
34990 "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3,
34991 UNKNOWN, (int)MULTI_ARG_3_SF },
34992 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df,
34993 "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3,
34994 UNKNOWN, (int)MULTI_ARG_3_DF },
34996 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
34997 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
34998 UNKNOWN, (int)MULTI_ARG_3_SF },
34999 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
35000 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
35001 UNKNOWN, (int)MULTI_ARG_3_DF },
35002 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
35003 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
35004 UNKNOWN, (int)MULTI_ARG_3_SF2 },
35005 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
35006 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
35007 UNKNOWN, (int)MULTI_ARG_3_DF2 },
35009 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
35010 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
35011 UNKNOWN, (int)MULTI_ARG_3_SF },
35012 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
35013 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
35014 UNKNOWN, (int)MULTI_ARG_3_DF },
35015 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
35016 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
35017 UNKNOWN, (int)MULTI_ARG_3_SF2 },
35018 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
35019 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
35020 UNKNOWN, (int)MULTI_ARG_3_DF2 },
35022 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
35023 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
35024 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
35025 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
35026 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi",IX86_BUILTIN_VPCMOV_V16QI,UNKNOWN, (int)MULTI_ARG_3_QI },
35027 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
35028 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
35030 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
35031 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
35032 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
35033 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
35034 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
35035 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
35036 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
35038 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
35040 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
35041 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
35042 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
35043 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
35044 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
35045 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
35046 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
35047 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
35048 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
35049 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
35050 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
35051 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
35053 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
35054 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
35055 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
35056 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
35057 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
35058 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
35059 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
35060 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
35061 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
35062 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
35063 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
35064 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
35065 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
35066 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
35067 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
35068 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
35070 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_1_SF },
35071 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_1_DF },
35072 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
35073 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
35074 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
35075 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
35077 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
35078 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
35079 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
35080 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
35081 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
35082 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
35083 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
35084 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
35085 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
35086 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
35087 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
35088 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
35089 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
35090 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
35091 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
35093 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
35094 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
35095 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
35096 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
35097 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
35098 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
35099 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
35101 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
35102 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
35103 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
35104 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
35105 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
35106 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
35107 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
35109 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
35110 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
35111 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
35112 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
35113 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
35114 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
35115 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
35117 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
35118 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
35119 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
35120 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
35121 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
35122 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
35123 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
35125 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
35126 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
35127 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
35128 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
35129 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
35130 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
35131 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
35133 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
35134 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
35135 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
35136 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
35137 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
35138 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
35139 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
35141 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
35142 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
35143 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
35144 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
35145 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
35146 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
35147 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
35149 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
35150 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
35151 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
35152 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
35153 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
35154 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
35155 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
35157 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
35158 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
35159 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
35160 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
35161 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
35162 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
35163 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
35164 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
35166 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
35167 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
35168 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
35169 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
35170 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
35171 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
35172 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
35173 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
35175 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
35176 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
35177 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
35178 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
35182 /* TM vector builtins. */
35184 /* Reuse the existing x86-specific `struct builtin_description' because
35185    we're lazy.  Add casts to make them fit.  */
35186 static const struct builtin_description bdesc_tm[] =
/* Each entry maps a transactional-memory `_ITM_*' vector entry point onto
   a generic tree-core built-in code (BUILT_IN_TM_*), cast into the
   ix86_builtins enum so it fits in builtin_description.  CODE_FOR_nothing
   is used because these are expanded as calls into the TM runtime rather
   than via insn patterns.  The WaR/WaW/RaR/RaW/RfW suffixes mirror the
   libitm entry-point names (write-after-read, read-for-write, etc. —
   presumably; confirm against the libitm ABI).  */
/* 64-bit (MMX, V2SI) transactional stores and loads.  */
35188 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
35189 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
35190 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
35191 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
35192 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
35193 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
35194 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
/* 128-bit (SSE, V4SF) transactional stores and loads.  */
35196 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
35197 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
35198 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
35199 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
35200 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
35201 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
35202 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
/* 256-bit (AVX, V8SF) transactional stores and loads.  */
35204 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
35205 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
35206 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
35207 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
35208 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
35209 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
35210 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
/* Transaction-log ("LM") entries for each vector width; they take only a
   const pointer to the data being logged.  */
35212 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
35213 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
35214 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
35217 /* TM callbacks. */
35219 /* Return the builtin decl needed to load a vector of TYPE.  */
35222 ix86_builtin_tm_load (tree type)
/* Non-vector types are not handled here; callers presumably fall back to
   the generic (non-vector) TM load entry points — confirm in the caller.  */
35224   if (TREE_CODE (type) == VECTOR_TYPE)
/* Dispatch on the vector's total size in bits.  The three widths match
   the MMX/SSE/AVX entries registered in bdesc_tm above.  */
35226     switch (tree_to_uhwi (TYPE_SIZE (type)))
35229 	  return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64);
35231 	  return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128);
35233 	  return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256);
35239 /* Return the builtin decl needed to store a vector of TYPE.  */
35242 ix86_builtin_tm_store (tree type)
/* Mirrors ix86_builtin_tm_load: only vector types are mapped here.  */
35244   if (TREE_CODE (type) == VECTOR_TYPE)
/* Dispatch on the vector's total size in bits (64/128/256, matching the
   MMX/SSE/AVX store entries registered in bdesc_tm).  */
35246     switch (tree_to_uhwi (TYPE_SIZE (type)))
35249 	  return builtin_decl_explicit (BUILT_IN_TM_STORE_M64);
35251 	  return builtin_decl_explicit (BUILT_IN_TM_STORE_M128);
35253 	  return builtin_decl_explicit (BUILT_IN_TM_STORE_M256);
35259 /* Initialize the transactional memory vector load/store builtins.
   Registers every entry of bdesc_tm with the front end, copying the
   decl and type attributes from the corresponding scalar TM builtins so
   the vector variants get identical TM semantics.  */
35262 ix86_init_tm_builtins (void)
35264   enum ix86_builtin_func_type ftype;
35265   const struct builtin_description *d;
35268   tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
35269   tree attrs_log, attrs_type_log;
35274   /* If there are no builtins defined, we must be compiling in a
35275      language without trans-mem support.  */
35276   if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
35279   /* Use whatever attributes a normal TM load has.  */
35280   decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
35281   attrs_load = DECL_ATTRIBUTES (decl);
35282   attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
35283   /* Use whatever attributes a normal TM store has.  */
35284   decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
35285   attrs_store = DECL_ATTRIBUTES (decl);
35286   attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
35287   /* Use whatever attributes a normal TM log has.  */
35288   decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
35289   attrs_log = DECL_ATTRIBUTES (decl);
35290   attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));
/* Walk the whole bdesc_tm table; each enabled entry becomes a real
   builtin decl registered with the language front end.  */
35292   for (i = 0, d = bdesc_tm;
35293        i < ARRAY_SIZE (bdesc_tm);
/* Register the builtin either when its ISA is enabled on the command
   line, or unconditionally when the front end supports ext-scope
   builtin registration (so per-function target switching still works).  */
35296       if ((d->mask & ix86_isa_flags) != 0
35297 	  || (lang_hooks.builtin_function
35298 	      == lang_hooks.builtin_function_ext_scope))
35300 	  tree type, attrs, attrs_type;
35301 	  enum built_in_function code = (enum built_in_function) d->code;
/* d->flag stores the function-type enum; recover the actual tree type.  */
35303 	  ftype = (enum ix86_builtin_func_type) d->flag;
35304 	  type = ix86_get_builtin_func_type (ftype);
/* Pick the attribute set matching the builtin's class (load / store,
   otherwise log).  */
35306 	  if (BUILTIN_TM_LOAD_P (code))
35308 	      attrs = attrs_load;
35309 	      attrs_type = attrs_type_load;
35311 	  else if (BUILTIN_TM_STORE_P (code))
35313 	      attrs = attrs_store;
35314 	      attrs_type = attrs_type_store;
35319 	      attrs_type = attrs_type_log;
35321 	  decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
35322 				       /* The builtin without the prefix for
35323 					  calling it directly.  */
35324 				       d->name + strlen ("__builtin_"),
35326 	  /* add_builtin_function() will set the DECL_ATTRIBUTES, now
35327 	     set the TYPE_ATTRIBUTES.  */
35328 	  decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);
/* Record the decl so builtin_decl_explicit () finds the vector variant
   under its generic BUILT_IN_TM_* code.  */
35330 	  set_builtin_decl (code, decl, false);
35335 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
35336 in the current target ISA to allow the user to compile particular modules
35337 with different target specific options that differ from the command line
35340 ix86_init_mmx_sse_builtins (void)
35342 const struct builtin_description * d;
35343 enum ix86_builtin_func_type ftype;
35346 /* Add all special builtins with variable number of operands. */
35347 for (i = 0, d = bdesc_special_args;
35348 i < ARRAY_SIZE (bdesc_special_args);
35354 ftype = (enum ix86_builtin_func_type) d->flag;
35355 def_builtin (d->mask, d->name, ftype, d->code);
35358 /* Add all builtins with variable number of operands. */
35359 for (i = 0, d = bdesc_args;
35360 i < ARRAY_SIZE (bdesc_args);
35366 ftype = (enum ix86_builtin_func_type) d->flag;
35367 def_builtin_const (d->mask, d->name, ftype, d->code);
35370 /* Add all builtins with rounding. */
35371 for (i = 0, d = bdesc_round_args;
35372 i < ARRAY_SIZE (bdesc_round_args);
35378 ftype = (enum ix86_builtin_func_type) d->flag;
35379 def_builtin_const (d->mask, d->name, ftype, d->code);
35382 /* pcmpestr[im] insns. */
35383 for (i = 0, d = bdesc_pcmpestr;
35384 i < ARRAY_SIZE (bdesc_pcmpestr);
35387 if (d->code == IX86_BUILTIN_PCMPESTRM128)
35388 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
35390 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
35391 def_builtin_const (d->mask, d->name, ftype, d->code);
35394 /* pcmpistr[im] insns. */
35395 for (i = 0, d = bdesc_pcmpistr;
35396 i < ARRAY_SIZE (bdesc_pcmpistr);
35399 if (d->code == IX86_BUILTIN_PCMPISTRM128)
35400 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
35402 ftype = INT_FTYPE_V16QI_V16QI_INT;
35403 def_builtin_const (d->mask, d->name, ftype, d->code);
35406 /* comi/ucomi insns. */
35407 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
35409 if (d->mask == OPTION_MASK_ISA_SSE2)
35410 ftype = INT_FTYPE_V2DF_V2DF;
35412 ftype = INT_FTYPE_V4SF_V4SF;
35413 def_builtin_const (d->mask, d->name, ftype, d->code);
35417 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
35418 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
35419 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
35420 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
35422 /* SSE or 3DNow!A */
35423 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
35424 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
35425 IX86_BUILTIN_MASKMOVQ);
35428 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
35429 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
35431 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
35432 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
35433 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
35434 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
35437 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
35438 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
35439 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
35440 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
35443 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
35444 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
35445 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
35446 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
35447 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
35448 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
35449 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
35450 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
35451 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
35452 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
35453 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
35454 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
35457 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
35458 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
35461 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
35462 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
35463 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
35464 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
35465 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
35466 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
35467 IX86_BUILTIN_RDRAND64_STEP);
35470 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
35471 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
35472 IX86_BUILTIN_GATHERSIV2DF);
35474 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
35475 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
35476 IX86_BUILTIN_GATHERSIV4DF);
35478 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
35479 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
35480 IX86_BUILTIN_GATHERDIV2DF);
35482 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
35483 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
35484 IX86_BUILTIN_GATHERDIV4DF);
35486 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
35487 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
35488 IX86_BUILTIN_GATHERSIV4SF);
35490 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
35491 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
35492 IX86_BUILTIN_GATHERSIV8SF);
35494 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
35495 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
35496 IX86_BUILTIN_GATHERDIV4SF);
35498 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
35499 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
35500 IX86_BUILTIN_GATHERDIV8SF);
35502 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
35503 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
35504 IX86_BUILTIN_GATHERSIV2DI);
35506 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
35507 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
35508 IX86_BUILTIN_GATHERSIV4DI);
35510 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
35511 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
35512 IX86_BUILTIN_GATHERDIV2DI);
35514 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
35515 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
35516 IX86_BUILTIN_GATHERDIV4DI);
35518 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
35519 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
35520 IX86_BUILTIN_GATHERSIV4SI);
35522 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
35523 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
35524 IX86_BUILTIN_GATHERSIV8SI);
35526 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
35527 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
35528 IX86_BUILTIN_GATHERDIV4SI);
35530 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
35531 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
35532 IX86_BUILTIN_GATHERDIV8SI);
35534 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df ",
35535 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
35536 IX86_BUILTIN_GATHERALTSIV4DF);
35538 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256 ",
35539 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
35540 IX86_BUILTIN_GATHERALTDIV8SF);
35542 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di ",
35543 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
35544 IX86_BUILTIN_GATHERALTSIV4DI);
35546 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256 ",
35547 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
35548 IX86_BUILTIN_GATHERALTDIV8SI);
35551 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16sf",
35552 V16SF_FTYPE_V16SF_PCFLOAT_V16SI_HI_INT,
35553 IX86_BUILTIN_GATHER3SIV16SF);
35555 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8df",
35556 V8DF_FTYPE_V8DF_PCDOUBLE_V8SI_QI_INT,
35557 IX86_BUILTIN_GATHER3SIV8DF);
35559 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16sf",
35560 V8SF_FTYPE_V8SF_PCFLOAT_V8DI_QI_INT,
35561 IX86_BUILTIN_GATHER3DIV16SF);
35563 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8df",
35564 V8DF_FTYPE_V8DF_PCDOUBLE_V8DI_QI_INT,
35565 IX86_BUILTIN_GATHER3DIV8DF);
35567 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16si",
35568 V16SI_FTYPE_V16SI_PCINT_V16SI_HI_INT,
35569 IX86_BUILTIN_GATHER3SIV16SI);
35571 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8di",
35572 V8DI_FTYPE_V8DI_PCINT64_V8SI_QI_INT,
35573 IX86_BUILTIN_GATHER3SIV8DI);
35575 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16si",
35576 V8SI_FTYPE_V8SI_PCINT_V8DI_QI_INT,
35577 IX86_BUILTIN_GATHER3DIV16SI);
35579 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8di",
35580 V8DI_FTYPE_V8DI_PCINT64_V8DI_QI_INT,
35581 IX86_BUILTIN_GATHER3DIV8DI);
35583 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8df ",
35584 V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT,
35585 IX86_BUILTIN_GATHER3ALTSIV8DF);
35587 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8sf ",
35588 V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT,
35589 IX86_BUILTIN_GATHER3ALTDIV16SF);
35591 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8di ",
35592 V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT,
35593 IX86_BUILTIN_GATHER3ALTSIV8DI);
35595 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8si ",
35596 V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT,
35597 IX86_BUILTIN_GATHER3ALTDIV16SI);
35599 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16sf",
35600 VOID_FTYPE_PFLOAT_HI_V16SI_V16SF_INT,
35601 IX86_BUILTIN_SCATTERSIV16SF);
35603 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8df",
35604 VOID_FTYPE_PDOUBLE_QI_V8SI_V8DF_INT,
35605 IX86_BUILTIN_SCATTERSIV8DF);
35607 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16sf",
35608 VOID_FTYPE_PFLOAT_QI_V8DI_V8SF_INT,
35609 IX86_BUILTIN_SCATTERDIV16SF);
35611 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8df",
35612 VOID_FTYPE_PDOUBLE_QI_V8DI_V8DF_INT,
35613 IX86_BUILTIN_SCATTERDIV8DF);
35615 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16si",
35616 VOID_FTYPE_PINT_HI_V16SI_V16SI_INT,
35617 IX86_BUILTIN_SCATTERSIV16SI);
35619 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8di",
35620 VOID_FTYPE_PLONGLONG_QI_V8SI_V8DI_INT,
35621 IX86_BUILTIN_SCATTERSIV8DI);
35623 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16si",
35624 VOID_FTYPE_PINT_QI_V8DI_V8SI_INT,
35625 IX86_BUILTIN_SCATTERDIV16SI);
35627 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8di",
35628 VOID_FTYPE_PLONGLONG_QI_V8DI_V8DI_INT,
35629 IX86_BUILTIN_SCATTERDIV8DI);
35632 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2df",
35633 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_QI_INT,
35634 IX86_BUILTIN_GATHER3SIV2DF);
35636 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4df",
35637 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_QI_INT,
35638 IX86_BUILTIN_GATHER3SIV4DF);
35640 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2df",
35641 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_QI_INT,
35642 IX86_BUILTIN_GATHER3DIV2DF);
35644 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4df",
35645 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_QI_INT,
35646 IX86_BUILTIN_GATHER3DIV4DF);
35648 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4sf",
35649 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_QI_INT,
35650 IX86_BUILTIN_GATHER3SIV4SF);
35652 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8sf",
35653 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_QI_INT,
35654 IX86_BUILTIN_GATHER3SIV8SF);
35656 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4sf",
35657 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_QI_INT,
35658 IX86_BUILTIN_GATHER3DIV4SF);
35660 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8sf",
35661 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_QI_INT,
35662 IX86_BUILTIN_GATHER3DIV8SF);
35664 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2di",
35665 V2DI_FTYPE_V2DI_PCINT64_V4SI_QI_INT,
35666 IX86_BUILTIN_GATHER3SIV2DI);
35668 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4di",
35669 V4DI_FTYPE_V4DI_PCINT64_V4SI_QI_INT,
35670 IX86_BUILTIN_GATHER3SIV4DI);
35672 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2di",
35673 V2DI_FTYPE_V2DI_PCINT64_V2DI_QI_INT,
35674 IX86_BUILTIN_GATHER3DIV2DI);
35676 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4di",
35677 V4DI_FTYPE_V4DI_PCINT64_V4DI_QI_INT,
35678 IX86_BUILTIN_GATHER3DIV4DI);
35680 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4si",
35681 V4SI_FTYPE_V4SI_PCINT_V4SI_QI_INT,
35682 IX86_BUILTIN_GATHER3SIV4SI);
35684 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8si",
35685 V8SI_FTYPE_V8SI_PCINT_V8SI_QI_INT,
35686 IX86_BUILTIN_GATHER3SIV8SI);
35688 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4si",
35689 V4SI_FTYPE_V4SI_PCINT_V2DI_QI_INT,
35690 IX86_BUILTIN_GATHER3DIV4SI);
35692 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8si",
35693 V4SI_FTYPE_V4SI_PCINT_V4DI_QI_INT,
35694 IX86_BUILTIN_GATHER3DIV8SI);
35696 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4df ",
35697 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_QI_INT,
35698 IX86_BUILTIN_GATHER3ALTSIV4DF);
35700 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8sf ",
35701 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_QI_INT,
35702 IX86_BUILTIN_GATHER3ALTDIV8SF);
35704 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4di ",
35705 V4DI_FTYPE_V4DI_PCINT64_V8SI_QI_INT,
35706 IX86_BUILTIN_GATHER3ALTSIV4DI);
35708 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8si ",
35709 V8SI_FTYPE_V8SI_PCINT_V4DI_QI_INT,
35710 IX86_BUILTIN_GATHER3ALTDIV8SI);
35712 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8sf",
35713 VOID_FTYPE_PFLOAT_QI_V8SI_V8SF_INT,
35714 IX86_BUILTIN_SCATTERSIV8SF);
35716 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4sf",
35717 VOID_FTYPE_PFLOAT_QI_V4SI_V4SF_INT,
35718 IX86_BUILTIN_SCATTERSIV4SF);
35720 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4df",
35721 VOID_FTYPE_PDOUBLE_QI_V4SI_V4DF_INT,
35722 IX86_BUILTIN_SCATTERSIV4DF);
35724 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2df",
35725 VOID_FTYPE_PDOUBLE_QI_V4SI_V2DF_INT,
35726 IX86_BUILTIN_SCATTERSIV2DF);
35728 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8sf",
35729 VOID_FTYPE_PFLOAT_QI_V4DI_V4SF_INT,
35730 IX86_BUILTIN_SCATTERDIV8SF);
35732 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4sf",
35733 VOID_FTYPE_PFLOAT_QI_V2DI_V4SF_INT,
35734 IX86_BUILTIN_SCATTERDIV4SF);
35736 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4df",
35737 VOID_FTYPE_PDOUBLE_QI_V4DI_V4DF_INT,
35738 IX86_BUILTIN_SCATTERDIV4DF);
35740 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2df",
35741 VOID_FTYPE_PDOUBLE_QI_V2DI_V2DF_INT,
35742 IX86_BUILTIN_SCATTERDIV2DF);
35744 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8si",
35745 VOID_FTYPE_PINT_QI_V8SI_V8SI_INT,
35746 IX86_BUILTIN_SCATTERSIV8SI);
35748 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4si",
35749 VOID_FTYPE_PINT_QI_V4SI_V4SI_INT,
35750 IX86_BUILTIN_SCATTERSIV4SI);
35752 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4di",
35753 VOID_FTYPE_PLONGLONG_QI_V4SI_V4DI_INT,
35754 IX86_BUILTIN_SCATTERSIV4DI);
35756 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2di",
35757 VOID_FTYPE_PLONGLONG_QI_V4SI_V2DI_INT,
35758 IX86_BUILTIN_SCATTERSIV2DI);
35760 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8si",
35761 VOID_FTYPE_PINT_QI_V4DI_V4SI_INT,
35762 IX86_BUILTIN_SCATTERDIV8SI);
35764 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4si",
35765 VOID_FTYPE_PINT_QI_V2DI_V4SI_INT,
35766 IX86_BUILTIN_SCATTERDIV4SI);
35768 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4di",
35769 VOID_FTYPE_PLONGLONG_QI_V4DI_V4DI_INT,
35770 IX86_BUILTIN_SCATTERDIV4DI);
35772 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2di",
35773 VOID_FTYPE_PLONGLONG_QI_V2DI_V2DI_INT,
35774 IX86_BUILTIN_SCATTERDIV2DI);
35775 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltsiv8df ",
35776 VOID_FTYPE_PDOUBLE_QI_V16SI_V8DF_INT,
35777 IX86_BUILTIN_SCATTERALTSIV8DF);
35779 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltdiv8sf ",
35780 VOID_FTYPE_PFLOAT_HI_V8DI_V16SF_INT,
35781 IX86_BUILTIN_SCATTERALTDIV16SF);
35783 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltsiv8di ",
35784 VOID_FTYPE_PLONGLONG_QI_V16SI_V8DI_INT,
35785 IX86_BUILTIN_SCATTERALTSIV8DI);
35787 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltdiv8si ",
35788 VOID_FTYPE_PINT_HI_V8DI_V16SI_INT,
35789 IX86_BUILTIN_SCATTERALTDIV16SI);
35792 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdpd",
35793 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
35794 IX86_BUILTIN_GATHERPFDPD);
35795 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdps",
35796 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
35797 IX86_BUILTIN_GATHERPFDPS);
35798 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqpd",
35799 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
35800 IX86_BUILTIN_GATHERPFQPD);
35801 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqps",
35802 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
35803 IX86_BUILTIN_GATHERPFQPS);
35804 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdpd",
35805 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
35806 IX86_BUILTIN_SCATTERPFDPD);
35807 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdps",
35808 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
35809 IX86_BUILTIN_SCATTERPFDPS);
35810 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqpd",
35811 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
35812 IX86_BUILTIN_SCATTERPFQPD);
35813 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqps",
35814 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
35815 IX86_BUILTIN_SCATTERPFQPS);
35818 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg1",
35819 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG1);
35820 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg2",
35821 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG2);
35822 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1nexte",
35823 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1NEXTE);
35824 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1rnds4",
35825 V4SI_FTYPE_V4SI_V4SI_INT, IX86_BUILTIN_SHA1RNDS4);
35826 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg1",
35827 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG1);
35828 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg2",
35829 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG2);
35830 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256rnds2",
35831 V4SI_FTYPE_V4SI_V4SI_V4SI, IX86_BUILTIN_SHA256RNDS2);
35834 def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort",
35835 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);
35837 /* MMX access to the vec_init patterns. */
35838 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
35839 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
35841 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
35842 V4HI_FTYPE_HI_HI_HI_HI,
35843 IX86_BUILTIN_VEC_INIT_V4HI);
35845 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
35846 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
35847 IX86_BUILTIN_VEC_INIT_V8QI);
35849 /* Access to the vec_extract patterns. */
35850 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
35851 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
35852 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
35853 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
35854 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
35855 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
35856 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
35857 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
35858 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
35859 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
35861 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
35862 "__builtin_ia32_vec_ext_v4hi",
35863 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
35865 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
35866 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
35868 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
35869 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
35871 /* Access to the vec_set patterns. */
35872 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
35873 "__builtin_ia32_vec_set_v2di",
35874 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
35876 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
35877 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
35879 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
35880 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
35882 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
35883 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
35885 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
35886 "__builtin_ia32_vec_set_v4hi",
35887 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
35889 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
35890 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
35893 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_hi_step",
35894 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDSEED16_STEP);
35895 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_si_step",
35896 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDSEED32_STEP);
35897 def_builtin (OPTION_MASK_ISA_RDSEED | OPTION_MASK_ISA_64BIT,
35898 "__builtin_ia32_rdseed_di_step",
35899 INT_FTYPE_PULONGLONG, IX86_BUILTIN_RDSEED64_STEP);
35902 def_builtin (0, "__builtin_ia32_addcarryx_u32",
35903 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_ADDCARRYX32);
35904 def_builtin (OPTION_MASK_ISA_64BIT,
35905 "__builtin_ia32_addcarryx_u64",
35906 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
35907 IX86_BUILTIN_ADDCARRYX64);
35910 def_builtin (0, "__builtin_ia32_sbb_u32",
35911 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_SBB32);
35912 def_builtin (OPTION_MASK_ISA_64BIT,
35913 "__builtin_ia32_sbb_u64",
35914 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
35915 IX86_BUILTIN_SBB64);
35917 /* Read/write FLAGS. */
35918 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u32",
35919 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
35920 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u64",
35921 UINT64_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
35922 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u32",
35923 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_WRITE_FLAGS);
35924 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u64",
35925 VOID_FTYPE_UINT64, IX86_BUILTIN_WRITE_FLAGS);
35928 def_builtin (OPTION_MASK_ISA_CLFLUSHOPT, "__builtin_ia32_clflushopt",
35929 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSHOPT);
35932 def_builtin (OPTION_MASK_ISA_CLWB, "__builtin_ia32_clwb",
35933 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLWB);
35935 /* MONITORX and MWAITX. */
35936 def_builtin (OPTION_MASK_ISA_MWAITX, "__builtin_ia32_monitorx",
35937 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITORX);
35938 def_builtin (OPTION_MASK_ISA_MWAITX, "__builtin_ia32_mwaitx",
35939 VOID_FTYPE_UNSIGNED_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAITX);
35942 def_builtin (OPTION_MASK_ISA_CLZERO, "__builtin_ia32_clzero",
35943 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLZERO);
35945 /* Add FMA4 multi-arg argument instructions */
35946 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
35951 ftype = (enum ix86_builtin_func_type) d->flag;
35952 def_builtin_const (d->mask, d->name, ftype, d->code);
/* Register the Intel MPX (Memory Protection Extensions) builtins.
   Walks the bdesc_mpx and bdesc_mpx_const description tables, registers
   each entry via def_builtin / def_builtin_const, and marks every
   resulting decl "leaf" and nothrow (rationale in the comment below).
   NOTE(review): this extract omits several structural lines of the
   function (braces, loop increments, local declarations); comments
   describe only what is visible here.  */
35957 ix86_init_mpx_builtins ()
35959 const struct builtin_description * d;
35960 enum ix86_builtin_func_type ftype;
/* First pass: the non-const MPX builtins.  */
35964 for (i = 0, d = bdesc_mpx;
35965 i < ARRAY_SIZE (bdesc_mpx);
/* d->flag encodes the function type; d->mask the required ISA bits.  */
35971 ftype = (enum ix86_builtin_func_type) d->flag;
35972 decl = def_builtin (d->mask, d->name, ftype, d->code);
35974 /* With no leaf and nothrow flags for MPX builtins
35975 abnormal edges may follow its call when setjmp
35976 presents in the function. Since we may have a lot
35977 of MPX builtins calls it causes lots of useless
35978 edges and enormous PHI nodes. To avoid this we mark
35979 MPX builtins as leaf and nothrow. */
35982 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
35984 TREE_NOTHROW (decl) = 1;
/* Record leaf/nothrow in the per-builtin ISA table as well, so the
   flags can be applied to decls that are created lazily later.  */
35988 ix86_builtins_isa[(int)d->code].leaf_p = true;
35989 ix86_builtins_isa[(int)d->code].nothrow_p = true;
/* Second pass: const MPX builtins; identical leaf/nothrow treatment.  */
35993 for (i = 0, d = bdesc_mpx_const;
35994 i < ARRAY_SIZE (bdesc_mpx_const);
36000 ftype = (enum ix86_builtin_func_type) d->flag;
36001 decl = def_builtin_const (d->mask, d->name, ftype, d->code);
36005 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
36007 TREE_NOTHROW (decl) = 1;
36011 ix86_builtins_isa[(int)d->code].leaf_p = true;
36012 ix86_builtins_isa[(int)d->code].nothrow_p = true;
36017 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
36018 to return a pointer to VERSION_DECL if the outcome of the expression
36019 formed by PREDICATE_CHAIN is true. This function will be called during
36020 version dispatch to decide which function version to execute. It returns
36021 the basic block at the end, to which more conditions can be added. */
36024 add_condition_to_bb (tree function_decl, tree version_decl,
36025 tree predicate_chain, basic_block new_bb)
/* Builds, inside NEW_BB of FUNCTION_DECL, the GIMPLE that returns
   &VERSION_DECL when the conjunction of the predicate calls in
   PREDICATE_CHAIN is true; otherwise control falls to a new block for
   further conditions.  See the block comment above this function.
   NOTE(review): several structural lines (braces, some declarations,
   the trailing pop_cfun/return) are not visible in this extract.  */
36027 gimple *return_stmt;
36028 tree convert_expr, result_var;
36029 gimple *convert_stmt;
36030 gimple *call_cond_stmt;
36031 gimple *if_else_stmt;
36033 basic_block bb1, bb2, bb3;
36036 tree cond_var, and_expr_var = NULL_TREE;
36039 tree predicate_decl, predicate_arg;
36041 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
36043 gcc_assert (new_bb != NULL);
36044 gseq = bb_seq (new_bb);
/* Build "result_var = (void *) &version_decl; return result_var;" —
   the dispatcher returns a function pointer, IFUNC-style.  */
36047 convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
36048 build_fold_addr_expr (version_decl));
36049 result_var = create_tmp_var (ptr_type_node);
36050 convert_stmt = gimple_build_assign (result_var, convert_expr);
36051 return_stmt = gimple_build_return (result_var);
/* No predicates: this is the unconditional (default) version — just
   emit the return into NEW_BB and we are done.  */
36053 if (predicate_chain == NULL_TREE)
36055 gimple_seq_add_stmt (&gseq, convert_stmt);
36056 gimple_seq_add_stmt (&gseq, return_stmt);
36057 set_bb_seq (new_bb, gseq);
36058 gimple_set_bb (convert_stmt, new_bb);
36059 gimple_set_bb (return_stmt, new_bb);
/* Emit one call per predicate (TREE_PURPOSE = predicate function,
   TREE_VALUE = its argument) and AND the integer results together.  */
36064 while (predicate_chain != NULL)
36066 cond_var = create_tmp_var (integer_type_node);
36067 predicate_decl = TREE_PURPOSE (predicate_chain);
36068 predicate_arg = TREE_VALUE (predicate_chain);
36069 call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
36070 gimple_call_set_lhs (call_cond_stmt, cond_var);
36072 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
36073 gimple_set_bb (call_cond_stmt, new_bb);
36074 gimple_seq_add_stmt (&gseq, call_cond_stmt);
36076 predicate_chain = TREE_CHAIN (predicate_chain);
36078 if (and_expr_var == NULL)
36079 and_expr_var = cond_var;
36082 gimple *assign_stmt;
36083 /* Use MIN_EXPR to check if any integer is zero?.
36084 and_expr_var = min_expr <cond_var, and_expr_var> */
36085 assign_stmt = gimple_build_assign (and_expr_var,
36086 build2 (MIN_EXPR, integer_type_node,
36087 cond_var, and_expr_var));
36089 gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
36090 gimple_set_bb (assign_stmt, new_bb);
36091 gimple_seq_add_stmt (&gseq, assign_stmt);
/* if (and_expr_var > 0) — all predicates returned nonzero.  */
36095 if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
36097 NULL_TREE, NULL_TREE);
36098 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
36099 gimple_set_bb (if_else_stmt, new_bb);
36100 gimple_seq_add_stmt (&gseq, if_else_stmt);
36102 gimple_seq_add_stmt (&gseq, convert_stmt);
36103 gimple_seq_add_stmt (&gseq, return_stmt);
36104 set_bb_seq (new_bb, gseq);
/* Split twice: bb1 ends at the condition, bb2 holds the return,
   bb3 receives the false edge for the next version's condition.  */
36107 e12 = split_block (bb1, if_else_stmt);
36109 e12->flags &= ~EDGE_FALLTHRU;
36110 e12->flags |= EDGE_TRUE_VALUE;
36112 e23 = split_block (bb2, return_stmt);
36114 gimple_set_bb (convert_stmt, bb2);
36115 gimple_set_bb (return_stmt, bb2);
36118 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
36121 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
36128 /* This parses the attribute arguments to target in DECL and determines
36129 the right builtin to use to match the platform specification.
36130 It returns the priority value for this version decl. If PREDICATE_LIST
36131 is not NULL, it stores the list of cpu features that need to be checked
36132 before dispatching this function. */
36134 static unsigned int
36135 get_builtin_code_for_version (tree decl, tree *predicate_list)
36138 struct cl_target_option cur_target;
36140 struct cl_target_option *new_target;
36141 const char *arg_str = NULL;
36142 const char *attrs_str = NULL;
36143 char *tok_str = NULL;
36146 /* Priority of i386 features, greater value is higher priority. This is
36147 used to decide the order in which function dispatch must happen. For
36148 instance, a version specialized for SSE4.2 should be checked for dispatch
36149 before a version for SSE3, as SSE4.2 implies SSE3. */
36150 enum feature_priority
36183 enum feature_priority priority = P_ZERO;
36185 /* These are the target attribute strings for which a dispatcher is
36186 available, from fold_builtin_cpu. */
36188 static struct _feature_list
36190 const char *const name;
36191 const enum feature_priority priority;
36193 const feature_list[] =
36199 {"sse4a", P_SSE4_A},
36200 {"ssse3", P_SSSE3},
36201 {"sse4.1", P_SSE4_1},
36202 {"sse4.2", P_SSE4_2},
36203 {"popcnt", P_POPCNT},
36205 {"pclmul", P_PCLMUL},
36213 {"avx512f", P_AVX512F}
36217 static unsigned int NUM_FEATURES
36218 = sizeof (feature_list) / sizeof (struct _feature_list);
36222 tree predicate_chain = NULL_TREE;
36223 tree predicate_decl, predicate_arg;
36225 attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
36226 gcc_assert (attrs != NULL);
36228 attrs = TREE_VALUE (TREE_VALUE (attrs));
36230 gcc_assert (TREE_CODE (attrs) == STRING_CST);
36231 attrs_str = TREE_STRING_POINTER (attrs);
36233 /* Return priority zero for default function. */
36234 if (strcmp (attrs_str, "default") == 0)
36237 /* Handle arch= if specified. For priority, set it to be 1 more than
36238 the best instruction set the processor can handle. For instance, if
36239 there is a version for atom and a version for ssse3 (the highest ISA
36240 priority for atom), the atom version must be checked for dispatch
36241 before the ssse3 version. */
36242 if (strstr (attrs_str, "arch=") != NULL)
36244 cl_target_option_save (&cur_target, &global_options);
36245 target_node = ix86_valid_target_attribute_tree (attrs, &global_options,
36246 &global_options_set);
36248 gcc_assert (target_node);
36249 new_target = TREE_TARGET_OPTION (target_node);
36250 gcc_assert (new_target);
36252 if (new_target->arch_specified && new_target->arch > 0)
36254 switch (new_target->arch)
36256 case PROCESSOR_CORE2:
36258 priority = P_PROC_SSSE3;
36260 case PROCESSOR_NEHALEM:
36261 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AES)
36262 arg_str = "westmere";
36264 /* We translate "arch=corei7" and "arch=nehalem" to
36265 "corei7" so that it will be mapped to M_INTEL_COREI7
36266 as cpu type to cover all M_INTEL_COREI7_XXXs. */
36267 arg_str = "corei7";
36268 priority = P_PROC_SSE4_2;
36270 case PROCESSOR_SANDYBRIDGE:
36271 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_F16C)
36272 arg_str = "ivybridge";
36274 arg_str = "sandybridge";
36275 priority = P_PROC_AVX;
36277 case PROCESSOR_HASWELL:
36278 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AVX512VL)
36279 arg_str = "skylake-avx512";
36280 else if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_XSAVES)
36281 arg_str = "skylake";
36282 else if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_ADX)
36283 arg_str = "broadwell";
36285 arg_str = "haswell";
36286 priority = P_PROC_AVX2;
36288 case PROCESSOR_BONNELL:
36289 arg_str = "bonnell";
36290 priority = P_PROC_SSSE3;
36292 case PROCESSOR_KNL:
36294 priority = P_PROC_AVX512F;
36296 case PROCESSOR_SILVERMONT:
36297 arg_str = "silvermont";
36298 priority = P_PROC_SSE4_2;
36300 case PROCESSOR_AMDFAM10:
36301 arg_str = "amdfam10h";
36302 priority = P_PROC_SSE4_A;
36304 case PROCESSOR_BTVER1:
36305 arg_str = "btver1";
36306 priority = P_PROC_SSE4_A;
36308 case PROCESSOR_BTVER2:
36309 arg_str = "btver2";
36310 priority = P_PROC_BMI;
36312 case PROCESSOR_BDVER1:
36313 arg_str = "bdver1";
36314 priority = P_PROC_XOP;
36316 case PROCESSOR_BDVER2:
36317 arg_str = "bdver2";
36318 priority = P_PROC_FMA;
36320 case PROCESSOR_BDVER3:
36321 arg_str = "bdver3";
36322 priority = P_PROC_FMA;
36324 case PROCESSOR_BDVER4:
36325 arg_str = "bdver4";
36326 priority = P_PROC_AVX2;
36328 case PROCESSOR_ZNVER1:
36329 arg_str = "znver1";
36330 priority = P_PROC_AVX2;
36335 cl_target_option_restore (&global_options, &cur_target);
36337 if (predicate_list && arg_str == NULL)
36339 error_at (DECL_SOURCE_LOCATION (decl),
36340 "No dispatcher found for the versioning attributes");
36344 if (predicate_list)
36346 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS];
36347 /* For a C string literal the length includes the trailing NULL. */
36348 predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
36349 predicate_chain = tree_cons (predicate_decl, predicate_arg,
36354 /* Process feature name. */
36355 tok_str = (char *) xmalloc (strlen (attrs_str) + 1);
36356 strcpy (tok_str, attrs_str);
36357 token = strtok (tok_str, ",");
36358 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS];
36360 while (token != NULL)
36362 /* Do not process "arch=" */
36363 if (strncmp (token, "arch=", 5) == 0)
36365 token = strtok (NULL, ",");
36368 for (i = 0; i < NUM_FEATURES; ++i)
36370 if (strcmp (token, feature_list[i].name) == 0)
36372 if (predicate_list)
36374 predicate_arg = build_string_literal (
36375 strlen (feature_list[i].name) + 1,
36376 feature_list[i].name);
36377 predicate_chain = tree_cons (predicate_decl, predicate_arg,
36380 /* Find the maximum priority feature. */
36381 if (feature_list[i].priority > priority)
36382 priority = feature_list[i].priority;
36387 if (predicate_list && i == NUM_FEATURES)
36389 error_at (DECL_SOURCE_LOCATION (decl),
36390 "No dispatcher found for %s", token);
36393 token = strtok (NULL, ",");
36397 if (predicate_list && predicate_chain == NULL_TREE)
36399 error_at (DECL_SOURCE_LOCATION (decl),
36400 "No dispatcher found for the versioning attributes : %s",
36404 else if (predicate_list)
36406 predicate_chain = nreverse (predicate_chain);
36407 *predicate_list = predicate_chain;
36413 /* This compares the priority of target features in function DECL1
36414 and DECL2. It returns positive value if DECL1 is higher priority,
36415 negative value if DECL2 is higher priority and 0 if they are the
/* qsort-style comparator over two function-version decls: positive if
   DECL1 has higher dispatch priority, negative if DECL2 does, 0 if
   equal.  Passes NULL for predicate_list, so only the priority is
   computed.  (Braces of this function are not visible in this
   extract.)  */
36419 ix86_compare_version_priority (tree decl1, tree decl2)
36421 unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
36422 unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);
/* Priorities are small enum values, so the int difference cannot
   overflow.  */
36424 return (int)priority1 - (int)priority2;
36427 /* V1 and V2 point to function versions with different priorities
36428 based on the target ISA. This function compares their priorities. */
36431 feature_compare (const void *v1, const void *v2)
/* qsort comparator for _function_version_info records: sorts by
   DESCENDING dispatch_priority (highest-priority version first).
   The local typedef mirrors the struct defined in
   dispatch_function_versions, which is the only caller via qsort.  */
36433 typedef struct _function_version_info
36436 tree predicate_chain;
36437 unsigned int dispatch_priority;
36438 } function_version_info;
36440 const function_version_info c1 = *(const function_version_info *)v1;
36441 const function_version_info c2 = *(const function_version_info *)v2;
/* NOTE(review): unsigned subtraction converted to int — fine while
   priorities stay small (they are feature_priority enum values), but
   would misorder if they ever exceeded INT_MAX apart.  */
36442 return (c2.dispatch_priority - c1.dispatch_priority);
36445 /* This function generates the dispatch function for
36446 multi-versioned functions. DISPATCH_DECL is the function which will
36447 contain the dispatch logic. FNDECLS are the function choices for
36448 dispatch, and is a tree chain. EMPTY_BB is the basic block pointer
36449 in DISPATCH_DECL in which the dispatch code is generated. */
36452 dispatch_function_versions (tree dispatch_decl,
36454 basic_block *empty_bb)
/* Generate the body of the multiversioning dispatcher DISPATCH_DECL:
   call __builtin_cpu_init, then chain one predicate test per
   non-default version (highest priority first), ending with the
   default version.  See the block comment above this function.
   NOTE(review): structural lines (braces, the fndecls_p parameter
   line, loop tails, the trailing pop_cfun/return) are not visible in
   this extract.  */
36457 gimple *ifunc_cpu_init_stmt;
36461 vec<tree> *fndecls;
36462 unsigned int num_versions = 0;
36463 unsigned int actual_versions = 0;
/* Must match the layout of the typedef inside feature_compare, which
   qsort uses below.  */
36466 struct _function_version_info
36469 tree predicate_chain;
36470 unsigned int dispatch_priority;
36471 }*function_version_info;
36473 gcc_assert (dispatch_decl != NULL
36474 && fndecls_p != NULL
36475 && empty_bb != NULL);
36477 /* fndecls_p is actually a vector.  */
36478 fndecls = static_cast<vec<tree> *> (fndecls_p);
36480 /* At least one more version other than the default. */
36481 num_versions = fndecls->length ();
36482 gcc_assert (num_versions >= 2);
/* Slot 0 is the default decl, so only num_versions - 1 records.  */
36484 function_version_info = (struct _function_version_info *)
36485 XNEWVEC (struct _function_version_info, (num_versions - 1));
36487 /* The first version in the vector is the default decl. */
36488 default_decl = (*fndecls)[0];
36490 push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));
36492 gseq = bb_seq (*empty_bb);
36493 /* Function version dispatch is via IFUNC. IFUNC resolvers fire before
36494 constructors, so explicitly call __builtin_cpu_init here. */
36495 ifunc_cpu_init_stmt = gimple_build_call_vec (
36496 ix86_builtins [(int) IX86_BUILTIN_CPU_INIT], vNULL);
36497 gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
36498 gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
36499 set_bb_seq (*empty_bb, gseq);
/* Collect priority + predicate chain for each non-default version.  */
36504 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
36506 tree version_decl = ele;
36507 tree predicate_chain = NULL_TREE;
36508 unsigned int priority;
36509 /* Get attribute string, parse it and find the right predicate decl.
36510 The predicate function could be a lengthy combination of many
36511 features, like arch-type and various isa-variants. */
36512 priority = get_builtin_code_for_version (version_decl,
/* Skip versions with no usable predicate (parse error already
   reported by get_builtin_code_for_version).  */
36515 if (predicate_chain == NULL_TREE)
36518 function_version_info [actual_versions].version_decl = version_decl;
36519 function_version_info [actual_versions].predicate_chain
36521 function_version_info [actual_versions].dispatch_priority = priority;
36525 /* Sort the versions according to descending order of dispatch priority. The
36526 priority is based on the ISA. This is not a perfect solution. There
36527 could still be ambiguity. If more than one function version is suitable
36528 to execute, which one should be dispatched? In future, allow the user
36529 to specify a dispatch priority next to the version. */
36530 qsort (function_version_info, actual_versions,
36531 sizeof (struct _function_version_info), feature_compare);
36533 for (i = 0; i < actual_versions; ++i)
36534 *empty_bb = add_condition_to_bb (dispatch_decl,
36535 function_version_info[i].version_decl,
36536 function_version_info[i].predicate_chain,
36539 /* dispatch default version at the end. */
36540 *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
36543 free (function_version_info);
36547 /* Comparator function to be used in qsort routine to sort attribute
36548 specification strings to "target". */
36551 attr_strcmp (const void *v1, const void *v2)
/* qsort comparator over a char* array: each argument points to a
   char*, so dereference once before strcmp.  (Braces of this function
   are not visible in this extract.)  */
36553 const char *c1 = *(char *const*)v1;
36554 const char *c2 = *(char *const*)v2;
36555 return strcmp (c1, c2);
36558 /* ARGLIST is the argument to target attribute. This function tokenizes
36559 the comma separated arguments, sorts them and returns a string which
36560 is a unique identifier for the comma separated arguments. It also
36561 replaces non-identifier characters "=,-" with "_". */
36564 sorted_attr_string (tree arglist)
36567 size_t str_len_sum = 0;
36568 char **args = NULL;
36569 char *attr_str, *ret_str;
/* argnum starts at 1: N separators imply N + 1 tokens.  */
36571 unsigned int argnum = 1;
/* First pass: total up the buffer size needed; the elided body lines
   presumably also count separators into argnum -- confirm against the
   full source.  */
36574 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
36576 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
36577 size_t len = strlen (str);
36578 str_len_sum += len + 1;
36579 if (arg != arglist)
36581 for (i = 0; i < strlen (str); i++)
36586 attr_str = XNEWVEC (char, str_len_sum);
/* Second pass: concatenate all attribute strings, comma-separated,
   NUL-terminating after the last one.  NOTE(review): str_len_sum is
   presumably reset to 0 in an elided line before this loop.  */
36588 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
36590 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
36591 size_t len = strlen (str);
36592 memcpy (attr_str + str_len_sum, str, len);
36593 attr_str[str_len_sum + len] = TREE_CHAIN (arg) ? ',' : '\0';
36594 str_len_sum += len + 1;
36597 /* Replace "=,-" with "_". */
36598 for (i = 0; i < strlen (attr_str); i++)
36599 if (attr_str[i] == '=' || attr_str[i]== '-')
/* Tokenize on ',' (strtok mutates attr_str in place) and collect the
   token pointers into ARGS for sorting.  */
36605 args = XNEWVEC (char *, argnum);
36608 attr = strtok (attr_str, ",");
36609 while (attr != NULL)
36613 attr = strtok (NULL, ",");
/* Sort tokens lexicographically so equivalent attribute lists mangle
   to the same identifier regardless of source order.  */
36616 qsort (args, argnum, sizeof (char *), attr_strcmp);
36618 ret_str = XNEWVEC (char, str_len_sum);
/* Rebuild the sorted tokens joined by '_' (demangler-friendly).
   NOTE(review): str_len_sum is presumably reset again in an elided
   line before this loop.  */
36620 for (i = 0; i < argnum; i++)
36622 size_t len = strlen (args[i]);
36623 memcpy (ret_str + str_len_sum, args[i], len);
36624 ret_str[str_len_sum + len] = i < argnum - 1 ? '_' : '\0';
36625 str_len_sum += len + 1;
/* Scratch buffer is no longer needed; caller owns (and must free)
   the returned ret_str.  */
36629 XDELETEVEC (attr_str);
36633 /* This function changes the assembler name for functions that are
36634 versions. If DECL is a function version and has a "target"
36635 attribute, it appends the attribute string to its assembler name. */
36638 ix86_mangle_function_version_assembler_name (tree decl, tree id)
36641 const char *orig_name, *version_string;
36642 char *attr_str, *assembler_name;
/* gnu_inline versions would have no out-of-line body to dispatch to,
   so reject the combination up front.  */
36644 if (DECL_DECLARED_INLINE_P (decl)
36645 && lookup_attribute ("gnu_inline",
36646 DECL_ATTRIBUTES (decl))
36647 error_at (DECL_SOURCE_LOCATION (decl),
36648 "Function versions cannot be marked as gnu_inline,"
36649 " bodies have to be generated");
36651 if (DECL_VIRTUAL_P (decl)
36652 || DECL_VINDEX (decl))
36653 sorry ("Virtual function multiversioning not supported");
36655 version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
36657 /* target attribute string cannot be NULL. */
36658 gcc_assert (version_attr != NULL_TREE);
36660 orig_name = IDENTIFIER_POINTER (id);
36662 = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));
/* The default version keeps the unmangled name (elided lines
   presumably return ID unchanged here -- confirm in full source).  */
36664 if (strcmp (version_string, "default") == 0)
/* Mangled form is "<orig>.<sorted-attrs>"; +2 covers the '.' and the
   trailing NUL.  */
36667 attr_str = sorted_attr_string (TREE_VALUE (version_attr));
36668 assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);
36670 sprintf (assembler_name, "%s.%s", orig_name, attr_str);
36672 /* Allow assembler name to be modified if already set. */
36673 if (DECL_ASSEMBLER_NAME_SET_P (decl))
36674 SET_DECL_RTL (decl, NULL);
/* Intern the new name; the scratch buffers can then be freed.  */
36676 tree ret = get_identifier (assembler_name);
36677 XDELETEVEC (attr_str);
36678 XDELETEVEC (assembler_name);
36682 /* This function returns true if FN1 and FN2 are versions of the same function,
36683 that is, the target strings of the function decls are different. This assumes
36684 that FN1 and FN2 have the same signature. */
36687 ix86_function_versions (tree fn1, tree fn2)
36690 char *target1, *target2;
/* Only function decls can be versioned.  */
36693 if (TREE_CODE (fn1) != FUNCTION_DECL
36694 || TREE_CODE (fn2) != FUNCTION_DECL)
36697 attr1 = lookup_attribute ("target", DECL_ATTRIBUTES (fn1));
36698 attr2 = lookup_attribute ("target", DECL_ATTRIBUTES (fn2));
36700 /* At least one function decl should have the target attribute specified. */
36701 if (attr1 == NULL_TREE && attr2 == NULL_TREE)
36704 /* Diagnose missing target attribute if one of the decls is already
36705 multi-versioned. */
36706 if (attr1 == NULL_TREE || attr2 == NULL_TREE)
36708 if (DECL_FUNCTION_VERSIONED (fn1) || DECL_FUNCTION_VERSIONED (fn2))
/* Normalize so FN2 is always the decl missing the attribute.  */
36710 if (attr2 != NULL_TREE)
36712 std::swap (fn1, fn2);
36715 error_at (DECL_SOURCE_LOCATION (fn2),
36716 "missing %<target%> attribute for multi-versioned %D",
36718 inform (DECL_SOURCE_LOCATION (fn1),
36719 "previous declaration of %D", fn1);
36720 /* Prevent diagnosing of the same error multiple times. */
36721 DECL_ATTRIBUTES (fn2)
36722 = tree_cons (get_identifier ("target"),
36723 copy_node (TREE_VALUE (attr1)),
36724 DECL_ATTRIBUTES (fn2));
/* Compare canonicalized (sorted) attribute strings so equivalent
   lists in different source order compare equal.  */
36729 target1 = sorted_attr_string (TREE_VALUE (attr1));
36730 target2 = sorted_attr_string (TREE_VALUE (attr2));
36732 /* The sorted target strings must be different for fn1 and fn2
36734 if (strcmp (target1, target2) == 0)
36739 XDELETEVEC (target1);
36740 XDELETEVEC (target2);
/* Target hook: adjust DECL's assembler name ID.  Function versions get
   the "target" attribute suffix appended; subtargets may mangle
   further via SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME.  */
36746 ix86_mangle_decl_assembler_name (tree decl, tree id)
36748 /* For function version, add the target suffix to the assembler name. */
36749 if (TREE_CODE (decl) == FUNCTION_DECL
36750 && DECL_FUNCTION_VERSIONED (decl))
36751 id = ix86_mangle_function_version_assembler_name (decl, id);
36752 #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
36753 id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
36759 /* Return a new name by appending SUFFIX to the DECL name. If make_unique
36760 is true, append the full path name of the source file. */
36763 make_name (tree decl, const char *suffix, bool make_unique)
36765 char *global_var_name;
36768 const char *unique_name = NULL;
36770 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
36772 /* Get a unique name that can be used globally without any chances
36773 of collision at link time. */
36775 unique_name = IDENTIFIER_POINTER (get_file_function_name ("\0"));
/* +2 covers the '.' separator and the trailing NUL.  */
36777 name_len = strlen (name) + strlen (suffix) + 2;
/* +1 for the extra '.' before the unique (per-file) component.  */
36780 name_len += strlen (unique_name) + 1;
36781 global_var_name = XNEWVEC (char, name_len);
36783 /* Use '.' to concatenate names as it is demangler friendly. */
36785 snprintf (global_var_name, name_len, "%s.%s.%s", name, unique_name,
36788 snprintf (global_var_name, name_len, "%s.%s", name, suffix);
/* Caller owns the returned heap buffer and must XDELETEVEC it.  */
36790 return global_var_name;
/* The dispatcher is emitted only when the target can output ifunc-style
   type directives.  */
36793 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
36795 /* Make a dispatcher declaration for the multi-versioned function DECL.
36796 Calls to DECL function will be replaced with calls to the dispatcher
36797 by the front-end. Return the decl created. */
36800 make_dispatcher_decl (const tree decl)
36804 tree fn_type, func_type;
36805 bool is_uniq = false;
/* Non-public versions need a file-unique dispatcher name, since the
   IFUNC symbol itself must be globally visible.  */
36807 if (TREE_PUBLIC (decl) == 0)
36810 func_name = make_name (decl, "ifunc", is_uniq);
/* Dispatcher shares DECL's signature exactly.  */
36812 fn_type = TREE_TYPE (decl);
36813 func_type = build_function_type (TREE_TYPE (fn_type),
36814 TYPE_ARG_TYPES (fn_type));
36816 func_decl = build_fn_decl (func_name, func_type);
36817 XDELETEVEC (func_name);
36818 TREE_USED (func_decl) = 1;
36819 DECL_CONTEXT (func_decl) = NULL_TREE;
36820 DECL_INITIAL (func_decl) = error_mark_node;
36821 DECL_ARTIFICIAL (func_decl) = 1;
36822 /* Mark this func as external, the resolver will flip it again if
36823 it gets generated. */
36824 DECL_EXTERNAL (func_decl) = 1;
36825 /* This will be of type IFUNCs have to be externally visible. */
36826 TREE_PUBLIC (func_decl) = 1;
36833 /* Returns true if decl is multi-versioned and DECL is the default function,
36834 that is it is not tagged with target specific optimization. */
36837 is_function_default_version (const tree decl)
36839 if (TREE_CODE (decl) != FUNCTION_DECL
36840 || !DECL_FUNCTION_VERSIONED (decl))
36842 tree attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
/* The default version is spelled target("default").  */
36844 attr = TREE_VALUE (TREE_VALUE (attr));
36845 return (TREE_CODE (attr) == STRING_CST
36846 && strcmp (TREE_STRING_POINTER (attr), "default") == 0);
36849 /* Make a dispatcher declaration for the multi-versioned function DECL.
36850 Calls to DECL function will be replaced with calls to the dispatcher
36851 by the front-end. Returns the decl of the dispatcher function. */
36854 ix86_get_function_versions_dispatcher (void *decl)
36856 tree fn = (tree) decl;
36857 struct cgraph_node *node = NULL;
36858 struct cgraph_node *default_node = NULL;
36859 struct cgraph_function_version_info *node_v = NULL;
36860 struct cgraph_function_version_info *first_v = NULL;
36862 tree dispatch_decl = NULL;
36864 struct cgraph_function_version_info *default_version_info = NULL;
36866 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
36868 node = cgraph_node::get (fn);
36869 gcc_assert (node != NULL);
36871 node_v = node->function_version ();
36872 gcc_assert (node_v != NULL);
/* Already created for an earlier caller -- reuse it.  */
36874 if (node_v->dispatcher_resolver != NULL)
36875 return node_v->dispatcher_resolver;
36877 /* Find the default version and make it the first node. */
36879 /* Go to the beginning of the chain. */
36880 while (first_v->prev != NULL)
36881 first_v = first_v->prev;
36882 default_version_info = first_v;
/* Walk forward until the target("default") version is found.  */
36883 while (default_version_info != NULL)
36885 if (is_function_default_version
36886 (default_version_info->this_node->decl))
36888 default_version_info = default_version_info->next;
36891 /* If there is no default node, just return NULL. */
36892 if (default_version_info == NULL)
36895 /* Make default info the first node. */
36896 if (first_v != default_version_info)
/* Unlink the default node and splice it in ahead of first_v.  */
36898 default_version_info->prev->next = default_version_info->next;
36899 if (default_version_info->next)
36900 default_version_info->next->prev = default_version_info->prev;
36901 first_v->prev = default_version_info;
36902 default_version_info->next = first_v;
36903 default_version_info->prev = NULL;
36906 default_node = default_version_info->this_node;
36908 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
36909 if (targetm.has_ifunc_p ())
36911 struct cgraph_function_version_info *it_v = NULL;
36912 struct cgraph_node *dispatcher_node = NULL;
36913 struct cgraph_function_version_info *dispatcher_version_info = NULL;
36915 /* Right now, the dispatching is done via ifunc. */
36916 dispatch_decl = make_dispatcher_decl (default_node->decl);
36918 dispatcher_node = cgraph_node::get_create (dispatch_decl);
36919 gcc_assert (dispatcher_node != NULL);
36920 dispatcher_node->dispatcher_function = 1;
36921 dispatcher_version_info
36922 = dispatcher_node->insert_new_function_version ();
36923 dispatcher_version_info->next = default_version_info;
36924 dispatcher_node->definition = 1;
36926 /* Set the dispatcher for all the versions. */
36927 it_v = default_version_info;
36928 while (it_v != NULL)
36930 it_v->dispatcher_resolver = dispatch_decl;
/* Target has no ifunc support: multiversioning cannot work.  */
36937 error_at (DECL_SOURCE_LOCATION (default_node->decl),
36938 "multiversioning needs ifunc which is not supported "
36942 return dispatch_decl;
36945 /* Make the resolver function decl to dispatch the versions of
36946 a multi-versioned function, DEFAULT_DECL. Create an
36947 empty basic block in the resolver and store the pointer in
36948 EMPTY_BB. Return the decl of the resolver function. */
36951 make_resolver_func (const tree default_decl,
36952 const tree dispatch_decl,
36953 basic_block *empty_bb)
36955 char *resolver_name;
36956 tree decl, type, decl_name, t;
36957 bool is_uniq = false;
36959 /* IFUNC's have to be globally visible. So, if the default_decl is
36960 not, then the name of the IFUNC should be made unique. */
36961 if (TREE_PUBLIC (default_decl) == 0)
36964 /* Append the filename to the resolver function if the versions are
36965 not externally visible. This is because the resolver function has
36966 to be externally visible for the loader to find it. So, appending
36967 the filename will prevent conflicts with a resolver function from
36968 another module which is based on the same version name. */
36969 resolver_name = make_name (default_decl, "resolver", is_uniq);
36971 /* The resolver function should return a (void *). */
36972 type = build_function_type_list (ptr_type_node, NULL_TREE);
36974 decl = build_fn_decl (resolver_name, type);
36975 decl_name = get_identifier (resolver_name);
36976 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
36978 DECL_NAME (decl) = decl_name;
36979 TREE_USED (decl) = 1;
36980 DECL_ARTIFICIAL (decl) = 1;
36981 DECL_IGNORED_P (decl) = 0;
36982 /* IFUNC resolvers have to be externally visible. */
36983 TREE_PUBLIC (decl) = 1;
36984 DECL_UNINLINABLE (decl) = 1;
36986 /* Resolver is not external, body is generated. */
36987 DECL_EXTERNAL (decl) = 0;
36988 DECL_EXTERNAL (dispatch_decl) = 0;
36990 DECL_CONTEXT (decl) = NULL_TREE;
36991 DECL_INITIAL (decl) = make_node (BLOCK);
36992 DECL_STATIC_CONSTRUCTOR (decl) = 0;
36994 if (DECL_COMDAT_GROUP (default_decl)
36995 || TREE_PUBLIC (default_decl))
36997 /* In this case, each translation unit with a call to this
36998 versioned function will put out a resolver. Ensure it
36999 is comdat to keep just one copy. */
37000 DECL_COMDAT (decl) = 1;
37001 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
37003 /* Build result decl and add to function_decl. */
37004 t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
37005 DECL_ARTIFICIAL (t) = 1;
37006 DECL_IGNORED_P (t) = 1;
37007 DECL_RESULT (decl) = t;
/* Lower the (empty) body and hand the caller an empty basic block to
   fill with dispatch code; push_cfun makes DECL the current function.  */
37009 gimplify_function_tree (decl);
37010 push_cfun (DECL_STRUCT_FUNCTION (decl));
37011 *empty_bb = init_lowered_empty_function (decl, false, 0);
37013 cgraph_node::add_new_function (decl, true);
37014 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
37018 gcc_assert (dispatch_decl != NULL);
37019 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
37020 DECL_ATTRIBUTES (dispatch_decl)
37021 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
37023 /* Create the alias for dispatch to resolver here. */
37024 /*cgraph_create_function_alias (dispatch_decl, decl);*/
37025 cgraph_node::create_same_body_alias (dispatch_decl, decl);
37026 XDELETEVEC (resolver_name);
37030 /* Generate the dispatching code body to dispatch multi-versioned function
37031 DECL. The target hook is called to process the "target" attributes and
37032 provide the code to dispatch the right function at run-time. NODE points
37033 to the dispatcher decl whose body will be created. */
37036 ix86_generate_version_dispatcher_body (void *node_p)
37038 tree resolver_decl;
37039 basic_block empty_bb;
37040 tree default_ver_decl;
37041 struct cgraph_node *versn;
37042 struct cgraph_node *node;
37044 struct cgraph_function_version_info *node_version_info = NULL;
37045 struct cgraph_function_version_info *versn_info = NULL;
37047 node = (cgraph_node *)node_p;
37049 node_version_info = node->function_version ();
37050 gcc_assert (node->dispatcher_function
37051 && node_version_info != NULL);
/* Resolver already built on a previous call -- reuse it.  */
37053 if (node_version_info->dispatcher_resolver)
37054 return node_version_info->dispatcher_resolver;
37056 /* The first version in the chain corresponds to the default version. */
37057 default_ver_decl = node_version_info->next->this_node->decl;
37059 /* node is going to be an alias, so remove the finalized bit. */
37060 node->definition = false;
37062 resolver_decl = make_resolver_func (default_ver_decl,
37063 node->decl, &empty_bb);
37065 node_version_info->dispatcher_resolver = resolver_decl;
37067 push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));
/* Collect every version decl so the resolver body can test each
   predicate in priority order.  */
37069 auto_vec<tree, 2> fn_ver_vec;
37071 for (versn_info = node_version_info->next; versn_info;
37072 versn_info = versn_info->next)
37074 versn = versn_info->this_node;
37075 /* Check for virtual functions here again, as by this time it should
37076 have been determined if this function needs a vtable index or
37077 not. This happens for methods in derived classes that override
37078 virtual methods in base classes but are not explicitly marked as
37080 if (DECL_VINDEX (versn->decl))
37081 sorry ("Virtual function multiversioning not supported");
37083 fn_ver_vec.safe_push (versn->decl);
/* Emit the if-chain dispatch code and rebuild call-graph edges for
   the new body.  */
37086 dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
37087 cgraph_edge::rebuild_edges ();
37089 return resolver_decl;
37091 /* This builds the processor_model struct type defined in
37092 libgcc/config/i386/cpuinfo.c */
37095 build_processor_model_struct (void)
37097 const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
37099 tree field = NULL_TREE, field_chain = NULL_TREE;
37101 tree type = make_node (RECORD_TYPE);
37103 /* The first 3 fields are unsigned int. */
37104 for (i = 0; i < 3; ++i)
37106 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
37107 get_identifier (field_name[i]), unsigned_type_node);
/* Fields are chained in reverse; finish_builtin_struct handles the
   ordering of FIELD_CHAIN.  */
37108 if (field_chain != NULL_TREE)
37109 DECL_CHAIN (field) = field_chain;
37110 field_chain = field;
37113 /* The last field is an array of unsigned integers of size one. */
37114 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
37115 get_identifier (field_name[3]),
37116 build_array_type (unsigned_type_node,
37117 build_index_type (size_one_node)));
37118 if (field_chain != NULL_TREE)
37119 DECL_CHAIN (field) = field_chain;
37120 field_chain = field;
/* Lay out the record; name must match libgcc's __processor_model so
   the fields line up with the runtime __cpu_model object.  */
37122 finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE);
37126 /* Returns a extern, comdat VAR_DECL of type TYPE and name NAME. */
37129 make_var_decl (tree type, const char *name)
37133 new_decl = build_decl (UNKNOWN_LOCATION,
37135 get_identifier(name),
/* External + static + public: references libgcc's single definition
   (e.g. __cpu_model) rather than emitting a local one.  */
37138 DECL_EXTERNAL (new_decl) = 1;
37139 TREE_STATIC (new_decl) = 1;
37140 TREE_PUBLIC (new_decl) = 1;
37141 DECL_INITIAL (new_decl) = 0;
37142 DECL_ARTIFICIAL (new_decl) = 0;
37143 DECL_PRESERVE_P (new_decl) = 1;
37145 make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl));
37146 assemble_variable (new_decl, 0, 0, 0);
37151 /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
37152 into an integer defined in libgcc/config/i386/cpuinfo.c */
37155 fold_builtin_cpu (tree fndecl, tree *args)
37158 enum ix86_builtins fn_code = (enum ix86_builtins)
37159 DECL_FUNCTION_CODE (fndecl);
37160 tree param_string_cst = NULL;
37162 /* This is the order of bit-fields in __processor_features in cpuinfo.c */
37163 enum processor_features
37196 /* These are the values for vendor types and cpu types and subtypes
37197 in cpuinfo.c. Cpu types and subtypes should be subtracted by
37198 the corresponding start value. */
37199 enum processor_model
37209 M_INTEL_SILVERMONT,
37213 M_CPU_SUBTYPE_START,
37214 M_INTEL_COREI7_NEHALEM,
37215 M_INTEL_COREI7_WESTMERE,
37216 M_INTEL_COREI7_SANDYBRIDGE,
37217 M_AMDFAM10H_BARCELONA,
37218 M_AMDFAM10H_SHANGHAI,
37219 M_AMDFAM10H_ISTANBUL,
37220 M_AMDFAM15H_BDVER1,
37221 M_AMDFAM15H_BDVER2,
37222 M_AMDFAM15H_BDVER3,
37223 M_AMDFAM15H_BDVER4,
37224 M_AMDFAM17H_ZNVER1,
37225 M_INTEL_COREI7_IVYBRIDGE,
37226 M_INTEL_COREI7_HASWELL,
37227 M_INTEL_COREI7_BROADWELL,
37228 M_INTEL_COREI7_SKYLAKE,
37229 M_INTEL_COREI7_SKYLAKE_AVX512
/* Maps the strings accepted by __builtin_cpu_is to processor_model
   values.  NOTE(review): must stay in sync with libgcc cpuinfo.c.  */
37232 static struct _arch_names_table
37234 const char *const name;
37235 const enum processor_model model;
37237 const arch_names_table[] =
37240 {"intel", M_INTEL},
37241 {"atom", M_INTEL_BONNELL},
37242 {"slm", M_INTEL_SILVERMONT},
37243 {"core2", M_INTEL_CORE2},
37244 {"corei7", M_INTEL_COREI7},
37245 {"nehalem", M_INTEL_COREI7_NEHALEM},
37246 {"westmere", M_INTEL_COREI7_WESTMERE},
37247 {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
37248 {"ivybridge", M_INTEL_COREI7_IVYBRIDGE},
37249 {"haswell", M_INTEL_COREI7_HASWELL},
37250 {"broadwell", M_INTEL_COREI7_BROADWELL},
37251 {"skylake", M_INTEL_COREI7_SKYLAKE},
37252 {"skylake-avx512", M_INTEL_COREI7_SKYLAKE_AVX512},
37253 {"bonnell", M_INTEL_BONNELL},
37254 {"silvermont", M_INTEL_SILVERMONT},
37255 {"knl", M_INTEL_KNL},
37256 {"amdfam10h", M_AMDFAM10H},
37257 {"barcelona", M_AMDFAM10H_BARCELONA},
37258 {"shanghai", M_AMDFAM10H_SHANGHAI},
37259 {"istanbul", M_AMDFAM10H_ISTANBUL},
37260 {"btver1", M_AMD_BTVER1},
37261 {"amdfam15h", M_AMDFAM15H},
37262 {"bdver1", M_AMDFAM15H_BDVER1},
37263 {"bdver2", M_AMDFAM15H_BDVER2},
37264 {"bdver3", M_AMDFAM15H_BDVER3},
37265 {"bdver4", M_AMDFAM15H_BDVER4},
37266 {"btver2", M_AMD_BTVER2},
37267 {"znver1", M_AMDFAM17H_ZNVER1},
/* Maps the strings accepted by __builtin_cpu_supports to feature bit
   numbers in __cpu_model.__cpu_features[0].  */
37270 static struct _isa_names_table
37272 const char *const name;
37273 const enum processor_features feature;
37275 const isa_names_table[] =
37279 {"popcnt", F_POPCNT},
37283 {"ssse3", F_SSSE3},
37284 {"sse4a", F_SSE4_A},
37285 {"sse4.1", F_SSE4_1},
37286 {"sse4.2", F_SSE4_2},
37292 {"avx512f", F_AVX512F},
37296 {"pclmul", F_PCLMUL},
37297 {"avx512vl",F_AVX512VL},
37298 {"avx512bw",F_AVX512BW},
37299 {"avx512dq",F_AVX512DQ},
37300 {"avx512cd",F_AVX512CD},
37301 {"avx512er",F_AVX512ER},
37302 {"avx512pf",F_AVX512PF},
37303 {"avx512vbmi",F_AVX512VBMI},
37304 {"avx512ifma",F_AVX512IFMA},
/* Build a tree type mirroring libgcc's struct and a decl referencing
   its __cpu_model variable.  */
37307 tree __processor_model_type = build_processor_model_struct ();
37308 tree __cpu_model_var = make_var_decl (__processor_model_type,
37312 varpool_node::add (__cpu_model_var);
37314 gcc_assert ((args != NULL) && (*args != NULL));
/* Strip wrapper expressions (e.g. NOP_EXPRs) until the STRING_CST
   argument is reached; anything else is a user error.  */
37316 param_string_cst = *args;
37317 while (param_string_cst
37318 && TREE_CODE (param_string_cst) != STRING_CST)
37320 /* *args must be a expr that can contain other EXPRS leading to a
37322 if (!EXPR_P (param_string_cst))
37324 error ("Parameter to builtin must be a string constant or literal");
37325 return integer_zero_node;
37327 param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0);
37330 gcc_assert (param_string_cst);
37332 if (fn_code == IX86_BUILTIN_CPU_IS)
37338 unsigned int field_val = 0;
37339 unsigned int NUM_ARCH_NAMES
37340 = sizeof (arch_names_table) / sizeof (struct _arch_names_table);
/* Linear search for the requested architecture name.  */
37342 for (i = 0; i < NUM_ARCH_NAMES; i++)
37343 if (strcmp (arch_names_table[i].name,
37344 TREE_STRING_POINTER (param_string_cst)) == 0)
37347 if (i == NUM_ARCH_NAMES)
37349 error ("Parameter to builtin not valid: %s",
37350 TREE_STRING_POINTER (param_string_cst));
37351 return integer_zero_node;
37354 field = TYPE_FIELDS (__processor_model_type);
37355 field_val = arch_names_table[i].model;
37357 /* CPU types are stored in the next field. */
37358 if (field_val > M_CPU_TYPE_START
37359 && field_val < M_CPU_SUBTYPE_START)
37361 field = DECL_CHAIN (field);
37362 field_val -= M_CPU_TYPE_START;
37365 /* CPU subtypes are stored in the next field. */
37366 if (field_val > M_CPU_SUBTYPE_START)
37368 field = DECL_CHAIN ( DECL_CHAIN (field));
37369 field_val -= M_CPU_SUBTYPE_START;
37372 /* Get the appropriate field in __cpu_model. */
37373 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
37376 /* Check the value. */
/* Fold to (__cpu_model.<field> == field_val), converted to int.  */
37377 final = build2 (EQ_EXPR, unsigned_type_node, ref,
37378 build_int_cstu (unsigned_type_node, field_val));
37379 return build1 (CONVERT_EXPR, integer_type_node, final);
37381 else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS)
37388 unsigned int field_val = 0;
37389 unsigned int NUM_ISA_NAMES
37390 = sizeof (isa_names_table) / sizeof (struct _isa_names_table);
37392 for (i = 0; i < NUM_ISA_NAMES; i++)
37393 if (strcmp (isa_names_table[i].name,
37394 TREE_STRING_POINTER (param_string_cst)) == 0)
37397 if (i == NUM_ISA_NAMES)
37399 error ("Parameter to builtin not valid: %s",
37400 TREE_STRING_POINTER (param_string_cst));
37401 return integer_zero_node;
37404 field = TYPE_FIELDS (__processor_model_type);
37405 /* Get the last field, which is __cpu_features. */
37406 while (DECL_CHAIN (field))
37407 field = DECL_CHAIN (field);
37409 /* Get the appropriate field: __cpu_model.__cpu_features */
37410 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
37413 /* Access the 0th element of __cpu_features array. */
37414 array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
37415 integer_zero_node, NULL_TREE, NULL_TREE);
/* NOTE(review): 1 << feature assumes the feature bit index is < 32,
   i.e. fits in word 0 of the features array -- confirm when new
   features are added.  */
37417 field_val = (1 << isa_names_table[i].feature);
37418 /* Return __cpu_model.__cpu_features[0] & field_val */
37419 final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
37420 build_int_cstu (unsigned_type_node, field_val));
37421 return build1 (CONVERT_EXPR, integer_type_node, final);
/* Only the two builtins above may reach this function.  */
37423 gcc_unreachable ();
/* Target hook: fold a call to an x86 builtin at the tree level.
   Currently only __builtin_cpu_is / __builtin_cpu_supports are folded;
   subtargets may fold more via SUBTARGET_FOLD_BUILTIN.  */
37427 ix86_fold_builtin (tree fndecl, int n_args,
37428 tree *args, bool ignore ATTRIBUTE_UNUSED)
37430 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
37432 enum ix86_builtins fn_code = (enum ix86_builtins)
37433 DECL_FUNCTION_CODE (fndecl);
37434 if (fn_code == IX86_BUILTIN_CPU_IS
37435 || fn_code == IX86_BUILTIN_CPU_SUPPORTS)
37437 gcc_assert (n_args == 1);
37438 return fold_builtin_cpu (fndecl, args);
37442 #ifdef SUBTARGET_FOLD_BUILTIN
37443 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
37449 /* Make builtins to detect cpu type and features supported. NAME is
37450 the builtin name, CODE is the builtin code, and FTYPE is the function
37451 type of the builtin. */
37454 make_cpu_type_builtin (const char* name, int code,
37455 enum ix86_builtin_func_type ftype, bool is_const)
37460 type = ix86_get_builtin_func_type (ftype);
37461 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
37463 gcc_assert (decl != NULL_TREE);
/* Record the decl so later lookups by code find it; IS_CONST lets the
   optimizers CSE calls with identical arguments.  */
37464 ix86_builtins[(int) code] = decl;
37465 TREE_READONLY (decl) = is_const;
37468 /* Make builtins to get CPU type and features supported. The created
37471 __builtin_cpu_init (), to detect cpu type and features,
37472 __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
37473 __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>
37477 ix86_init_platform_type_builtins (void)
/* cpu_init has side effects (fills __cpu_model), so it is not const;
   the two query builtins are pure reads and marked const.  */
37479 make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT,
37480 INT_FTYPE_VOID, false);
37481 make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS,
37482 INT_FTYPE_PCCHAR, true);
37483 make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS,
37484 INT_FTYPE_PCCHAR, true);
37487 /* Internal method for ix86_init_builtins. */
/* Registers per-ABI va_start/va_end/va_copy builtins so that ms_abi
   and sysv_abi functions each get the correct varargs handling.  */
37490 ix86_init_builtins_va_builtins_abi (void)
37492 tree ms_va_ref, sysv_va_ref;
37493 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
37494 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
37495 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
37496 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
/* Tag each builtin with the ABI attribute it belongs to.  */
37500 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
37501 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
37502 ms_va_ref = build_reference_type (ms_va_list_type_node);
37504 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
37507 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
37508 fnvoid_va_start_ms =
37509 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
37510 fnvoid_va_end_sysv =
37511 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
37512 fnvoid_va_start_sysv =
37513 build_varargs_function_type_list (void_type_node, sysv_va_ref,
37515 fnvoid_va_copy_ms =
37516 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
37518 fnvoid_va_copy_sysv =
37519 build_function_type_list (void_type_node, sysv_va_ref,
37520 sysv_va_ref, NULL_TREE);
/* Both ABI variants map onto the generic BUILT_IN_VA_* codes; the
   attribute lists distinguish them at expansion time.  */
37522 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
37523 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
37524 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
37525 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
37526 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
37527 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
37528 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
37529 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
37530 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
37531 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
37532 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
37533 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
/* Register the x86-specific __float80 and __float128 builtin types.  */
37537 ix86_init_builtin_types (void)
37539 tree float128_type_node, float80_type_node;
37541 /* The __float80 type. */
/* Reuse long double when it is already the 80-bit extended type
   (XFmode); otherwise build a distinct 80-bit REAL_TYPE.  */
37542 float80_type_node = long_double_type_node;
37543 if (TYPE_MODE (float80_type_node) != XFmode)
37545 /* The __float80 type. */
37546 float80_type_node = make_node (REAL_TYPE);
37548 TYPE_PRECISION (float80_type_node) = 80;
37549 layout_type (float80_type_node);
37551 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
37553 /* The __float128 type. */
37554 float128_type_node = make_node (REAL_TYPE);
37555 TYPE_PRECISION (float128_type_node) = 128;
37556 layout_type (float128_type_node);
37557 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
37559 /* This macro is built by i386-builtin-types.awk. */
37560 DEFINE_BUILTIN_PRIMITIVE_TYPES;
/* Target hook: register all x86 builtin functions and types.  */
37564 ix86_init_builtins (void)
37568 ix86_init_builtin_types ();
37570 /* Builtins to get CPU type and features. */
37571 ix86_init_platform_type_builtins ();
37573 /* TFmode support builtins. */
37574 def_builtin_const (0, "__builtin_infq",
37575 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
37576 def_builtin_const (0, "__builtin_huge_valq",
37577 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
37579 /* We will expand them to normal call if SSE isn't available since
37580 they are used by libgcc. */
37581 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
37582 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
37583 BUILT_IN_MD, "__fabstf2", NULL_TREE);
37584 TREE_READONLY (t) = 1;
37585 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
37587 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
37588 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
37589 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
37590 TREE_READONLY (t) = 1;
37591 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
/* Bulk registration of the TM, MMX/SSE and MPX builtin groups.  */
37593 ix86_init_tm_builtins ();
37594 ix86_init_mmx_sse_builtins ();
37595 ix86_init_mpx_builtins ();
37598 ix86_init_builtins_va_builtins_abi ();
37600 #ifdef SUBTARGET_INIT_BUILTINS
37601 SUBTARGET_INIT_BUILTINS;
37605 /* Return the ix86 builtin for CODE. */
37608 ix86_builtin_decl (unsigned code, bool)
/* Out-of-range codes yield error_mark_node per the hook contract.  */
37610 if (code >= IX86_BUILTIN_MAX)
37611 return error_mark_node;
37613 return ix86_builtins[code];
37616 /* Errors in the source file can cause expand_expr to return const0_rtx
37617 where we expect a vector. To avoid crashing, use one of the vector
37618 clear instructions. */
37620 safe_vector_operand (rtx x, machine_mode mode)
/* Promote scalar zero to the all-zeros vector constant of MODE.  */
37622 if (x == const0_rtx)
37623 x = CONST0_RTX (mode);
37627 /* Fixup modeless constants to fit required mode. */
37629 fixup_modeless_constant (rtx x, machine_mode mode)
/* VOIDmode marks a bare constant; give it MODE (zero-extended).  */
37631 if (GET_MODE (x) == VOIDmode)
37632 x = convert_to_mode (mode, x, 1);
37636 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
37639 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
37642 tree arg0 = CALL_EXPR_ARG (exp, 0);
37643 tree arg1 = CALL_EXPR_ARG (exp, 1);
37644 rtx op0 = expand_normal (arg0);
37645 rtx op1 = expand_normal (arg1);
37646 machine_mode tmode = insn_data[icode].operand[0].mode;
37647 machine_mode mode0 = insn_data[icode].operand[1].mode;
37648 machine_mode mode1 = insn_data[icode].operand[2].mode;
/* Guard against const0_rtx standing in for a vector (see
   safe_vector_operand).  */
37650 if (VECTOR_MODE_P (mode0))
37651 op0 = safe_vector_operand (op0, mode0);
37652 if (VECTOR_MODE_P (mode1))
37653 op1 = safe_vector_operand (op1, mode1);
/* Use a fresh pseudo when optimizing or when TARGET does not satisfy
   the insn's output predicate/mode.  */
37655 if (optimize || !target
37656 || GET_MODE (target) != tmode
37657 || !insn_data[icode].operand[0].predicate (target, tmode))
37658 target = gen_reg_rtx (tmode);
/* SImode scalar feeding a TImode operand: load it into a V4SI lane
   and reinterpret as TImode.  */
37660 if (GET_MODE (op1) == SImode && mode1 == TImode)
37662 rtx x = gen_reg_rtx (V4SImode);
37663 emit_insn (gen_sse2_loadd (x, op1));
37664 op1 = gen_lowpart (TImode, x);
/* Copy operands into registers if they fail the insn predicates.  */
37667 if (!insn_data[icode].operand[1].predicate (op0, mode0))
37668 op0 = copy_to_mode_reg (mode0, op0);
37669 if (!insn_data[icode].operand[2].predicate (op1, mode1))
37670 op1 = copy_to_mode_reg (mode1, op1);
37672 pat = GEN_FCN (icode) (target, op0, op1);
37681 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
/* Expand XOP/multi-argument builtin EXP through ICODE.  M_TYPE selects
   the argument-count/shape class; SUB_CODE is the rtx comparison code
   for the *_CMP variants (passed as an extra operand or wrapped in an
   rtx for comparison patterns).  Several lines of the original (nargs
   assignments inside the switch, break statements, the final emit) are
   outside this extract.  */
37684 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
37685 enum ix86_builtin_func_type m_type,
37686 enum rtx_code sub_code)
37691 bool comparison_p = false;
37693 bool last_arg_constant = false;
37694 int num_memory = 0;
37700 machine_mode tmode = insn_data[icode].operand[0].mode;
/* Classify M_TYPE: four-operand forms whose last argument must be an
   immediate.  */
37704 case MULTI_ARG_4_DF2_DI_I:
37705 case MULTI_ARG_4_DF2_DI_I1:
37706 case MULTI_ARG_4_SF2_SI_I:
37707 case MULTI_ARG_4_SF2_SI_I1:
37709 last_arg_constant = true;
/* Plain three-operand forms.  */
37712 case MULTI_ARG_3_SF:
37713 case MULTI_ARG_3_DF:
37714 case MULTI_ARG_3_SF2:
37715 case MULTI_ARG_3_DF2:
37716 case MULTI_ARG_3_DI:
37717 case MULTI_ARG_3_SI:
37718 case MULTI_ARG_3_SI_DI:
37719 case MULTI_ARG_3_HI:
37720 case MULTI_ARG_3_HI_SI:
37721 case MULTI_ARG_3_QI:
37722 case MULTI_ARG_3_DI2:
37723 case MULTI_ARG_3_SI2:
37724 case MULTI_ARG_3_HI2:
37725 case MULTI_ARG_3_QI2:
/* Plain two-operand forms.  */
37729 case MULTI_ARG_2_SF:
37730 case MULTI_ARG_2_DF:
37731 case MULTI_ARG_2_DI:
37732 case MULTI_ARG_2_SI:
37733 case MULTI_ARG_2_HI:
37734 case MULTI_ARG_2_QI:
/* Two-operand forms whose second argument is an immediate.  */
37738 case MULTI_ARG_2_DI_IMM:
37739 case MULTI_ARG_2_SI_IMM:
37740 case MULTI_ARG_2_HI_IMM:
37741 case MULTI_ARG_2_QI_IMM:
37743 last_arg_constant = true;
/* One-operand forms.  */
37746 case MULTI_ARG_1_SF:
37747 case MULTI_ARG_1_DF:
37748 case MULTI_ARG_1_SF2:
37749 case MULTI_ARG_1_DF2:
37750 case MULTI_ARG_1_DI:
37751 case MULTI_ARG_1_SI:
37752 case MULTI_ARG_1_HI:
37753 case MULTI_ARG_1_QI:
37754 case MULTI_ARG_1_SI_DI:
37755 case MULTI_ARG_1_HI_DI:
37756 case MULTI_ARG_1_HI_SI:
37757 case MULTI_ARG_1_QI_DI:
37758 case MULTI_ARG_1_QI_SI:
37759 case MULTI_ARG_1_QI_HI:
/* Comparison forms: SUB_CODE becomes an embedded comparison rtx and
   operand indices shift by one (see ADJUST below).  */
37763 case MULTI_ARG_2_DI_CMP:
37764 case MULTI_ARG_2_SI_CMP:
37765 case MULTI_ARG_2_HI_CMP:
37766 case MULTI_ARG_2_QI_CMP:
37768 comparison_p = true;
/* Two-operand TF (true/false condition) forms.  */
37771 case MULTI_ARG_2_SF_TF:
37772 case MULTI_ARG_2_DF_TF:
37773 case MULTI_ARG_2_DI_TF:
37774 case MULTI_ARG_2_SI_TF:
37775 case MULTI_ARG_2_HI_TF:
37776 case MULTI_ARG_2_QI_TF:
37782 gcc_unreachable ();
/* Pick or create a suitable destination register.  */
37785 if (optimize || !target
37786 || GET_MODE (target) != tmode
37787 || !insn_data[icode].operand[0].predicate (target, tmode))
37788 target = gen_reg_rtx (tmode);
37790 gcc_assert (nargs <= 4);
/* Expand and legitimize each call argument.  */
37792 for (i = 0; i < nargs; i++)
37794 tree arg = CALL_EXPR_ARG (exp, i);
37795 rtx op = expand_normal (arg);
/* For comparison patterns operand 1 is the comparison rtx, so input
   operands start one slot later.  */
37796 int adjust = (comparison_p) ? 1 : 0;
37797 machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
37799 if (last_arg_constant && i == nargs - 1)
37801 if (!insn_data[icode].operand[i + 1].predicate (op, mode))
37803 enum insn_code new_icode = icode;
37806 case CODE_FOR_xop_vpermil2v2df3:
37807 case CODE_FOR_xop_vpermil2v4sf3:
37808 case CODE_FOR_xop_vpermil2v4df3:
37809 case CODE_FOR_xop_vpermil2v8sf3:
37810 error ("the last argument must be a 2-bit immediate");
/* Return a dummy register so expansion can continue after the error.  */
37811 return gen_reg_rtx (tmode);
/* XOP rotates with a non-immediate count: retarget to the generic
   rotate patterns, masking an out-of-range immediate count.  */
37812 case CODE_FOR_xop_rotlv2di3:
37813 new_icode = CODE_FOR_rotlv2di3;
37815 case CODE_FOR_xop_rotlv4si3:
37816 new_icode = CODE_FOR_rotlv4si3;
37818 case CODE_FOR_xop_rotlv8hi3:
37819 new_icode = CODE_FOR_rotlv8hi3;
37821 case CODE_FOR_xop_rotlv16qi3:
37822 new_icode = CODE_FOR_rotlv16qi3;
37824 if (CONST_INT_P (op))
/* Rotate counts are taken modulo the element width.  */
37826 int mask = GET_MODE_UNIT_BITSIZE (tmode) - 1;
37827 op = GEN_INT (INTVAL (op) & mask);
37828 gcc_checking_assert
37829 (insn_data[icode].operand[i + 1].predicate (op, mode));
/* The substituted pattern must agree with the original in modes and
   predicates for the operands we reuse.  */
37833 gcc_checking_assert
37835 && insn_data[new_icode].operand[0].mode == tmode
37836 && insn_data[new_icode].operand[1].mode == tmode
37837 && insn_data[new_icode].operand[2].mode == mode
37838 && insn_data[new_icode].operand[0].predicate
37839 == insn_data[icode].operand[0].predicate
37840 && insn_data[new_icode].operand[1].predicate
37841 == insn_data[icode].operand[1].predicate);
37847 gcc_unreachable ();
37854 if (VECTOR_MODE_P (mode))
37855 op = safe_vector_operand (op, mode);
37857 /* If we aren't optimizing, only allow one memory operand to be
37859 if (memory_operand (op, mode))
37862 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
37865 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
37867 op = force_reg (mode, op);
37871 args[i].mode = mode;
/* Emit the pattern with the collected operands; the shape depends on
   nargs and whether this is a comparison form.  */
37877 pat = GEN_FCN (icode) (target, args[0].op);
37882 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37883 GEN_INT ((int)sub_code));
37884 else if (! comparison_p)
37885 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
/* Comparison form: build the comparison rtx explicitly and pass it as
   operand 1 of the pattern.  */
37888 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
37892 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
37897 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
37901 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
37905 gcc_unreachable ();
37915 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
37916 insns with vec_merge. */
/* Expand a one-argument scalar builtin whose pattern is a unop merged
   into the source vector (operand 2 duplicates the input so the upper
   elements pass through).  TARGET as elsewhere; the emit/return tail is
   outside this extract.  */
37919 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
37923 tree arg0 = CALL_EXPR_ARG (exp, 0);
37924 rtx op1, op0 = expand_normal (arg0);
37925 machine_mode tmode = insn_data[icode].operand[0].mode;
37926 machine_mode mode0 = insn_data[icode].operand[1].mode;
37928 if (optimize || !target
37929 || GET_MODE (target) != tmode
37930 || !insn_data[icode].operand[0].predicate (target, tmode))
37931 target = gen_reg_rtx (tmode);
37933 if (VECTOR_MODE_P (mode0))
37934 op0 = safe_vector_operand (op0, mode0);
37936 if ((optimize && !register_operand (op0, mode0))
37937 || !insn_data[icode].operand[1].predicate (op0, mode0))
37938 op0 = copy_to_mode_reg (mode0, op0);
/* op1 is initialized (to op0, per the vec_merge idiom) on a line
   missing from this extract before the predicate check below.  */
37941 if (!insn_data[icode].operand[2].predicate (op1, mode0))
37942 op1 = copy_to_mode_reg (mode0, op1);
37944 pat = GEN_FCN (icode) (target, op0, op1);
37951 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
/* Expand SSE compare builtin EXP described by D (icode + comparison
   code).  SWAP requests exchanging the operands first, for comparisons
   only available with reversed operand order.  The comparison itself is
   passed to the pattern as an embedded rtx (op2 below).  */
37954 ix86_expand_sse_compare (const struct builtin_description *d,
37955 tree exp, rtx target, bool swap)
37958 tree arg0 = CALL_EXPR_ARG (exp, 0);
37959 tree arg1 = CALL_EXPR_ARG (exp, 1);
37960 rtx op0 = expand_normal (arg0);
37961 rtx op1 = expand_normal (arg1);
37963 machine_mode tmode = insn_data[d->icode].operand[0].mode;
37964 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
37965 machine_mode mode1 = insn_data[d->icode].operand[2].mode;
37966 enum rtx_code comparison = d->comparison;
37968 if (VECTOR_MODE_P (mode0))
37969 op0 = safe_vector_operand (op0, mode0);
37970 if (VECTOR_MODE_P (mode1))
37971 op1 = safe_vector_operand (op1, mode1);
37973 /* Swap operands if we have a comparison that isn't available in
37976 std::swap (op0, op1);
37978 if (optimize || !target
37979 || GET_MODE (target) != tmode
37980 || !insn_data[d->icode].operand[0].predicate (target, tmode))
37981 target = gen_reg_rtx (tmode);
37983 if ((optimize && !register_operand (op0, mode0))
37984 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
37985 op0 = copy_to_mode_reg (mode0, op0);
37986 if ((optimize && !register_operand (op1, mode1))
37987 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
37988 op1 = copy_to_mode_reg (mode1, op1);
/* The comparison rtx itself is the third pattern operand.  */
37990 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
37991 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
37998 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
/* Expand a COMI/UCOMI builtin: the pattern compares op0/op1 setting
   FLAGS, and the boolean result is materialized by storing the
   comparison of FLAGS into the low QImode part of an SImode pseudo
   pre-cleared to zero.  Returns that SImode register.  Note that for
   comi insns operand 0/1 of the pattern are the two inputs (there is no
   separate output operand), hence the operand[0]/operand[1] predicate
   checks against op0/op1 below.  */
38001 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
38005 tree arg0 = CALL_EXPR_ARG (exp, 0);
38006 tree arg1 = CALL_EXPR_ARG (exp, 1);
38007 rtx op0 = expand_normal (arg0);
38008 rtx op1 = expand_normal (arg1);
38009 machine_mode mode0 = insn_data[d->icode].operand[0].mode;
38010 machine_mode mode1 = insn_data[d->icode].operand[1].mode;
38011 enum rtx_code comparison = d->comparison;
38013 if (VECTOR_MODE_P (mode0))
38014 op0 = safe_vector_operand (op0, mode0);
38015 if (VECTOR_MODE_P (mode1))
38016 op1 = safe_vector_operand (op1, mode1);
38018 /* Swap operands if we have a comparison that isn't available in
38020 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
38021 std::swap (op0, op1);
/* Result accumulator: SImode zero, written through a QImode subreg so
   only the low byte carries the flag value.  */
38023 target = gen_reg_rtx (SImode);
38024 emit_move_insn (target, const0_rtx);
38025 target = gen_rtx_SUBREG (QImode, target, 0);
38027 if ((optimize && !register_operand (op0, mode0))
38028 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
38029 op0 = copy_to_mode_reg (mode0, op0);
38030 if ((optimize && !register_operand (op1, mode1))
38031 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
38032 op1 = copy_to_mode_reg (mode1, op1);
38034 pat = GEN_FCN (d->icode) (op0, op1);
/* Store (comparison FLAGS 0) into the low byte of the result.  */
38038 emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
38039 gen_rtx_fmt_ee (comparison, QImode,
/* Return the full SImode register underlying the QImode subreg.  */
38043 return SUBREG_REG (target);
38046 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
/* Expand a one-argument round builtin; D->comparison is reused here to
   carry the rounding-mode immediate passed as the pattern's last
   operand.  */
38049 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
38053 tree arg0 = CALL_EXPR_ARG (exp, 0);
38054 rtx op1, op0 = expand_normal (arg0);
38055 machine_mode tmode = insn_data[d->icode].operand[0].mode;
38056 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
38058 if (optimize || target == 0
38059 || GET_MODE (target) != tmode
38060 || !insn_data[d->icode].operand[0].predicate (target, tmode))
38061 target = gen_reg_rtx (tmode);
38063 if (VECTOR_MODE_P (mode0))
38064 op0 = safe_vector_operand (op0, mode0);
/* NOTE(review): op0 is checked against operand[0]'s predicate although
   its mode comes from operand[1]; sibling routines use operand[1] here.
   Harmless in practice (failing the check just forces a register copy),
   but worth confirming against the insn patterns.  */
38066 if ((optimize && !register_operand (op0, mode0))
38067 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
38068 op0 = copy_to_mode_reg (mode0, op0);
/* Rounding-mode immediate, smuggled in via d->comparison.  */
38070 op1 = GEN_INT (d->comparison);
38072 pat = GEN_FCN (d->icode) (target, op0, op1);
/* Expand a two-argument round-and-pack-to-int builtin (e.g. the
   vec_pack_sfix forms).  As in ix86_expand_sse_round, d->comparison
   carries the rounding-mode immediate (op2).  */
38080 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
38081 tree exp, rtx target)
38084 tree arg0 = CALL_EXPR_ARG (exp, 0);
38085 tree arg1 = CALL_EXPR_ARG (exp, 1);
38086 rtx op0 = expand_normal (arg0);
38087 rtx op1 = expand_normal (arg1);
38089 machine_mode tmode = insn_data[d->icode].operand[0].mode;
38090 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
38091 machine_mode mode1 = insn_data[d->icode].operand[2].mode;
38093 if (optimize || target == 0
38094 || GET_MODE (target) != tmode
38095 || !insn_data[d->icode].operand[0].predicate (target, tmode))
38096 target = gen_reg_rtx (tmode);
38098 op0 = safe_vector_operand (op0, mode0);
38099 op1 = safe_vector_operand (op1, mode1);
/* NOTE(review): predicates checked are operand[0]/operand[1] while the
   modes come from operand[1]/operand[2]; matches shipped GCC but is
   inconsistent with sibling routines — confirm against the patterns.  */
38101 if ((optimize && !register_operand (op0, mode0))
38102 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
38103 op0 = copy_to_mode_reg (mode0, op0);
38104 if ((optimize && !register_operand (op1, mode1))
38105 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
38106 op1 = copy_to_mode_reg (mode1, op1);
/* Rounding-mode immediate.  */
38108 op2 = GEN_INT (d->comparison);
38110 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
38117 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
/* Expand a PTEST-style builtin: the pattern sets FLAGS from op0/op1,
   and the boolean result (comparison of FLAGS against zero) is stored
   into the low byte of a zeroed SImode pseudo, which is returned.
   Same flags-to-int idiom as ix86_expand_sse_comi.  */
38120 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
38124 tree arg0 = CALL_EXPR_ARG (exp, 0);
38125 tree arg1 = CALL_EXPR_ARG (exp, 1);
38126 rtx op0 = expand_normal (arg0);
38127 rtx op1 = expand_normal (arg1);
38128 machine_mode mode0 = insn_data[d->icode].operand[0].mode;
38129 machine_mode mode1 = insn_data[d->icode].operand[1].mode;
38130 enum rtx_code comparison = d->comparison;
38132 if (VECTOR_MODE_P (mode0))
38133 op0 = safe_vector_operand (op0, mode0);
38134 if (VECTOR_MODE_P (mode1))
38135 op1 = safe_vector_operand (op1, mode1);
38137 target = gen_reg_rtx (SImode);
38138 emit_move_insn (target, const0_rtx);
38139 target = gen_rtx_SUBREG (QImode, target, 0);
38141 if ((optimize && !register_operand (op0, mode0))
38142 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
38143 op0 = copy_to_mode_reg (mode0, op0);
38144 if ((optimize && !register_operand (op1, mode1))
38145 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
38146 op1 = copy_to_mode_reg (mode1, op1);
38148 pat = GEN_FCN (d->icode) (op0, op1);
38152 emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
38153 gen_rtx_fmt_ee (comparison, QImode,
38157 return SUBREG_REG (target);
38160 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
/* Expand PCMPESTRI/PCMPESTRM builtins (explicit-length string compare,
   five arguments: vec0, len0, vec1, len1, imm8).  The patterns have two
   outputs (index in tmode0, mask in tmode1); depending on d->code one
   becomes the user-visible result and the other goes to a scratch.  For
   the flag-extracting variants (neither PCMPESTRI128 nor PCMPESTRM128)
   both outputs are scratch and the requested FLAGS bit (d->flag) is
   returned as a 0/1 SImode value.  */
38163 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
38164 tree exp, rtx target)
38167 tree arg0 = CALL_EXPR_ARG (exp, 0);
38168 tree arg1 = CALL_EXPR_ARG (exp, 1);
38169 tree arg2 = CALL_EXPR_ARG (exp, 2);
38170 tree arg3 = CALL_EXPR_ARG (exp, 3);
38171 tree arg4 = CALL_EXPR_ARG (exp, 4);
38172 rtx scratch0, scratch1;
38173 rtx op0 = expand_normal (arg0);
38174 rtx op1 = expand_normal (arg1);
38175 rtx op2 = expand_normal (arg2);
38176 rtx op3 = expand_normal (arg3);
38177 rtx op4 = expand_normal (arg4);
/* Operand layout: 0 = index output, 1 = mask output, 2/4 = the two
   string vectors, 3/5 = their lengths, 6 = the control immediate.  */
38178 machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
38180 tmode0 = insn_data[d->icode].operand[0].mode;
38181 tmode1 = insn_data[d->icode].operand[1].mode;
38182 modev2 = insn_data[d->icode].operand[2].mode;
38183 modei3 = insn_data[d->icode].operand[3].mode;
38184 modev4 = insn_data[d->icode].operand[4].mode;
38185 modei5 = insn_data[d->icode].operand[5].mode;
38186 modeimm = insn_data[d->icode].operand[6].mode;
38188 if (VECTOR_MODE_P (modev2))
38189 op0 = safe_vector_operand (op0, modev2);
38190 if (VECTOR_MODE_P (modev4))
38191 op2 = safe_vector_operand (op2, modev4);
38193 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
38194 op0 = copy_to_mode_reg (modev2, op0);
38195 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
38196 op1 = copy_to_mode_reg (modei3, op1);
38197 if ((optimize && !register_operand (op2, modev4))
38198 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
38199 op2 = copy_to_mode_reg (modev4, op2);
38200 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
38201 op3 = copy_to_mode_reg (modei5, op3);
38203 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
38205 error ("the fifth argument must be an 8-bit immediate");
/* PCMPESTRI: index output is the result; mask output is scratch.  */
38209 if (d->code == IX86_BUILTIN_PCMPESTRI128)
38211 if (optimize || !target
38212 || GET_MODE (target) != tmode0
38213 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
38214 target = gen_reg_rtx (tmode0);
38216 scratch1 = gen_reg_rtx (tmode1);
38218 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
/* PCMPESTRM: mask output is the result; index output is scratch.  */
38220 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
38222 if (optimize || !target
38223 || GET_MODE (target) != tmode1
38224 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
38225 target = gen_reg_rtx (tmode1);
38227 scratch0 = gen_reg_rtx (tmode0);
38229 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
/* Flag variants: both outputs discarded; d->flag names the FLAGS reg
   bit (CF/ZF/...) whose EQ-test becomes the result.  */
38233 gcc_assert (d->flag);
38235 scratch0 = gen_reg_rtx (tmode0);
38236 scratch1 = gen_reg_rtx (tmode1);
38238 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
38248 target = gen_reg_rtx (SImode);
38249 emit_move_insn (target, const0_rtx);
38250 target = gen_rtx_SUBREG (QImode, target, 0);
38253 (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
38254 gen_rtx_fmt_ee (EQ, QImode,
38255 gen_rtx_REG ((machine_mode) d->flag,
38258 return SUBREG_REG (target);
38265 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
/* Expand PCMPISTRI/PCMPISTRM builtins (implicit-length string compare,
   three arguments: vec0, vec1, imm8).  Mirrors ix86_expand_sse_pcmpestr
   but without the two explicit length operands: operand 0/1 are the
   index/mask outputs, 2/3 the vectors, 4 the control immediate.  */
38268 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
38269 tree exp, rtx target)
38272 tree arg0 = CALL_EXPR_ARG (exp, 0);
38273 tree arg1 = CALL_EXPR_ARG (exp, 1);
38274 tree arg2 = CALL_EXPR_ARG (exp, 2);
38275 rtx scratch0, scratch1;
38276 rtx op0 = expand_normal (arg0);
38277 rtx op1 = expand_normal (arg1);
38278 rtx op2 = expand_normal (arg2);
38279 machine_mode tmode0, tmode1, modev2, modev3, modeimm;
38281 tmode0 = insn_data[d->icode].operand[0].mode;
38282 tmode1 = insn_data[d->icode].operand[1].mode;
38283 modev2 = insn_data[d->icode].operand[2].mode;
38284 modev3 = insn_data[d->icode].operand[3].mode;
38285 modeimm = insn_data[d->icode].operand[4].mode;
38287 if (VECTOR_MODE_P (modev2))
38288 op0 = safe_vector_operand (op0, modev2);
38289 if (VECTOR_MODE_P (modev3))
38290 op1 = safe_vector_operand (op1, modev3);
38292 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
38293 op0 = copy_to_mode_reg (modev2, op0);
38294 if ((optimize && !register_operand (op1, modev3))
38295 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
38296 op1 = copy_to_mode_reg (modev3, op1);
38298 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
38300 error ("the third argument must be an 8-bit immediate");
/* PCMPISTRI: index output is the result; mask output is scratch.  */
38304 if (d->code == IX86_BUILTIN_PCMPISTRI128)
38306 if (optimize || !target
38307 || GET_MODE (target) != tmode0
38308 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
38309 target = gen_reg_rtx (tmode0);
38311 scratch1 = gen_reg_rtx (tmode1);
38313 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
/* PCMPISTRM: mask output is the result; index output is scratch.  */
38315 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
38317 if (optimize || !target
38318 || GET_MODE (target) != tmode1
38319 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
38320 target = gen_reg_rtx (tmode1);
38322 scratch0 = gen_reg_rtx (tmode0);
38324 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
/* Flag variants: both outputs scratch; return the EQ-test of the FLAGS
   bit named by d->flag as a 0/1 SImode value.  */
38328 gcc_assert (d->flag);
38330 scratch0 = gen_reg_rtx (tmode0);
38331 scratch1 = gen_reg_rtx (tmode1);
38333 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
38343 target = gen_reg_rtx (SImode);
38344 emit_move_insn (target, const0_rtx);
38345 target = gen_rtx_SUBREG (QImode, target, 0);
38348 (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
38349 gen_rtx_fmt_ee (EQ, QImode,
38350 gen_rtx_REG ((machine_mode) d->flag,
38353 return SUBREG_REG (target);
38359 /* Subroutine of ix86_expand_builtin to take care of insns with
38360 variable number of operands. */
38363 ix86_expand_args_builtin (const struct builtin_description *d,
38364 tree exp, rtx target)
38366 rtx pat, real_target;
38367 unsigned int i, nargs;
38368 unsigned int nargs_constant = 0;
38369 unsigned int mask_pos = 0;
38370 int num_memory = 0;
38376 bool last_arg_count = false;
38377 enum insn_code icode = d->icode;
38378 const struct insn_data_d *insn_p = &insn_data[icode];
38379 machine_mode tmode = insn_p->operand[0].mode;
38380 machine_mode rmode = VOIDmode;
38382 enum rtx_code comparison = d->comparison;
38384 switch ((enum ix86_builtin_func_type) d->flag)
38386 case V2DF_FTYPE_V2DF_ROUND:
38387 case V4DF_FTYPE_V4DF_ROUND:
38388 case V4SF_FTYPE_V4SF_ROUND:
38389 case V8SF_FTYPE_V8SF_ROUND:
38390 case V4SI_FTYPE_V4SF_ROUND:
38391 case V8SI_FTYPE_V8SF_ROUND:
38392 return ix86_expand_sse_round (d, exp, target);
38393 case V4SI_FTYPE_V2DF_V2DF_ROUND:
38394 case V8SI_FTYPE_V4DF_V4DF_ROUND:
38395 case V16SI_FTYPE_V8DF_V8DF_ROUND:
38396 return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
38397 case INT_FTYPE_V8SF_V8SF_PTEST:
38398 case INT_FTYPE_V4DI_V4DI_PTEST:
38399 case INT_FTYPE_V4DF_V4DF_PTEST:
38400 case INT_FTYPE_V4SF_V4SF_PTEST:
38401 case INT_FTYPE_V2DI_V2DI_PTEST:
38402 case INT_FTYPE_V2DF_V2DF_PTEST:
38403 return ix86_expand_sse_ptest (d, exp, target);
38404 case FLOAT128_FTYPE_FLOAT128:
38405 case FLOAT_FTYPE_FLOAT:
38406 case INT_FTYPE_INT:
38407 case UINT64_FTYPE_INT:
38408 case UINT16_FTYPE_UINT16:
38409 case INT64_FTYPE_INT64:
38410 case INT64_FTYPE_V4SF:
38411 case INT64_FTYPE_V2DF:
38412 case INT_FTYPE_V16QI:
38413 case INT_FTYPE_V8QI:
38414 case INT_FTYPE_V8SF:
38415 case INT_FTYPE_V4DF:
38416 case INT_FTYPE_V4SF:
38417 case INT_FTYPE_V2DF:
38418 case INT_FTYPE_V32QI:
38419 case V16QI_FTYPE_V16QI:
38420 case V8SI_FTYPE_V8SF:
38421 case V8SI_FTYPE_V4SI:
38422 case V8HI_FTYPE_V8HI:
38423 case V8HI_FTYPE_V16QI:
38424 case V8QI_FTYPE_V8QI:
38425 case V8SF_FTYPE_V8SF:
38426 case V8SF_FTYPE_V8SI:
38427 case V8SF_FTYPE_V4SF:
38428 case V8SF_FTYPE_V8HI:
38429 case V4SI_FTYPE_V4SI:
38430 case V4SI_FTYPE_V16QI:
38431 case V4SI_FTYPE_V4SF:
38432 case V4SI_FTYPE_V8SI:
38433 case V4SI_FTYPE_V8HI:
38434 case V4SI_FTYPE_V4DF:
38435 case V4SI_FTYPE_V2DF:
38436 case V4HI_FTYPE_V4HI:
38437 case V4DF_FTYPE_V4DF:
38438 case V4DF_FTYPE_V4SI:
38439 case V4DF_FTYPE_V4SF:
38440 case V4DF_FTYPE_V2DF:
38441 case V4SF_FTYPE_V4SF:
38442 case V4SF_FTYPE_V4SI:
38443 case V4SF_FTYPE_V8SF:
38444 case V4SF_FTYPE_V4DF:
38445 case V4SF_FTYPE_V8HI:
38446 case V4SF_FTYPE_V2DF:
38447 case V2DI_FTYPE_V2DI:
38448 case V2DI_FTYPE_V16QI:
38449 case V2DI_FTYPE_V8HI:
38450 case V2DI_FTYPE_V4SI:
38451 case V2DF_FTYPE_V2DF:
38452 case V2DF_FTYPE_V4SI:
38453 case V2DF_FTYPE_V4DF:
38454 case V2DF_FTYPE_V4SF:
38455 case V2DF_FTYPE_V2SI:
38456 case V2SI_FTYPE_V2SI:
38457 case V2SI_FTYPE_V4SF:
38458 case V2SI_FTYPE_V2SF:
38459 case V2SI_FTYPE_V2DF:
38460 case V2SF_FTYPE_V2SF:
38461 case V2SF_FTYPE_V2SI:
38462 case V32QI_FTYPE_V32QI:
38463 case V32QI_FTYPE_V16QI:
38464 case V16HI_FTYPE_V16HI:
38465 case V16HI_FTYPE_V8HI:
38466 case V8SI_FTYPE_V8SI:
38467 case V16HI_FTYPE_V16QI:
38468 case V8SI_FTYPE_V16QI:
38469 case V4DI_FTYPE_V16QI:
38470 case V8SI_FTYPE_V8HI:
38471 case V4DI_FTYPE_V8HI:
38472 case V4DI_FTYPE_V4SI:
38473 case V4DI_FTYPE_V2DI:
38474 case UHI_FTYPE_UHI:
38475 case UHI_FTYPE_V16QI:
38476 case USI_FTYPE_V32QI:
38477 case UDI_FTYPE_V64QI:
38478 case V16QI_FTYPE_UHI:
38479 case V32QI_FTYPE_USI:
38480 case V64QI_FTYPE_UDI:
38481 case V8HI_FTYPE_UQI:
38482 case V16HI_FTYPE_UHI:
38483 case V32HI_FTYPE_USI:
38484 case V4SI_FTYPE_UQI:
38485 case V8SI_FTYPE_UQI:
38486 case V4SI_FTYPE_UHI:
38487 case V8SI_FTYPE_UHI:
38488 case UQI_FTYPE_V8HI:
38489 case UHI_FTYPE_V16HI:
38490 case USI_FTYPE_V32HI:
38491 case UQI_FTYPE_V4SI:
38492 case UQI_FTYPE_V8SI:
38493 case UHI_FTYPE_V16SI:
38494 case UQI_FTYPE_V2DI:
38495 case UQI_FTYPE_V4DI:
38496 case UQI_FTYPE_V8DI:
38497 case V16SI_FTYPE_UHI:
38498 case V2DI_FTYPE_UQI:
38499 case V4DI_FTYPE_UQI:
38500 case V16SI_FTYPE_INT:
38501 case V16SF_FTYPE_V8SF:
38502 case V16SI_FTYPE_V8SI:
38503 case V16SF_FTYPE_V4SF:
38504 case V16SI_FTYPE_V4SI:
38505 case V16SF_FTYPE_V16SF:
38506 case V8DI_FTYPE_UQI:
38507 case V8DF_FTYPE_V4DF:
38508 case V8DF_FTYPE_V2DF:
38509 case V8DF_FTYPE_V8DF:
38512 case V4SF_FTYPE_V4SF_VEC_MERGE:
38513 case V2DF_FTYPE_V2DF_VEC_MERGE:
38514 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
38515 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
38516 case V16QI_FTYPE_V16QI_V16QI:
38517 case V16QI_FTYPE_V8HI_V8HI:
38518 case V16SF_FTYPE_V16SF_V16SF:
38519 case V8QI_FTYPE_V8QI_V8QI:
38520 case V8QI_FTYPE_V4HI_V4HI:
38521 case V8HI_FTYPE_V8HI_V8HI:
38522 case V8HI_FTYPE_V16QI_V16QI:
38523 case V8HI_FTYPE_V4SI_V4SI:
38524 case V8SF_FTYPE_V8SF_V8SF:
38525 case V8SF_FTYPE_V8SF_V8SI:
38526 case V8DF_FTYPE_V8DF_V8DF:
38527 case V4SI_FTYPE_V4SI_V4SI:
38528 case V4SI_FTYPE_V8HI_V8HI:
38529 case V4SI_FTYPE_V2DF_V2DF:
38530 case V4HI_FTYPE_V4HI_V4HI:
38531 case V4HI_FTYPE_V8QI_V8QI:
38532 case V4HI_FTYPE_V2SI_V2SI:
38533 case V4DF_FTYPE_V4DF_V4DF:
38534 case V4DF_FTYPE_V4DF_V4DI:
38535 case V4SF_FTYPE_V4SF_V4SF:
38536 case V4SF_FTYPE_V4SF_V4SI:
38537 case V4SF_FTYPE_V4SF_V2SI:
38538 case V4SF_FTYPE_V4SF_V2DF:
38539 case V4SF_FTYPE_V4SF_UINT:
38540 case V4SF_FTYPE_V4SF_DI:
38541 case V4SF_FTYPE_V4SF_SI:
38542 case V2DI_FTYPE_V2DI_V2DI:
38543 case V2DI_FTYPE_V16QI_V16QI:
38544 case V2DI_FTYPE_V4SI_V4SI:
38545 case V2DI_FTYPE_V2DI_V16QI:
38546 case V2SI_FTYPE_V2SI_V2SI:
38547 case V2SI_FTYPE_V4HI_V4HI:
38548 case V2SI_FTYPE_V2SF_V2SF:
38549 case V2DF_FTYPE_V2DF_V2DF:
38550 case V2DF_FTYPE_V2DF_V4SF:
38551 case V2DF_FTYPE_V2DF_V2DI:
38552 case V2DF_FTYPE_V2DF_DI:
38553 case V2DF_FTYPE_V2DF_SI:
38554 case V2DF_FTYPE_V2DF_UINT:
38555 case V2SF_FTYPE_V2SF_V2SF:
38556 case V1DI_FTYPE_V1DI_V1DI:
38557 case V1DI_FTYPE_V8QI_V8QI:
38558 case V1DI_FTYPE_V2SI_V2SI:
38559 case V32QI_FTYPE_V16HI_V16HI:
38560 case V16HI_FTYPE_V8SI_V8SI:
38561 case V32QI_FTYPE_V32QI_V32QI:
38562 case V16HI_FTYPE_V32QI_V32QI:
38563 case V16HI_FTYPE_V16HI_V16HI:
38564 case V8SI_FTYPE_V4DF_V4DF:
38565 case V8SI_FTYPE_V8SI_V8SI:
38566 case V8SI_FTYPE_V16HI_V16HI:
38567 case V4DI_FTYPE_V4DI_V4DI:
38568 case V4DI_FTYPE_V8SI_V8SI:
38569 case V8DI_FTYPE_V64QI_V64QI:
38570 if (comparison == UNKNOWN)
38571 return ix86_expand_binop_builtin (icode, exp, target);
38574 case V4SF_FTYPE_V4SF_V4SF_SWAP:
38575 case V2DF_FTYPE_V2DF_V2DF_SWAP:
38576 gcc_assert (comparison != UNKNOWN);
38580 case V16HI_FTYPE_V16HI_V8HI_COUNT:
38581 case V16HI_FTYPE_V16HI_SI_COUNT:
38582 case V8SI_FTYPE_V8SI_V4SI_COUNT:
38583 case V8SI_FTYPE_V8SI_SI_COUNT:
38584 case V4DI_FTYPE_V4DI_V2DI_COUNT:
38585 case V4DI_FTYPE_V4DI_INT_COUNT:
38586 case V8HI_FTYPE_V8HI_V8HI_COUNT:
38587 case V8HI_FTYPE_V8HI_SI_COUNT:
38588 case V4SI_FTYPE_V4SI_V4SI_COUNT:
38589 case V4SI_FTYPE_V4SI_SI_COUNT:
38590 case V4HI_FTYPE_V4HI_V4HI_COUNT:
38591 case V4HI_FTYPE_V4HI_SI_COUNT:
38592 case V2DI_FTYPE_V2DI_V2DI_COUNT:
38593 case V2DI_FTYPE_V2DI_SI_COUNT:
38594 case V2SI_FTYPE_V2SI_V2SI_COUNT:
38595 case V2SI_FTYPE_V2SI_SI_COUNT:
38596 case V1DI_FTYPE_V1DI_V1DI_COUNT:
38597 case V1DI_FTYPE_V1DI_SI_COUNT:
38599 last_arg_count = true;
38601 case UINT64_FTYPE_UINT64_UINT64:
38602 case UINT_FTYPE_UINT_UINT:
38603 case UINT_FTYPE_UINT_USHORT:
38604 case UINT_FTYPE_UINT_UCHAR:
38605 case UINT16_FTYPE_UINT16_INT:
38606 case UINT8_FTYPE_UINT8_INT:
38607 case UHI_FTYPE_UHI_UHI:
38608 case USI_FTYPE_USI_USI:
38609 case UDI_FTYPE_UDI_UDI:
38610 case V16SI_FTYPE_V8DF_V8DF:
38613 case V2DI_FTYPE_V2DI_INT_CONVERT:
38616 nargs_constant = 1;
38618 case V4DI_FTYPE_V4DI_INT_CONVERT:
38621 nargs_constant = 1;
38623 case V8DI_FTYPE_V8DI_INT_CONVERT:
38626 nargs_constant = 1;
38628 case V8HI_FTYPE_V8HI_INT:
38629 case V8HI_FTYPE_V8SF_INT:
38630 case V16HI_FTYPE_V16SF_INT:
38631 case V8HI_FTYPE_V4SF_INT:
38632 case V8SF_FTYPE_V8SF_INT:
38633 case V4SF_FTYPE_V16SF_INT:
38634 case V16SF_FTYPE_V16SF_INT:
38635 case V4SI_FTYPE_V4SI_INT:
38636 case V4SI_FTYPE_V8SI_INT:
38637 case V4HI_FTYPE_V4HI_INT:
38638 case V4DF_FTYPE_V4DF_INT:
38639 case V4DF_FTYPE_V8DF_INT:
38640 case V4SF_FTYPE_V4SF_INT:
38641 case V4SF_FTYPE_V8SF_INT:
38642 case V2DI_FTYPE_V2DI_INT:
38643 case V2DF_FTYPE_V2DF_INT:
38644 case V2DF_FTYPE_V4DF_INT:
38645 case V16HI_FTYPE_V16HI_INT:
38646 case V8SI_FTYPE_V8SI_INT:
38647 case V16SI_FTYPE_V16SI_INT:
38648 case V4SI_FTYPE_V16SI_INT:
38649 case V4DI_FTYPE_V4DI_INT:
38650 case V2DI_FTYPE_V4DI_INT:
38651 case V4DI_FTYPE_V8DI_INT:
38652 case QI_FTYPE_V4SF_INT:
38653 case QI_FTYPE_V2DF_INT:
38655 nargs_constant = 1;
38657 case V16QI_FTYPE_V16QI_V16QI_V16QI:
38658 case V8SF_FTYPE_V8SF_V8SF_V8SF:
38659 case V4DF_FTYPE_V4DF_V4DF_V4DF:
38660 case V4SF_FTYPE_V4SF_V4SF_V4SF:
38661 case V2DF_FTYPE_V2DF_V2DF_V2DF:
38662 case V32QI_FTYPE_V32QI_V32QI_V32QI:
38663 case UHI_FTYPE_V16SI_V16SI_UHI:
38664 case UQI_FTYPE_V8DI_V8DI_UQI:
38665 case V16HI_FTYPE_V16SI_V16HI_UHI:
38666 case V16QI_FTYPE_V16SI_V16QI_UHI:
38667 case V16QI_FTYPE_V8DI_V16QI_UQI:
38668 case V16SF_FTYPE_V16SF_V16SF_UHI:
38669 case V16SF_FTYPE_V4SF_V16SF_UHI:
38670 case V16SI_FTYPE_SI_V16SI_UHI:
38671 case V16SI_FTYPE_V16HI_V16SI_UHI:
38672 case V16SI_FTYPE_V16QI_V16SI_UHI:
38673 case V8SF_FTYPE_V4SF_V8SF_UQI:
38674 case V4DF_FTYPE_V2DF_V4DF_UQI:
38675 case V8SI_FTYPE_V4SI_V8SI_UQI:
38676 case V8SI_FTYPE_SI_V8SI_UQI:
38677 case V4SI_FTYPE_V4SI_V4SI_UQI:
38678 case V4SI_FTYPE_SI_V4SI_UQI:
38679 case V4DI_FTYPE_V2DI_V4DI_UQI:
38680 case V4DI_FTYPE_DI_V4DI_UQI:
38681 case V2DI_FTYPE_V2DI_V2DI_UQI:
38682 case V2DI_FTYPE_DI_V2DI_UQI:
38683 case V64QI_FTYPE_V64QI_V64QI_UDI:
38684 case V64QI_FTYPE_V16QI_V64QI_UDI:
38685 case V64QI_FTYPE_QI_V64QI_UDI:
38686 case V32QI_FTYPE_V32QI_V32QI_USI:
38687 case V32QI_FTYPE_V16QI_V32QI_USI:
38688 case V32QI_FTYPE_QI_V32QI_USI:
38689 case V16QI_FTYPE_V16QI_V16QI_UHI:
38690 case V16QI_FTYPE_QI_V16QI_UHI:
38691 case V32HI_FTYPE_V8HI_V32HI_USI:
38692 case V32HI_FTYPE_HI_V32HI_USI:
38693 case V16HI_FTYPE_V8HI_V16HI_UHI:
38694 case V16HI_FTYPE_HI_V16HI_UHI:
38695 case V8HI_FTYPE_V8HI_V8HI_UQI:
38696 case V8HI_FTYPE_HI_V8HI_UQI:
38697 case V8SF_FTYPE_V8HI_V8SF_UQI:
38698 case V4SF_FTYPE_V8HI_V4SF_UQI:
38699 case V8SI_FTYPE_V8SF_V8SI_UQI:
38700 case V4SI_FTYPE_V4SF_V4SI_UQI:
38701 case V4DI_FTYPE_V4SF_V4DI_UQI:
38702 case V2DI_FTYPE_V4SF_V2DI_UQI:
38703 case V4SF_FTYPE_V4DI_V4SF_UQI:
38704 case V4SF_FTYPE_V2DI_V4SF_UQI:
38705 case V4DF_FTYPE_V4DI_V4DF_UQI:
38706 case V2DF_FTYPE_V2DI_V2DF_UQI:
38707 case V16QI_FTYPE_V8HI_V16QI_UQI:
38708 case V16QI_FTYPE_V16HI_V16QI_UHI:
38709 case V16QI_FTYPE_V4SI_V16QI_UQI:
38710 case V16QI_FTYPE_V8SI_V16QI_UQI:
38711 case V8HI_FTYPE_V4SI_V8HI_UQI:
38712 case V8HI_FTYPE_V8SI_V8HI_UQI:
38713 case V16QI_FTYPE_V2DI_V16QI_UQI:
38714 case V16QI_FTYPE_V4DI_V16QI_UQI:
38715 case V8HI_FTYPE_V2DI_V8HI_UQI:
38716 case V8HI_FTYPE_V4DI_V8HI_UQI:
38717 case V4SI_FTYPE_V2DI_V4SI_UQI:
38718 case V4SI_FTYPE_V4DI_V4SI_UQI:
38719 case V32QI_FTYPE_V32HI_V32QI_USI:
38720 case UHI_FTYPE_V16QI_V16QI_UHI:
38721 case USI_FTYPE_V32QI_V32QI_USI:
38722 case UDI_FTYPE_V64QI_V64QI_UDI:
38723 case UQI_FTYPE_V8HI_V8HI_UQI:
38724 case UHI_FTYPE_V16HI_V16HI_UHI:
38725 case USI_FTYPE_V32HI_V32HI_USI:
38726 case UQI_FTYPE_V4SI_V4SI_UQI:
38727 case UQI_FTYPE_V8SI_V8SI_UQI:
38728 case UQI_FTYPE_V2DI_V2DI_UQI:
38729 case UQI_FTYPE_V4DI_V4DI_UQI:
38730 case V4SF_FTYPE_V2DF_V4SF_UQI:
38731 case V4SF_FTYPE_V4DF_V4SF_UQI:
38732 case V16SI_FTYPE_V16SI_V16SI_UHI:
38733 case V16SI_FTYPE_V4SI_V16SI_UHI:
38734 case V2DI_FTYPE_V4SI_V2DI_UQI:
38735 case V2DI_FTYPE_V8HI_V2DI_UQI:
38736 case V2DI_FTYPE_V16QI_V2DI_UQI:
38737 case V4DI_FTYPE_V4DI_V4DI_UQI:
38738 case V4DI_FTYPE_V4SI_V4DI_UQI:
38739 case V4DI_FTYPE_V8HI_V4DI_UQI:
38740 case V4DI_FTYPE_V16QI_V4DI_UQI:
38741 case V4DI_FTYPE_V4DF_V4DI_UQI:
38742 case V2DI_FTYPE_V2DF_V2DI_UQI:
38743 case V4SI_FTYPE_V4DF_V4SI_UQI:
38744 case V4SI_FTYPE_V2DF_V4SI_UQI:
38745 case V4SI_FTYPE_V8HI_V4SI_UQI:
38746 case V4SI_FTYPE_V16QI_V4SI_UQI:
38747 case V4DI_FTYPE_V4DI_V4DI_V4DI:
38748 case V8DF_FTYPE_V2DF_V8DF_UQI:
38749 case V8DF_FTYPE_V4DF_V8DF_UQI:
38750 case V8DF_FTYPE_V8DF_V8DF_UQI:
38751 case V8SF_FTYPE_V8SF_V8SF_UQI:
38752 case V8SF_FTYPE_V8SI_V8SF_UQI:
38753 case V4DF_FTYPE_V4DF_V4DF_UQI:
38754 case V4SF_FTYPE_V4SF_V4SF_UQI:
38755 case V2DF_FTYPE_V2DF_V2DF_UQI:
38756 case V2DF_FTYPE_V4SF_V2DF_UQI:
38757 case V2DF_FTYPE_V4SI_V2DF_UQI:
38758 case V4SF_FTYPE_V4SI_V4SF_UQI:
38759 case V4DF_FTYPE_V4SF_V4DF_UQI:
38760 case V4DF_FTYPE_V4SI_V4DF_UQI:
38761 case V8SI_FTYPE_V8SI_V8SI_UQI:
38762 case V8SI_FTYPE_V8HI_V8SI_UQI:
38763 case V8SI_FTYPE_V16QI_V8SI_UQI:
38764 case V8DF_FTYPE_V8SI_V8DF_UQI:
38765 case V8DI_FTYPE_DI_V8DI_UQI:
38766 case V16SF_FTYPE_V8SF_V16SF_UHI:
38767 case V16SI_FTYPE_V8SI_V16SI_UHI:
38768 case V16HI_FTYPE_V16HI_V16HI_UHI:
38769 case V8HI_FTYPE_V16QI_V8HI_UQI:
38770 case V16HI_FTYPE_V16QI_V16HI_UHI:
38771 case V32HI_FTYPE_V32HI_V32HI_USI:
38772 case V32HI_FTYPE_V32QI_V32HI_USI:
38773 case V8DI_FTYPE_V16QI_V8DI_UQI:
38774 case V8DI_FTYPE_V2DI_V8DI_UQI:
38775 case V8DI_FTYPE_V4DI_V8DI_UQI:
38776 case V8DI_FTYPE_V8DI_V8DI_UQI:
38777 case V8DI_FTYPE_V8HI_V8DI_UQI:
38778 case V8DI_FTYPE_V8SI_V8DI_UQI:
38779 case V8HI_FTYPE_V8DI_V8HI_UQI:
38780 case V8SI_FTYPE_V8DI_V8SI_UQI:
38781 case V4SI_FTYPE_V4SI_V4SI_V4SI:
38784 case V32QI_FTYPE_V32QI_V32QI_INT:
38785 case V16HI_FTYPE_V16HI_V16HI_INT:
38786 case V16QI_FTYPE_V16QI_V16QI_INT:
38787 case V4DI_FTYPE_V4DI_V4DI_INT:
38788 case V8HI_FTYPE_V8HI_V8HI_INT:
38789 case V8SI_FTYPE_V8SI_V8SI_INT:
38790 case V8SI_FTYPE_V8SI_V4SI_INT:
38791 case V8SF_FTYPE_V8SF_V8SF_INT:
38792 case V8SF_FTYPE_V8SF_V4SF_INT:
38793 case V4SI_FTYPE_V4SI_V4SI_INT:
38794 case V4DF_FTYPE_V4DF_V4DF_INT:
38795 case V16SF_FTYPE_V16SF_V16SF_INT:
38796 case V16SF_FTYPE_V16SF_V4SF_INT:
38797 case V16SI_FTYPE_V16SI_V4SI_INT:
38798 case V4DF_FTYPE_V4DF_V2DF_INT:
38799 case V4SF_FTYPE_V4SF_V4SF_INT:
38800 case V2DI_FTYPE_V2DI_V2DI_INT:
38801 case V4DI_FTYPE_V4DI_V2DI_INT:
38802 case V2DF_FTYPE_V2DF_V2DF_INT:
38803 case UQI_FTYPE_V8DI_V8UDI_INT:
38804 case UQI_FTYPE_V8DF_V8DF_INT:
38805 case UQI_FTYPE_V2DF_V2DF_INT:
38806 case UQI_FTYPE_V4SF_V4SF_INT:
38807 case UHI_FTYPE_V16SI_V16SI_INT:
38808 case UHI_FTYPE_V16SF_V16SF_INT:
38810 nargs_constant = 1;
38812 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
38815 nargs_constant = 1;
38817 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
38820 nargs_constant = 1;
38822 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
38825 nargs_constant = 1;
38827 case V2DI_FTYPE_V2DI_UINT_UINT:
38829 nargs_constant = 2;
38831 case V8DI_FTYPE_V8DI_V8DI_INT_CONVERT:
38834 nargs_constant = 1;
38836 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UDI_CONVERT:
38840 nargs_constant = 1;
38842 case QI_FTYPE_V8DF_INT_UQI:
38843 case QI_FTYPE_V4DF_INT_UQI:
38844 case QI_FTYPE_V2DF_INT_UQI:
38845 case HI_FTYPE_V16SF_INT_UHI:
38846 case QI_FTYPE_V8SF_INT_UQI:
38847 case QI_FTYPE_V4SF_INT_UQI:
38850 nargs_constant = 1;
38852 case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_USI_CONVERT:
38856 nargs_constant = 1;
38858 case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_UHI_CONVERT:
38862 nargs_constant = 1;
38864 case V32QI_FTYPE_V32QI_V32QI_V32QI_USI:
38865 case V32HI_FTYPE_V32HI_V32HI_V32HI_USI:
38866 case V32HI_FTYPE_V64QI_V64QI_V32HI_USI:
38867 case V16SI_FTYPE_V32HI_V32HI_V16SI_UHI:
38868 case V64QI_FTYPE_V64QI_V64QI_V64QI_UDI:
38869 case V32HI_FTYPE_V32HI_V8HI_V32HI_USI:
38870 case V16HI_FTYPE_V16HI_V8HI_V16HI_UHI:
38871 case V8SI_FTYPE_V8SI_V4SI_V8SI_UQI:
38872 case V4DI_FTYPE_V4DI_V2DI_V4DI_UQI:
38873 case V64QI_FTYPE_V32HI_V32HI_V64QI_UDI:
38874 case V32QI_FTYPE_V16HI_V16HI_V32QI_USI:
38875 case V16QI_FTYPE_V8HI_V8HI_V16QI_UHI:
38876 case V32HI_FTYPE_V16SI_V16SI_V32HI_USI:
38877 case V16HI_FTYPE_V8SI_V8SI_V16HI_UHI:
38878 case V8HI_FTYPE_V4SI_V4SI_V8HI_UQI:
38879 case V4DF_FTYPE_V4DF_V4DI_V4DF_UQI:
38880 case V8SF_FTYPE_V8SF_V8SI_V8SF_UQI:
38881 case V4SF_FTYPE_V4SF_V4SI_V4SF_UQI:
38882 case V2DF_FTYPE_V2DF_V2DI_V2DF_UQI:
38883 case V2DI_FTYPE_V4SI_V4SI_V2DI_UQI:
38884 case V4DI_FTYPE_V8SI_V8SI_V4DI_UQI:
38885 case V4DF_FTYPE_V4DI_V4DF_V4DF_UQI:
38886 case V8SF_FTYPE_V8SI_V8SF_V8SF_UQI:
38887 case V2DF_FTYPE_V2DI_V2DF_V2DF_UQI:
38888 case V4SF_FTYPE_V4SI_V4SF_V4SF_UQI:
38889 case V16SF_FTYPE_V16SF_V16SF_V16SF_UHI:
38890 case V16SF_FTYPE_V16SF_V16SI_V16SF_UHI:
38891 case V16SF_FTYPE_V16SI_V16SF_V16SF_UHI:
38892 case V16SI_FTYPE_V16SI_V16SI_V16SI_UHI:
38893 case V16SI_FTYPE_V16SI_V4SI_V16SI_UHI:
38894 case V8HI_FTYPE_V8HI_V8HI_V8HI_UQI:
38895 case V8SI_FTYPE_V8SI_V8SI_V8SI_UQI:
38896 case V4SI_FTYPE_V4SI_V4SI_V4SI_UQI:
38897 case V8SF_FTYPE_V8SF_V8SF_V8SF_UQI:
38898 case V16QI_FTYPE_V16QI_V16QI_V16QI_UHI:
38899 case V16HI_FTYPE_V16HI_V16HI_V16HI_UHI:
38900 case V2DI_FTYPE_V2DI_V2DI_V2DI_UQI:
38901 case V2DF_FTYPE_V2DF_V2DF_V2DF_UQI:
38902 case V4DI_FTYPE_V4DI_V4DI_V4DI_UQI:
38903 case V4DF_FTYPE_V4DF_V4DF_V4DF_UQI:
38904 case V4SF_FTYPE_V4SF_V4SF_V4SF_UQI:
38905 case V8DF_FTYPE_V8DF_V8DF_V8DF_UQI:
38906 case V8DF_FTYPE_V8DF_V8DI_V8DF_UQI:
38907 case V8DF_FTYPE_V8DI_V8DF_V8DF_UQI:
38908 case V8DI_FTYPE_V16SI_V16SI_V8DI_UQI:
38909 case V8DI_FTYPE_V8DI_V2DI_V8DI_UQI:
38910 case V8DI_FTYPE_V8DI_V8DI_V8DI_UQI:
38911 case V8HI_FTYPE_V16QI_V16QI_V8HI_UQI:
38912 case V16HI_FTYPE_V32QI_V32QI_V16HI_UHI:
38913 case V8SI_FTYPE_V16HI_V16HI_V8SI_UQI:
38914 case V4SI_FTYPE_V8HI_V8HI_V4SI_UQI:
38917 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
38918 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
38919 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
38920 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
38921 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT:
38923 nargs_constant = 1;
38925 case UQI_FTYPE_V4DI_V4DI_INT_UQI:
38926 case UQI_FTYPE_V8SI_V8SI_INT_UQI:
38927 case QI_FTYPE_V4DF_V4DF_INT_UQI:
38928 case QI_FTYPE_V8SF_V8SF_INT_UQI:
38929 case UQI_FTYPE_V2DI_V2DI_INT_UQI:
38930 case UQI_FTYPE_V4SI_V4SI_INT_UQI:
38931 case UQI_FTYPE_V2DF_V2DF_INT_UQI:
38932 case UQI_FTYPE_V4SF_V4SF_INT_UQI:
38933 case UDI_FTYPE_V64QI_V64QI_INT_UDI:
38934 case USI_FTYPE_V32QI_V32QI_INT_USI:
38935 case UHI_FTYPE_V16QI_V16QI_INT_UHI:
38936 case USI_FTYPE_V32HI_V32HI_INT_USI:
38937 case UHI_FTYPE_V16HI_V16HI_INT_UHI:
38938 case UQI_FTYPE_V8HI_V8HI_INT_UQI:
38941 nargs_constant = 1;
38943 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
38945 nargs_constant = 2;
38947 case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED:
38948 case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG:
38951 case UQI_FTYPE_V8DI_V8DI_INT_UQI:
38952 case UHI_FTYPE_V16SI_V16SI_INT_UHI:
38955 nargs_constant = 1;
38957 case V8SF_FTYPE_V8SF_INT_V8SF_UQI:
38958 case V4SF_FTYPE_V4SF_INT_V4SF_UQI:
38959 case V2DF_FTYPE_V4DF_INT_V2DF_UQI:
38960 case V2DI_FTYPE_V4DI_INT_V2DI_UQI:
38961 case V8SF_FTYPE_V16SF_INT_V8SF_UQI:
38962 case V8SI_FTYPE_V16SI_INT_V8SI_UQI:
38963 case V2DF_FTYPE_V8DF_INT_V2DF_UQI:
38964 case V2DI_FTYPE_V8DI_INT_V2DI_UQI:
38965 case V4SF_FTYPE_V8SF_INT_V4SF_UQI:
38966 case V4SI_FTYPE_V8SI_INT_V4SI_UQI:
38967 case V8HI_FTYPE_V8SF_INT_V8HI_UQI:
38968 case V8HI_FTYPE_V4SF_INT_V8HI_UQI:
38969 case V32HI_FTYPE_V32HI_INT_V32HI_USI:
38970 case V16HI_FTYPE_V16HI_INT_V16HI_UHI:
38971 case V8HI_FTYPE_V8HI_INT_V8HI_UQI:
38972 case V4DI_FTYPE_V4DI_INT_V4DI_UQI:
38973 case V2DI_FTYPE_V2DI_INT_V2DI_UQI:
38974 case V8SI_FTYPE_V8SI_INT_V8SI_UQI:
38975 case V4SI_FTYPE_V4SI_INT_V4SI_UQI:
38976 case V4DF_FTYPE_V4DF_INT_V4DF_UQI:
38977 case V2DF_FTYPE_V2DF_INT_V2DF_UQI:
38978 case V8DF_FTYPE_V8DF_INT_V8DF_UQI:
38979 case V16SF_FTYPE_V16SF_INT_V16SF_UHI:
38980 case V16HI_FTYPE_V16SF_INT_V16HI_UHI:
38981 case V16SI_FTYPE_V16SI_INT_V16SI_UHI:
38982 case V4SI_FTYPE_V16SI_INT_V4SI_UQI:
38983 case V4DI_FTYPE_V8DI_INT_V4DI_UQI:
38984 case V4DF_FTYPE_V8DF_INT_V4DF_UQI:
38985 case V4SF_FTYPE_V16SF_INT_V4SF_UQI:
38986 case V8DI_FTYPE_V8DI_INT_V8DI_UQI:
38989 nargs_constant = 1;
38991 case V16SF_FTYPE_V16SF_V4SF_INT_V16SF_UHI:
38992 case V16SI_FTYPE_V16SI_V4SI_INT_V16SI_UHI:
38993 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_UQI:
38994 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UQI:
38995 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_UHI:
38996 case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_UHI:
38997 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI:
38998 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI:
38999 case V8DF_FTYPE_V8DF_V4DF_INT_V8DF_UQI:
39000 case V8DI_FTYPE_V8DI_V4DI_INT_V8DI_UQI:
39001 case V4DF_FTYPE_V4DF_V4DF_INT_V4DF_UQI:
39002 case V8SF_FTYPE_V8SF_V8SF_INT_V8SF_UQI:
39003 case V8DF_FTYPE_V8DF_V2DF_INT_V8DF_UQI:
39004 case V8DI_FTYPE_V8DI_V2DI_INT_V8DI_UQI:
39005 case V8SI_FTYPE_V8SI_V8SI_INT_V8SI_UQI:
39006 case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_UQI:
39007 case V4SI_FTYPE_V4SI_V4SI_INT_V4SI_UQI:
39008 case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_UQI:
39009 case V32HI_FTYPE_V64QI_V64QI_INT_V32HI_USI:
39010 case V16HI_FTYPE_V32QI_V32QI_INT_V16HI_UHI:
39011 case V8HI_FTYPE_V16QI_V16QI_INT_V8HI_UQI:
39012 case V16SF_FTYPE_V16SF_V8SF_INT_V16SF_UHI:
39013 case V16SI_FTYPE_V16SI_V8SI_INT_V16SI_UHI:
39014 case V8SF_FTYPE_V8SF_V4SF_INT_V8SF_UQI:
39015 case V8SI_FTYPE_V8SI_V4SI_INT_V8SI_UQI:
39016 case V4DI_FTYPE_V4DI_V2DI_INT_V4DI_UQI:
39017 case V4DF_FTYPE_V4DF_V2DF_INT_V4DF_UQI:
39020 nargs_constant = 1;
39022 case V8DI_FTYPE_V8DI_V8DI_V8DI_INT_UQI:
39023 case V16SI_FTYPE_V16SI_V16SI_V16SI_INT_UHI:
39024 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_UQI:
39025 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_UQI:
39026 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT_UQI:
39027 case V8SI_FTYPE_V8SI_V8SI_V8SI_INT_UQI:
39028 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT_UQI:
39029 case V4DI_FTYPE_V4DI_V4DI_V4DI_INT_UQI:
39030 case V4SI_FTYPE_V4SI_V4SI_V4SI_INT_UQI:
39031 case V2DI_FTYPE_V2DI_V2DI_V2DI_INT_UQI:
39035 nargs_constant = 1;
39039 gcc_unreachable ();
39042 gcc_assert (nargs <= ARRAY_SIZE (args));
39044 if (comparison != UNKNOWN)
39046 gcc_assert (nargs == 2);
39047 return ix86_expand_sse_compare (d, exp, target, swap);
39050 if (rmode == VOIDmode || rmode == tmode)
39054 || GET_MODE (target) != tmode
39055 || !insn_p->operand[0].predicate (target, tmode))
39056 target = gen_reg_rtx (tmode);
39057 real_target = target;
39061 real_target = gen_reg_rtx (tmode);
39062 target = simplify_gen_subreg (rmode, real_target, tmode, 0);
39065 for (i = 0; i < nargs; i++)
39067 tree arg = CALL_EXPR_ARG (exp, i);
39068 rtx op = expand_normal (arg);
39069 machine_mode mode = insn_p->operand[i + 1].mode;
39070 bool match = insn_p->operand[i + 1].predicate (op, mode);
39072 if (last_arg_count && (i + 1) == nargs)
39074 /* SIMD shift insns take either an 8-bit immediate or
39075 register as count. But builtin functions take int as
39076 count. If count doesn't match, we put it in register. */
39079 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
39080 if (!insn_p->operand[i + 1].predicate (op, mode))
39081 op = copy_to_reg (op);
39084 else if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
39085 (!mask_pos && (nargs - i) <= nargs_constant))
39090 case CODE_FOR_avx_vinsertf128v4di:
39091 case CODE_FOR_avx_vextractf128v4di:
39092 error ("the last argument must be an 1-bit immediate");
39095 case CODE_FOR_avx512f_cmpv8di3_mask:
39096 case CODE_FOR_avx512f_cmpv16si3_mask:
39097 case CODE_FOR_avx512f_ucmpv8di3_mask:
39098 case CODE_FOR_avx512f_ucmpv16si3_mask:
39099 case CODE_FOR_avx512vl_cmpv4di3_mask:
39100 case CODE_FOR_avx512vl_cmpv8si3_mask:
39101 case CODE_FOR_avx512vl_ucmpv4di3_mask:
39102 case CODE_FOR_avx512vl_ucmpv8si3_mask:
39103 case CODE_FOR_avx512vl_cmpv2di3_mask:
39104 case CODE_FOR_avx512vl_cmpv4si3_mask:
39105 case CODE_FOR_avx512vl_ucmpv2di3_mask:
39106 case CODE_FOR_avx512vl_ucmpv4si3_mask:
39107 error ("the last argument must be a 3-bit immediate");
39110 case CODE_FOR_sse4_1_roundsd:
39111 case CODE_FOR_sse4_1_roundss:
39113 case CODE_FOR_sse4_1_roundpd:
39114 case CODE_FOR_sse4_1_roundps:
39115 case CODE_FOR_avx_roundpd256:
39116 case CODE_FOR_avx_roundps256:
39118 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
39119 case CODE_FOR_sse4_1_roundps_sfix:
39120 case CODE_FOR_avx_roundpd_vec_pack_sfix256:
39121 case CODE_FOR_avx_roundps_sfix256:
39123 case CODE_FOR_sse4_1_blendps:
39124 case CODE_FOR_avx_blendpd256:
39125 case CODE_FOR_avx_vpermilv4df:
39126 case CODE_FOR_avx_vpermilv4df_mask:
39127 case CODE_FOR_avx512f_getmantv8df_mask:
39128 case CODE_FOR_avx512f_getmantv16sf_mask:
39129 case CODE_FOR_avx512vl_getmantv8sf_mask:
39130 case CODE_FOR_avx512vl_getmantv4df_mask:
39131 case CODE_FOR_avx512vl_getmantv4sf_mask:
39132 case CODE_FOR_avx512vl_getmantv2df_mask:
39133 case CODE_FOR_avx512dq_rangepv8df_mask_round:
39134 case CODE_FOR_avx512dq_rangepv16sf_mask_round:
39135 case CODE_FOR_avx512dq_rangepv4df_mask:
39136 case CODE_FOR_avx512dq_rangepv8sf_mask:
39137 case CODE_FOR_avx512dq_rangepv2df_mask:
39138 case CODE_FOR_avx512dq_rangepv4sf_mask:
39139 case CODE_FOR_avx_shufpd256_mask:
39140 error ("the last argument must be a 4-bit immediate");
39143 case CODE_FOR_sha1rnds4:
39144 case CODE_FOR_sse4_1_blendpd:
39145 case CODE_FOR_avx_vpermilv2df:
39146 case CODE_FOR_avx_vpermilv2df_mask:
39147 case CODE_FOR_xop_vpermil2v2df3:
39148 case CODE_FOR_xop_vpermil2v4sf3:
39149 case CODE_FOR_xop_vpermil2v4df3:
39150 case CODE_FOR_xop_vpermil2v8sf3:
39151 case CODE_FOR_avx512f_vinsertf32x4_mask:
39152 case CODE_FOR_avx512f_vinserti32x4_mask:
39153 case CODE_FOR_avx512f_vextractf32x4_mask:
39154 case CODE_FOR_avx512f_vextracti32x4_mask:
39155 case CODE_FOR_sse2_shufpd:
39156 case CODE_FOR_sse2_shufpd_mask:
39157 case CODE_FOR_avx512dq_shuf_f64x2_mask:
39158 case CODE_FOR_avx512dq_shuf_i64x2_mask:
39159 case CODE_FOR_avx512vl_shuf_i32x4_mask:
39160 case CODE_FOR_avx512vl_shuf_f32x4_mask:
39161 error ("the last argument must be a 2-bit immediate");
39164 case CODE_FOR_avx_vextractf128v4df:
39165 case CODE_FOR_avx_vextractf128v8sf:
39166 case CODE_FOR_avx_vextractf128v8si:
39167 case CODE_FOR_avx_vinsertf128v4df:
39168 case CODE_FOR_avx_vinsertf128v8sf:
39169 case CODE_FOR_avx_vinsertf128v8si:
39170 case CODE_FOR_avx512f_vinsertf64x4_mask:
39171 case CODE_FOR_avx512f_vinserti64x4_mask:
39172 case CODE_FOR_avx512f_vextractf64x4_mask:
39173 case CODE_FOR_avx512f_vextracti64x4_mask:
39174 case CODE_FOR_avx512dq_vinsertf32x8_mask:
39175 case CODE_FOR_avx512dq_vinserti32x8_mask:
39176 case CODE_FOR_avx512vl_vinsertv4df:
39177 case CODE_FOR_avx512vl_vinsertv4di:
39178 case CODE_FOR_avx512vl_vinsertv8sf:
39179 case CODE_FOR_avx512vl_vinsertv8si:
39180 error ("the last argument must be a 1-bit immediate");
39183 case CODE_FOR_avx_vmcmpv2df3:
39184 case CODE_FOR_avx_vmcmpv4sf3:
39185 case CODE_FOR_avx_cmpv2df3:
39186 case CODE_FOR_avx_cmpv4sf3:
39187 case CODE_FOR_avx_cmpv4df3:
39188 case CODE_FOR_avx_cmpv8sf3:
39189 case CODE_FOR_avx512f_cmpv8df3_mask:
39190 case CODE_FOR_avx512f_cmpv16sf3_mask:
39191 case CODE_FOR_avx512f_vmcmpv2df3_mask:
39192 case CODE_FOR_avx512f_vmcmpv4sf3_mask:
39193 error ("the last argument must be a 5-bit immediate");
39197 switch (nargs_constant)
39200 if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
39201 (!mask_pos && (nargs - i) == nargs_constant))
39203 error ("the next to last argument must be an 8-bit immediate");
39207 error ("the last argument must be an 8-bit immediate");
39210 gcc_unreachable ();
39217 if (VECTOR_MODE_P (mode))
39218 op = safe_vector_operand (op, mode);
39220 /* If we aren't optimizing, only allow one memory operand to
39222 if (memory_operand (op, mode))
39225 op = fixup_modeless_constant (op, mode);
39227 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
39229 if (optimize || !match || num_memory > 1)
39230 op = copy_to_mode_reg (mode, op);
39234 op = copy_to_reg (op);
39235 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
39240 args[i].mode = mode;
39246 pat = GEN_FCN (icode) (real_target, args[0].op);
39249 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
39252 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
39256 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
39257 args[2].op, args[3].op);
39260 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
39261 args[2].op, args[3].op, args[4].op);
39263 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
39264 args[2].op, args[3].op, args[4].op,
39268 gcc_unreachable ();
39278 /* Transform pattern of following layout:
39281 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)])
39289 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)
39293 (parallel [ A B ... ]) */
/* Strip the UNSPEC_EMBEDDED_ROUNDING element out of PARALLEL pattern PAT
   and return the resulting pattern.  PAT may be a whole insn or a bare
   pattern.  NOTE(review): several interior lines of this chunk were elided
   during extraction; comments below describe only the visible statements.  */
39296 ix86_erase_embedded_rounding (rtx pat)
/* Accept an insn as well as a bare pattern.  */
39298 if (GET_CODE (pat) == INSN)
39299 pat = PATTERN (pat);
39301 gcc_assert (GET_CODE (pat) == PARALLEL);
/* Fast path: a two-element PARALLEL of the SET plus the rounding unspec.  */
39303 if (XVECLEN (pat, 0) == 2)
39305 rtx p0 = XVECEXP (pat, 0, 0);
39306 rtx p1 = XVECEXP (pat, 0, 1);
39308 gcc_assert (GET_CODE (p0) == SET
39309 && GET_CODE (p1) == UNSPEC
39310 && XINT (p1, 1) == UNSPEC_EMBEDDED_ROUNDING);
/* General case: copy every element except the rounding unspec into RES
   and rebuild the PARALLEL from the survivors.  */
39316 rtx *res = XALLOCAVEC (rtx, XVECLEN (pat, 0));
39320 for (; i < XVECLEN (pat, 0); ++i)
39322 rtx elem = XVECEXP (pat, 0, i);
39323 if (GET_CODE (elem) != UNSPEC
39324 || XINT (elem, 1) != UNSPEC_EMBEDDED_ROUNDING)
39328 /* No more than 1 occurrence was removed. */
39329 gcc_assert (j >= XVECLEN (pat, 0) - 1);
39331 return gen_rtx_PARALLEL (GET_MODE (pat), gen_rtvec_v (j, res))
39335 /* Subroutine of ix86_expand_round_builtin to take care of comi insns
/* Expands the 4-argument comi/ucomi-with-rounding builtins: two scalar
   vector operands, a comparison selector (arg2, must be a constant in
   [0, 32)) and a rounding operand (arg3).  Produces a 0/1 QImode-subreg
   result.  NOTE(review): several interior lines of this chunk were elided
   during extraction; comments annotate only the visible statements.  */
39338 ix86_expand_sse_comi_round (const struct builtin_description *d,
39339 tree exp, rtx target)
39342 tree arg0 = CALL_EXPR_ARG (exp, 0);
39343 tree arg1 = CALL_EXPR_ARG (exp, 1);
39344 tree arg2 = CALL_EXPR_ARG (exp, 2);
39345 tree arg3 = CALL_EXPR_ARG (exp, 3);
39346 rtx op0 = expand_normal (arg0);
39347 rtx op1 = expand_normal (arg1);
39348 rtx op2 = expand_normal (arg2);
39349 rtx op3 = expand_normal (arg3);
39350 enum insn_code icode = d->icode;
39351 const struct insn_data_d *insn_p = &insn_data[icode];
39352 machine_mode mode0 = insn_p->operand[0].mode;
39353 machine_mode mode1 = insn_p->operand[1].mode;
39354 enum rtx_code comparison = UNEQ;
39355 bool need_ucomi = false;
39357 /* See avxintrin.h for values. */
/* Maps the intrinsic comparison selector to the RTL comparison code and
   to whether the quiet (ucomi) variant is required.  */
39358 enum rtx_code comi_comparisons[32] =
39360 UNEQ, GT, GE, UNORDERED, LTGT, UNLE, UNLT, ORDERED, UNEQ, UNLT,
39361 UNLE, LT, LTGT, GE, GT, LT, UNEQ, GT, GE, UNORDERED, LTGT, UNLE,
39362 UNLT, ORDERED, UNEQ, UNLT, UNLE, LT, LTGT, GE, GT, LT
39364 bool need_ucomi_values[32] =
39366 true, false, false, true, true, false, false, true,
39367 true, false, false, true, true, false, false, true,
39368 false, true, true, false, false, true, true, false,
39369 false, true, true, false, false, true, true, false
/* Diagnose invalid selector / rounding operands before any code is
   emitted.  */
39372 if (!CONST_INT_P (op2))
39374 error ("the third argument must be comparison constant");
39377 if (INTVAL (op2) < 0 || INTVAL (op2) >= 32)
39379 error ("incorrect comparison mode");
39383 if (!insn_p->operand[2].predicate (op3, SImode))
39385 error ("incorrect rounding operand");
39389 comparison = comi_comparisons[INTVAL (op2)];
39390 need_ucomi = need_ucomi_values[INTVAL (op2)];
39392 if (VECTOR_MODE_P (mode0))
39393 op0 = safe_vector_operand (op0, mode0);
39394 if (VECTOR_MODE_P (mode1))
39395 op1 = safe_vector_operand (op1, mode1);
/* Result is built as an SImode zero with its low QImode part set from
   the flags comparison below.  */
39397 target = gen_reg_rtx (SImode);
39398 emit_move_insn (target, const0_rtx);
39399 target = gen_rtx_SUBREG (QImode, target, 0);
39401 if ((optimize && !register_operand (op0, mode0))
39402 || !insn_p->operand[0].predicate (op0, mode0))
39403 op0 = copy_to_mode_reg (mode0, op0);
39404 if ((optimize && !register_operand (op1, mode1))
39405 || !insn_p->operand[1].predicate (op1, mode1))
39406 op1 = copy_to_mode_reg (mode1, op1);
/* Switch to the quiet (non-signalling) ucomi insn when the selector
   demands it.  */
39409 icode = icode == CODE_FOR_sse_comi_round
39410 ? CODE_FOR_sse_ucomi_round
39411 : CODE_FOR_sse2_ucomi_round;
39413 pat = GEN_FCN (icode) (op0, op1, op3);
39417 /* Rounding operand can be either NO_ROUND or ROUND_SAE at this point. */
39418 if (INTVAL (op3) == NO_ROUND)
39420 pat = ix86_erase_embedded_rounding (pat);
39424 set_dst = SET_DEST (pat);
39428 gcc_assert (GET_CODE (XVECEXP (pat, 0, 0)) == SET);
39429 set_dst = SET_DEST (XVECEXP (pat, 0, 0));
/* Store the flag comparison into the low byte of the SImode result.  */
39433 emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
39434 gen_rtx_fmt_ee (comparison, QImode,
39438 return SUBREG_REG (target)
39442 ix86_expand_round_builtin (const struct builtin_description *d,
39443 tree exp, rtx target)
/* Subroutine of ix86_expand_builtin: expand builtins whose trailing
   operand is an explicit SAE/rounding immediate.  D describes the
   builtin, EXP is the CALL_EXPR, TARGET a suggested result rtx (may be
   reused).  NOTE(review): several interior lines of this chunk were
   elided during extraction; comments annotate only the visible
   statements.  */
39446 unsigned int i, nargs;
39452 enum insn_code icode = d->icode;
39453 const struct insn_data_d *insn_p = &insn_data[icode];
39454 machine_mode tmode = insn_p->operand[0].mode;
/* Number of trailing operands that must be immediates (the rounding
   operand plus any comparison/imm8 operands before it).  */
39455 unsigned int nargs_constant = 0;
/* Set when the rounding operand is NO_ROUND; the embedded-rounding
   unspec is then stripped from the generated pattern at the end.  */
39456 unsigned int redundant_embed_rnd = 0;
/* Classify the builtin by its function type to derive the operand count
   and immediate-operand layout.  */
39458 switch ((enum ix86_builtin_func_type) d->flag)
39460 case UINT64_FTYPE_V2DF_INT:
39461 case UINT64_FTYPE_V4SF_INT:
39462 case UINT_FTYPE_V2DF_INT:
39463 case UINT_FTYPE_V4SF_INT:
39464 case INT64_FTYPE_V2DF_INT:
39465 case INT64_FTYPE_V4SF_INT:
39466 case INT_FTYPE_V2DF_INT:
39467 case INT_FTYPE_V4SF_INT:
39470 case V4SF_FTYPE_V4SF_UINT_INT:
39471 case V4SF_FTYPE_V4SF_UINT64_INT:
39472 case V2DF_FTYPE_V2DF_UINT64_INT:
39473 case V4SF_FTYPE_V4SF_INT_INT:
39474 case V4SF_FTYPE_V4SF_INT64_INT:
39475 case V2DF_FTYPE_V2DF_INT64_INT:
39476 case V4SF_FTYPE_V4SF_V4SF_INT:
39477 case V2DF_FTYPE_V2DF_V2DF_INT:
39478 case V4SF_FTYPE_V4SF_V2DF_INT:
39479 case V2DF_FTYPE_V2DF_V4SF_INT:
39482 case V8SF_FTYPE_V8DF_V8SF_QI_INT:
39483 case V8DF_FTYPE_V8DF_V8DF_QI_INT:
39484 case V8SI_FTYPE_V8DF_V8SI_QI_INT:
39485 case V8DI_FTYPE_V8DF_V8DI_QI_INT:
39486 case V8SF_FTYPE_V8DI_V8SF_QI_INT:
39487 case V8DF_FTYPE_V8DI_V8DF_QI_INT:
39488 case V16SF_FTYPE_V16SF_V16SF_HI_INT:
39489 case V8DI_FTYPE_V8SF_V8DI_QI_INT:
39490 case V16SF_FTYPE_V16SI_V16SF_HI_INT:
39491 case V16SI_FTYPE_V16SF_V16SI_HI_INT:
39492 case V8DF_FTYPE_V8SF_V8DF_QI_INT:
39493 case V16SF_FTYPE_V16HI_V16SF_HI_INT:
39494 case V2DF_FTYPE_V2DF_V2DF_V2DF_INT:
39495 case V4SF_FTYPE_V4SF_V4SF_V4SF_INT:
39498 case V4SF_FTYPE_V4SF_V4SF_INT_INT:
39499 case V2DF_FTYPE_V2DF_V2DF_INT_INT:
39500 nargs_constant = 2;
/* The comi-style comparisons have their own expander.  */
39503 case INT_FTYPE_V4SF_V4SF_INT_INT:
39504 case INT_FTYPE_V2DF_V2DF_INT_INT:
39505 return ix86_expand_sse_comi_round (d, exp, target);
39506 case V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT:
39507 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT:
39508 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT:
39509 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT:
39510 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT:
39511 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT:
39514 case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT:
39515 case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT:
39516 nargs_constant = 4;
39519 case UQI_FTYPE_V8DF_V8DF_INT_UQI_INT:
39520 case UQI_FTYPE_V2DF_V2DF_INT_UQI_INT:
39521 case UHI_FTYPE_V16SF_V16SF_INT_UHI_INT:
39522 case UQI_FTYPE_V4SF_V4SF_INT_UQI_INT:
39523 nargs_constant = 3;
39526 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT:
39527 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT:
39528 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT:
39529 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT:
39531 nargs_constant = 4;
39533 case V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT:
39534 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT:
39535 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT:
39536 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT:
39538 nargs_constant = 3;
39541 gcc_unreachable ();
39543 gcc_assert (nargs <= ARRAY_SIZE (args));
/* (Re)allocate the result register unless TARGET is already usable.  */
39547 || GET_MODE (target) != tmode
39548 || !insn_p->operand[0].predicate (target, tmode))
39549 target = gen_reg_rtx (tmode);
/* Expand and legitimize each call argument.  */
39551 for (i = 0; i < nargs; i++)
39553 tree arg = CALL_EXPR_ARG (exp, i);
39554 rtx op = expand_normal (arg);
39555 machine_mode mode = insn_p->operand[i + 1].mode;
39556 bool match = insn_p->operand[i + 1].predicate (op, mode);
/* First of the trailing immediates: diagnose out-of-range values with an
   instruction-specific message.  */
39558 if (i == nargs - nargs_constant)
39564 case CODE_FOR_avx512f_getmantv8df_mask_round:
39565 case CODE_FOR_avx512f_getmantv16sf_mask_round:
39566 case CODE_FOR_avx512f_vgetmantv2df_round:
39567 case CODE_FOR_avx512f_vgetmantv4sf_round:
39568 error ("the immediate argument must be a 4-bit immediate");
39570 case CODE_FOR_avx512f_cmpv8df3_mask_round:
39571 case CODE_FOR_avx512f_cmpv16sf3_mask_round:
39572 case CODE_FOR_avx512f_vmcmpv2df3_mask_round:
39573 case CODE_FOR_avx512f_vmcmpv4sf3_mask_round:
39574 error ("the immediate argument must be a 5-bit immediate");
39577 error ("the immediate argument must be an 8-bit immediate");
/* Last argument is the rounding-mode operand itself.  */
39582 else if (i == nargs-1)
39584 if (!insn_p->operand[nargs].predicate (op, SImode))
39586 error ("incorrect rounding operand");
39590 /* If there is no rounding use normal version of the pattern. */
39591 if (INTVAL (op) == NO_ROUND)
39592 redundant_embed_rnd = 1;
39596 if (VECTOR_MODE_P (mode))
39597 op = safe_vector_operand (op, mode);
39599 op = fixup_modeless_constant (op, mode);
39601 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
39603 if (optimize || !match)
39604 op = copy_to_mode_reg (mode, op);
39608 op = copy_to_reg (op);
39609 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
39614 args[i].mode = mode;
/* Emit the insn with the collected operands (arity-dispatched).  */
39620 pat = GEN_FCN (icode) (target, args[0].op);
39623 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
39626 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
39630 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
39631 args[2].op, args[3].op);
39634 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
39635 args[2].op, args[3].op, args[4].op);
39637 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
39638 args[2].op, args[3].op, args[4].op,
39642 gcc_unreachable ();
/* Strip the embedded-rounding unspec when rounding was NO_ROUND.  */
39648 if (redundant_embed_rnd)
39649 pat = ix86_erase_embedded_rounding (pat)
39655 /* Subroutine of ix86_expand_builtin to take care of special insns
39656 with variable number of operands. */
/* "Special" builtins are (possibly masked/aligned) loads, stores,
   gathers and a few zero-operand insns.  D describes the builtin, EXP is
   the CALL_EXPR, TARGET a suggested result rtx.  Returns the result rtx
   for loads and 0 for stores.  NOTE(review): several interior lines of
   this chunk were elided during extraction; comments annotate only the
   visible statements.  */
39659 ix86_expand_special_args_builtin (const struct builtin_description *d,
39660 tree exp, rtx target)
39664 unsigned int i, nargs, arg_adjust, memory;
/* True when the insn requires its memory operand to be mode-aligned.  */
39665 bool aligned_mem = false;
39671 enum insn_code icode = d->icode;
39672 bool last_arg_constant = false;
39673 const struct insn_data_d *insn_p = &insn_data[icode];
39674 machine_mode tmode = insn_p->operand[0].mode;
/* load: value is produced into TARGET; store: the first argument is the
   destination address.  */
39675 enum { load, store } klass;
/* Classify by function type to find the operand count, the index of the
   memory operand, and whether strict alignment applies.  */
39677 switch ((enum ix86_builtin_func_type) d->flag)
39679 case VOID_FTYPE_VOID:
39680 emit_insn (GEN_FCN (icode) (target));
39682 case VOID_FTYPE_UINT64:
39683 case VOID_FTYPE_UNSIGNED:
39689 case INT_FTYPE_VOID:
39690 case USHORT_FTYPE_VOID:
39691 case UINT64_FTYPE_VOID:
39692 case UNSIGNED_FTYPE_VOID:
39697 case UINT64_FTYPE_PUNSIGNED:
39698 case V2DI_FTYPE_PV2DI:
39699 case V4DI_FTYPE_PV4DI:
39700 case V32QI_FTYPE_PCCHAR:
39701 case V16QI_FTYPE_PCCHAR:
39702 case V8SF_FTYPE_PCV4SF:
39703 case V8SF_FTYPE_PCFLOAT:
39704 case V4SF_FTYPE_PCFLOAT:
39705 case V4DF_FTYPE_PCV2DF:
39706 case V4DF_FTYPE_PCDOUBLE:
39707 case V2DF_FTYPE_PCDOUBLE:
39708 case VOID_FTYPE_PVOID:
39709 case V8DI_FTYPE_PV8DI:
/* Non-temporal loads need an aligned source.  */
39715 case CODE_FOR_sse4_1_movntdqa:
39716 case CODE_FOR_avx2_movntdqa:
39717 case CODE_FOR_avx512f_movntdqa:
39718 aligned_mem = true;
39724 case VOID_FTYPE_PV2SF_V4SF:
39725 case VOID_FTYPE_PV8DI_V8DI:
39726 case VOID_FTYPE_PV4DI_V4DI:
39727 case VOID_FTYPE_PV2DI_V2DI:
39728 case VOID_FTYPE_PCHAR_V32QI:
39729 case VOID_FTYPE_PCHAR_V16QI:
39730 case VOID_FTYPE_PFLOAT_V16SF:
39731 case VOID_FTYPE_PFLOAT_V8SF:
39732 case VOID_FTYPE_PFLOAT_V4SF:
39733 case VOID_FTYPE_PDOUBLE_V8DF:
39734 case VOID_FTYPE_PDOUBLE_V4DF:
39735 case VOID_FTYPE_PDOUBLE_V2DF:
39736 case VOID_FTYPE_PLONGLONG_LONGLONG:
39737 case VOID_FTYPE_PULONGLONG_ULONGLONG:
39738 case VOID_FTYPE_PINT_INT:
39741 /* Reserve memory operand for target. */
39742 memory = ARRAY_SIZE (args);
39745 /* These builtins and instructions require the memory
39746 to be properly aligned. */
39747 case CODE_FOR_avx_movntv4di:
39748 case CODE_FOR_sse2_movntv2di:
39749 case CODE_FOR_avx_movntv8sf:
39750 case CODE_FOR_sse_movntv4sf:
39751 case CODE_FOR_sse4a_vmmovntv4sf:
39752 case CODE_FOR_avx_movntv4df:
39753 case CODE_FOR_sse2_movntv2df:
39754 case CODE_FOR_sse4a_vmmovntv2df:
39755 case CODE_FOR_sse2_movntidi:
39756 case CODE_FOR_sse_movntq:
39757 case CODE_FOR_sse2_movntisi:
39758 case CODE_FOR_avx512f_movntv16sf:
39759 case CODE_FOR_avx512f_movntv8df:
39760 case CODE_FOR_avx512f_movntv8di:
39761 aligned_mem = true;
39767 case V4SF_FTYPE_V4SF_PCV2SF:
39768 case V2DF_FTYPE_V2DF_PCDOUBLE:
/* Gather-style loads: base pointer plus index vector.  */
39773 case V8SF_FTYPE_PCV8SF_V8SI:
39774 case V4DF_FTYPE_PCV4DF_V4DI:
39775 case V4SF_FTYPE_PCV4SF_V4SI:
39776 case V2DF_FTYPE_PCV2DF_V2DI:
39777 case V8SI_FTYPE_PCV8SI_V8SI:
39778 case V4DI_FTYPE_PCV4DI_V4DI:
39779 case V4SI_FTYPE_PCV4SI_V4SI:
39780 case V2DI_FTYPE_PCV2DI_V2DI:
/* Masked stores.  */
39785 case VOID_FTYPE_PV8DF_V8DF_UQI:
39786 case VOID_FTYPE_PV4DF_V4DF_UQI:
39787 case VOID_FTYPE_PV2DF_V2DF_UQI:
39788 case VOID_FTYPE_PV16SF_V16SF_UHI:
39789 case VOID_FTYPE_PV8SF_V8SF_UQI:
39790 case VOID_FTYPE_PV4SF_V4SF_UQI:
39791 case VOID_FTYPE_PV8DI_V8DI_UQI:
39792 case VOID_FTYPE_PV4DI_V4DI_UQI:
39793 case VOID_FTYPE_PV2DI_V2DI_UQI:
39794 case VOID_FTYPE_PV16SI_V16SI_UHI:
39795 case VOID_FTYPE_PV8SI_V8SI_UQI:
39796 case VOID_FTYPE_PV4SI_V4SI_UQI:
39799 /* These builtins and instructions require the memory
39800 to be properly aligned. */
39801 case CODE_FOR_avx512f_storev16sf_mask:
39802 case CODE_FOR_avx512f_storev16si_mask:
39803 case CODE_FOR_avx512f_storev8df_mask:
39804 case CODE_FOR_avx512f_storev8di_mask:
39805 case CODE_FOR_avx512vl_storev8sf_mask:
39806 case CODE_FOR_avx512vl_storev8si_mask:
39807 case CODE_FOR_avx512vl_storev4df_mask:
39808 case CODE_FOR_avx512vl_storev4di_mask:
39809 case CODE_FOR_avx512vl_storev4sf_mask:
39810 case CODE_FOR_avx512vl_storev4si_mask:
39811 case CODE_FOR_avx512vl_storev2df_mask:
39812 case CODE_FOR_avx512vl_storev2di_mask:
39813 aligned_mem = true;
39819 case VOID_FTYPE_PV8SF_V8SI_V8SF:
39820 case VOID_FTYPE_PV4DF_V4DI_V4DF:
39821 case VOID_FTYPE_PV4SF_V4SI_V4SF:
39822 case VOID_FTYPE_PV2DF_V2DI_V2DF:
39823 case VOID_FTYPE_PV8SI_V8SI_V8SI:
39824 case VOID_FTYPE_PV4DI_V4DI_V4DI:
39825 case VOID_FTYPE_PV4SI_V4SI_V4SI:
39826 case VOID_FTYPE_PV2DI_V2DI_V2DI:
39827 case VOID_FTYPE_PV8SI_V8DI_UQI:
39828 case VOID_FTYPE_PV8HI_V8DI_UQI:
39829 case VOID_FTYPE_PV16HI_V16SI_UHI:
39830 case VOID_FTYPE_PV16QI_V8DI_UQI:
39831 case VOID_FTYPE_PV16QI_V16SI_UHI:
39832 case VOID_FTYPE_PV4SI_V4DI_UQI:
39833 case VOID_FTYPE_PV4SI_V2DI_UQI:
39834 case VOID_FTYPE_PV8HI_V4DI_UQI:
39835 case VOID_FTYPE_PV8HI_V2DI_UQI:
39836 case VOID_FTYPE_PV8HI_V8SI_UQI:
39837 case VOID_FTYPE_PV8HI_V4SI_UQI:
39838 case VOID_FTYPE_PV16QI_V4DI_UQI:
39839 case VOID_FTYPE_PV16QI_V2DI_UQI:
39840 case VOID_FTYPE_PV16QI_V8SI_UQI:
39841 case VOID_FTYPE_PV16QI_V4SI_UQI:
39842 case VOID_FTYPE_PV8HI_V8HI_UQI:
39843 case VOID_FTYPE_PV16HI_V16HI_UHI:
39844 case VOID_FTYPE_PV32HI_V32HI_USI:
39845 case VOID_FTYPE_PV16QI_V16QI_UHI:
39846 case VOID_FTYPE_PV32QI_V32QI_USI:
39847 case VOID_FTYPE_PV64QI_V64QI_UDI:
39850 /* Reserve memory operand for target. */
39851 memory = ARRAY_SIZE (args);
/* Masked loads (pointer source, mask, passthrough vector).  */
39853 case V4SF_FTYPE_PCV4SF_V4SF_UQI:
39854 case V8SF_FTYPE_PCV8SF_V8SF_UQI:
39855 case V16SF_FTYPE_PCV16SF_V16SF_UHI:
39856 case V4SI_FTYPE_PCV4SI_V4SI_UQI:
39857 case V8SI_FTYPE_PCV8SI_V8SI_UQI:
39858 case V16SI_FTYPE_PCV16SI_V16SI_UHI:
39859 case V2DF_FTYPE_PCV2DF_V2DF_UQI:
39860 case V4DF_FTYPE_PCV4DF_V4DF_UQI:
39861 case V8DF_FTYPE_PCV8DF_V8DF_UQI:
39862 case V2DI_FTYPE_PCV2DI_V2DI_UQI:
39863 case V4DI_FTYPE_PCV4DI_V4DI_UQI:
39864 case V8DI_FTYPE_PCV8DI_V8DI_UQI:
39865 case V8HI_FTYPE_PCV8HI_V8HI_UQI:
39866 case V16HI_FTYPE_PCV16HI_V16HI_UHI:
39867 case V32HI_FTYPE_PCV32HI_V32HI_USI:
39868 case V16QI_FTYPE_PCV16QI_V16QI_UHI:
39869 case V32QI_FTYPE_PCV32QI_V32QI_USI:
39870 case V64QI_FTYPE_PCV64QI_V64QI_UDI:
39876 /* These builtins and instructions require the memory
39877 to be properly aligned. */
39878 case CODE_FOR_avx512f_loadv16sf_mask:
39879 case CODE_FOR_avx512f_loadv16si_mask:
39880 case CODE_FOR_avx512f_loadv8df_mask:
39881 case CODE_FOR_avx512f_loadv8di_mask:
39882 case CODE_FOR_avx512vl_loadv8sf_mask:
39883 case CODE_FOR_avx512vl_loadv8si_mask:
39884 case CODE_FOR_avx512vl_loadv4df_mask:
39885 case CODE_FOR_avx512vl_loadv4di_mask:
39886 case CODE_FOR_avx512vl_loadv4sf_mask:
39887 case CODE_FOR_avx512vl_loadv4si_mask:
39888 case CODE_FOR_avx512vl_loadv2df_mask:
39889 case CODE_FOR_avx512vl_loadv2di_mask:
39890 case CODE_FOR_avx512bw_loadv64qi_mask:
39891 case CODE_FOR_avx512vl_loadv32qi_mask:
39892 case CODE_FOR_avx512vl_loadv16qi_mask:
39893 case CODE_FOR_avx512bw_loadv32hi_mask:
39894 case CODE_FOR_avx512vl_loadv16hi_mask:
39895 case CODE_FOR_avx512vl_loadv8hi_mask:
39896 aligned_mem = true;
/* LWP-style builtins take a trailing immediate.  */
39902 case VOID_FTYPE_UINT_UINT_UINT:
39903 case VOID_FTYPE_UINT64_UINT_UINT:
39904 case UCHAR_FTYPE_UINT_UINT_UINT:
39905 case UCHAR_FTYPE_UINT64_UINT_UINT:
39908 memory = ARRAY_SIZE (args);
39909 last_arg_constant = true;
39912 gcc_unreachable ();
39915 gcc_assert (nargs <= ARRAY_SIZE (args));
/* For stores, the first call argument is the destination address; wrap
   it in a MEM of the insn's mode and record the best known alignment.  */
39917 if (klass == store)
39919 arg = CALL_EXPR_ARG (exp, 0);
39920 op = expand_normal (arg);
39921 gcc_assert (target == 0);
39924 op = ix86_zero_extend_to_Pmode (op);
39925 target = gen_rtx_MEM (tmode, op);
39926 /* target at this point has just BITS_PER_UNIT MEM_ALIGN
39927 on it. Try to improve it using get_pointer_alignment,
39928 and if the special builtin is one that requires strict
39929 mode alignment, also from its GET_MODE_ALIGNMENT.
39930 Failure to do so could lead to ix86_legitimate_combined_insn
39931 rejecting all changes to such insns. */
39932 unsigned int align = get_pointer_alignment (arg);
39933 if (aligned_mem && align < GET_MODE_ALIGNMENT (tmode))
39934 align = GET_MODE_ALIGNMENT (tmode);
39935 if (MEM_ALIGN (target) < align)
39936 set_mem_align (target, align);
39939 target = force_reg (tmode, op);
/* For loads, (re)allocate the result register unless TARGET already
   satisfies the destination predicate.  */
39947 || !register_operand (target, tmode)
39948 || GET_MODE (target) != tmode
39949 target = gen_reg_rtx (tmode);
/* Expand and legitimize the remaining operands.  */
39952 for (i = 0; i < nargs; i++)
39954 machine_mode mode = insn_p->operand[i + 1].mode;
39957 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
39958 op = expand_normal (arg);
39959 match = insn_p->operand[i + 1].predicate (op, mode);
39961 if (last_arg_constant && (i + 1) == nargs)
39965 if (icode == CODE_FOR_lwp_lwpvalsi3
39966 || icode == CODE_FOR_lwp_lwpinssi3
39967 || icode == CODE_FOR_lwp_lwpvaldi3
39968 || icode == CODE_FOR_lwp_lwpinsdi3)
39969 error ("the last argument must be a 32-bit immediate");
39971 error ("the last argument must be an 8-bit immediate");
39979 /* This must be the memory operand. */
39980 op = ix86_zero_extend_to_Pmode (op);
39981 op = gen_rtx_MEM (mode, op);
39982 /* op at this point has just BITS_PER_UNIT MEM_ALIGN
39983 on it. Try to improve it using get_pointer_alignment,
39984 and if the special builtin is one that requires strict
39985 mode alignment, also from its GET_MODE_ALIGNMENT.
39986 Failure to do so could lead to ix86_legitimate_combined_insn
39987 rejecting all changes to such insns. */
39988 unsigned int align = get_pointer_alignment (arg);
39989 if (aligned_mem && align < GET_MODE_ALIGNMENT (mode))
39990 align = GET_MODE_ALIGNMENT (mode);
39991 if (MEM_ALIGN (op) < align)
39992 set_mem_align (op, align);
39996 /* This must be register. */
39997 if (VECTOR_MODE_P (mode))
39998 op = safe_vector_operand (op, mode);
40000 op = fixup_modeless_constant (op, mode);
40002 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
40003 op = copy_to_mode_reg (mode, op);
40006 op = copy_to_reg (op);
40007 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
40013 args[i].mode = mode;
/* Emit the insn with the collected operands (arity-dispatched).  */
40019 pat = GEN_FCN (icode) (target);
40022 pat = GEN_FCN (icode) (target, args[0].op);
40025 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
40028 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
40031 gcc_unreachable ();
/* Stores produce no value.  */
40037 return klass == store ? 0 : target
40040 /* Return the integer constant in ARG. Constrain it to be in the range
40041 of the subparts of VEC_TYPE; issue an error if not. */
/* NOTE(review): this dump elides lines — the return type, braces and the
   return statements of this function are not visible here.  */
40044 get_element_number (tree vec_type, tree arg)
/* MAX is the highest valid lane index: number of subparts of VEC_TYPE
   minus one.  */
40046 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
/* The comma expression assigns ELT only after tree_fits_uhwi_p has
   confirmed ARG fits an unsigned HOST_WIDE_INT (|| short-circuits),
   then range-checks it against MAX.  */
40048 if (!tree_fits_uhwi_p (arg)
40049 || (elt = tree_to_uhwi (arg), elt > max))
/* Non-constant or out-of-range selector: diagnose with the valid range.  */
40051 error ("selector must be an integer constant in the range 0..%wi", max);
40058 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
40059 ix86_expand_vector_init. We DO have language-level syntax for this, in
40060 the form of (type){ init-list }. Except that since we can't place emms
40061 instructions from inside the compiler, we can't allow the use of MMX
40062 registers unless the user explicitly asks for it. So we do *not* define
40063 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
40064 we have builtins invoked by mmintrin.h that gives us license to emit
40065 these sorts of instructions. */
/* NOTE(review): elided dump — the function's return type, braces and
   final return statement fall on lines missing here.  */
40068 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
40070 machine_mode tmode = TYPE_MODE (type);
40071 machine_mode inner_mode = GET_MODE_INNER (tmode);
40072 int i, n_elt = GET_MODE_NUNITS (tmode);
/* One rtvec slot per vector lane.  */
40073 rtvec v = rtvec_alloc (n_elt);
40075 gcc_assert (VECTOR_MODE_P (tmode));
/* The builtin call supplies exactly one argument per lane.  */
40076 gcc_assert (call_expr_nargs (exp) == n_elt);
/* Expand each call argument and narrow it to the element mode.  */
40078 for (i = 0; i < n_elt; ++i)
40080 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
40081 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
/* The init expander needs a register destination of the vector mode.  */
40084 if (!target || !register_operand (target, tmode))
40085 target = gen_reg_rtx (tmode);
/* Hand the gathered lanes to the generic vector-init expander as a
   PARALLEL of element rtxes.  */
40087 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
40091 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
40092 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
40093 had a language-level syntax for referencing vector elements. */
/* NOTE(review): elided dump — return type, braces, some local declarations
   (arg0/arg1/op0/elt) and the return statement are on missing lines.  */
40096 ix86_expand_vec_ext_builtin (tree exp, rtx target)
40098 machine_mode tmode, mode0;
/* arg0 is the source vector, arg1 the lane selector.  */
40103 arg0 = CALL_EXPR_ARG (exp, 0);
40104 arg1 = CALL_EXPR_ARG (exp, 1);
40106 op0 = expand_normal (arg0);
/* Validate the selector against the vector type; errors are issued by
   get_element_number.  */
40107 elt = get_element_number (TREE_TYPE (arg0), arg1);
/* TMODE is the element mode (type of a lane), MODE0 the vector mode.  */
40109 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
40110 mode0 = TYPE_MODE (TREE_TYPE (arg0));
40111 gcc_assert (VECTOR_MODE_P (mode0));
40113 op0 = force_reg (mode0, op0);
/* When optimizing, always use a fresh pseudo for the result.  */
40115 if (optimize || !target || !register_operand (target, tmode))
40116 target = gen_reg_rtx (tmode);
40118 ix86_expand_vector_extract (true, target, op0, elt);
40123 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
40124 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
40125 a language-level syntax for referencing vector elements. */
/* NOTE(review): elided dump — return type, braces, the declaration of ELT
   and the final return of TARGET are on missing lines.  */
40128 ix86_expand_vec_set_builtin (tree exp)
40130 machine_mode tmode, mode1;
40131 tree arg0, arg1, arg2;
40133 rtx op0, op1, target;
/* arg0 = source vector, arg1 = new element value, arg2 = lane selector.  */
40135 arg0 = CALL_EXPR_ARG (exp, 0);
40136 arg1 = CALL_EXPR_ARG (exp, 1);
40137 arg2 = CALL_EXPR_ARG (exp, 2);
/* TMODE is the vector mode, MODE1 the element mode.  */
40139 tmode = TYPE_MODE (TREE_TYPE (arg0));
40140 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
40141 gcc_assert (VECTOR_MODE_P (tmode));
40143 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
40144 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
/* Selector is validated (and diagnosed) by get_element_number.  */
40145 elt = get_element_number (TREE_TYPE (arg0), arg2);
/* Widen/narrow the new element to the element mode if it came out in a
   different (non-VOID) mode.  */
40147 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
40148 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
40150 op0 = force_reg (tmode, op0);
40151 op1 = force_reg (mode1, op1);
40153 /* OP0 is the source of these builtin functions and shouldn't be
40154 modified. Create a copy, use it and return it as target. */
40155 target = gen_reg_rtx (tmode);
40156 emit_move_insn (target, op0);
40157 ix86_expand_vector_set (true, target, op1, elt);
40162 /* Emit conditional move of SRC to DST with condition
/* NOTE(review): elided dump — the rest of this header comment, the
   function's braces, the declaration of T, and the condition selecting
   between the two emission strategies below (presumably a TARGET_CMOVE
   check — confirm against full source) are on missing lines.  */
40165 ix86_emit_cmove (rtx dst, rtx src, enum rtx_code code, rtx op1, rtx op2)
/* Strategy 1: a real conditional move — SET DST to an IF_THEN_ELSE of the
   expanded comparison.  (The else-arm of the IF_THEN_ELSE is on an elided
   line.)  */
40171 t = ix86_expand_compare (code, op1, op2);
40172 emit_insn (gen_rtx_SET (dst, gen_rtx_IF_THEN_ELSE (GET_MODE (dst), t,
/* Strategy 2: compare-and-branch around a plain move.  The jump uses the
   reversed condition, so NOMOVE is taken — skipping the move — exactly
   when CODE(op1, op2) is false.  */
40177 rtx_code_label *nomove = gen_label_rtx ();
40178 emit_cmp_and_jump_insns (op1, op2, reverse_condition (code),
40179 const0_rtx, GET_MODE (op1), 1, nomove);
40180 emit_move_insn (dst, src);
40181 emit_label (nomove);
40185 /* Choose max of DST and SRC and put it to DST. */
/* NOTE(review): elided dump — return type and braces are on missing
   lines.  */
40187 ix86_emit_move_max (rtx dst, rtx src)
/* Unsigned max via conditional move: if DST < SRC (LTU), replace DST
   with SRC; otherwise DST is left unchanged.  */
40189 ix86_emit_cmove (dst, src, LTU, dst, src);
40192 /* Expand an expression EXP that calls a built-in function,
40193 with result going to TARGET if that's convenient
40194 (and in mode MODE if that's convenient).
40195 SUBTARGET may be used as the target for computing one of EXP's operands.
40196 IGNORE is nonzero if the value is to be ignored. */
40199 ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
40200 machine_mode mode, int ignore)
40202 const struct builtin_description *d;
40204 enum insn_code icode;
40205 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
40206 tree arg0, arg1, arg2, arg3, arg4;
40207 rtx op0, op1, op2, op3, op4, pat, insn;
40208 machine_mode mode0, mode1, mode2, mode3, mode4;
40209 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
40211 /* For CPU builtins that can be folded, fold first and expand the fold. */
40214 case IX86_BUILTIN_CPU_INIT:
40216 /* Make it call __cpu_indicator_init in libgcc. */
40217 tree call_expr, fndecl, type;
40218 type = build_function_type_list (integer_type_node, NULL_TREE);
40219 fndecl = build_fn_decl ("__cpu_indicator_init", type);
40220 call_expr = build_call_expr (fndecl, 0);
40221 return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
40223 case IX86_BUILTIN_CPU_IS:
40224 case IX86_BUILTIN_CPU_SUPPORTS:
40226 tree arg0 = CALL_EXPR_ARG (exp, 0);
40227 tree fold_expr = fold_builtin_cpu (fndecl, &arg0);
40228 gcc_assert (fold_expr != NULL_TREE);
40229 return expand_expr (fold_expr, target, mode, EXPAND_NORMAL);
40233 /* Determine whether the builtin function is available under the current ISA.
40234 Originally the builtin was not created if it wasn't applicable to the
40235 current ISA based on the command line switches. With function specific
40236 options, we need to check in the context of the function making the call
40237 whether it is supported. */
40238 if (ix86_builtins_isa[fcode].isa
40239 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
40241 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
40242 NULL, (enum fpmath_unit) 0, false);
40245 error ("%qE needs unknown isa option", fndecl);
40248 gcc_assert (opts != NULL);
40249 error ("%qE needs isa option %s", fndecl, opts);
40257 case IX86_BUILTIN_BNDMK:
40259 || GET_MODE (target) != BNDmode
40260 || !register_operand (target, BNDmode))
40261 target = gen_reg_rtx (BNDmode);
40263 arg0 = CALL_EXPR_ARG (exp, 0);
40264 arg1 = CALL_EXPR_ARG (exp, 1);
40266 op0 = expand_normal (arg0);
40267 op1 = expand_normal (arg1);
40269 if (!register_operand (op0, Pmode))
40270 op0 = ix86_zero_extend_to_Pmode (op0);
40271 if (!register_operand (op1, Pmode))
40272 op1 = ix86_zero_extend_to_Pmode (op1);
40274 /* Builtin arg1 is size of block but instruction op1 should
40276 op1 = expand_simple_binop (Pmode, PLUS, op1, constm1_rtx,
40277 NULL_RTX, 1, OPTAB_DIRECT);
40279 emit_insn (BNDmode == BND64mode
40280 ? gen_bnd64_mk (target, op0, op1)
40281 : gen_bnd32_mk (target, op0, op1));
40284 case IX86_BUILTIN_BNDSTX:
40285 arg0 = CALL_EXPR_ARG (exp, 0);
40286 arg1 = CALL_EXPR_ARG (exp, 1);
40287 arg2 = CALL_EXPR_ARG (exp, 2);
40289 op0 = expand_normal (arg0);
40290 op1 = expand_normal (arg1);
40291 op2 = expand_normal (arg2);
40293 if (!register_operand (op0, Pmode))
40294 op0 = ix86_zero_extend_to_Pmode (op0);
40295 if (!register_operand (op1, BNDmode))
40296 op1 = copy_to_mode_reg (BNDmode, op1);
40297 if (!register_operand (op2, Pmode))
40298 op2 = ix86_zero_extend_to_Pmode (op2);
40300 emit_insn (BNDmode == BND64mode
40301 ? gen_bnd64_stx (op2, op0, op1)
40302 : gen_bnd32_stx (op2, op0, op1));
40305 case IX86_BUILTIN_BNDLDX:
40307 || GET_MODE (target) != BNDmode
40308 || !register_operand (target, BNDmode))
40309 target = gen_reg_rtx (BNDmode);
40311 arg0 = CALL_EXPR_ARG (exp, 0);
40312 arg1 = CALL_EXPR_ARG (exp, 1);
40314 op0 = expand_normal (arg0);
40315 op1 = expand_normal (arg1);
40317 if (!register_operand (op0, Pmode))
40318 op0 = ix86_zero_extend_to_Pmode (op0);
40319 if (!register_operand (op1, Pmode))
40320 op1 = ix86_zero_extend_to_Pmode (op1);
40322 emit_insn (BNDmode == BND64mode
40323 ? gen_bnd64_ldx (target, op0, op1)
40324 : gen_bnd32_ldx (target, op0, op1));
40327 case IX86_BUILTIN_BNDCL:
40328 arg0 = CALL_EXPR_ARG (exp, 0);
40329 arg1 = CALL_EXPR_ARG (exp, 1);
40331 op0 = expand_normal (arg0);
40332 op1 = expand_normal (arg1);
40334 if (!register_operand (op0, Pmode))
40335 op0 = ix86_zero_extend_to_Pmode (op0);
40336 if (!register_operand (op1, BNDmode))
40337 op1 = copy_to_mode_reg (BNDmode, op1);
40339 emit_insn (BNDmode == BND64mode
40340 ? gen_bnd64_cl (op1, op0)
40341 : gen_bnd32_cl (op1, op0));
40344 case IX86_BUILTIN_BNDCU:
40345 arg0 = CALL_EXPR_ARG (exp, 0);
40346 arg1 = CALL_EXPR_ARG (exp, 1);
40348 op0 = expand_normal (arg0);
40349 op1 = expand_normal (arg1);
40351 if (!register_operand (op0, Pmode))
40352 op0 = ix86_zero_extend_to_Pmode (op0);
40353 if (!register_operand (op1, BNDmode))
40354 op1 = copy_to_mode_reg (BNDmode, op1);
40356 emit_insn (BNDmode == BND64mode
40357 ? gen_bnd64_cu (op1, op0)
40358 : gen_bnd32_cu (op1, op0));
40361 case IX86_BUILTIN_BNDRET:
40362 arg0 = CALL_EXPR_ARG (exp, 0);
40363 gcc_assert (TREE_CODE (arg0) == SSA_NAME);
40364 target = chkp_get_rtl_bounds (arg0);
40366 /* If no bounds were specified for returned value,
40367 then use INIT bounds. It usually happens when
40368 some built-in function is expanded. */
40371 rtx t1 = gen_reg_rtx (Pmode);
40372 rtx t2 = gen_reg_rtx (Pmode);
40373 target = gen_reg_rtx (BNDmode);
40374 emit_move_insn (t1, const0_rtx);
40375 emit_move_insn (t2, constm1_rtx);
40376 emit_insn (BNDmode == BND64mode
40377 ? gen_bnd64_mk (target, t1, t2)
40378 : gen_bnd32_mk (target, t1, t2));
40381 gcc_assert (target && REG_P (target));
40384 case IX86_BUILTIN_BNDNARROW:
40386 rtx m1, m1h1, m1h2, lb, ub, t1;
40388 /* Return value and lb. */
40389 arg0 = CALL_EXPR_ARG (exp, 0);
40391 arg1 = CALL_EXPR_ARG (exp, 1);
40393 arg2 = CALL_EXPR_ARG (exp, 2);
40395 lb = expand_normal (arg0);
40396 op1 = expand_normal (arg1);
40397 op2 = expand_normal (arg2);
40399 /* Size was passed but we need to use (size - 1) as for bndmk. */
40400 op2 = expand_simple_binop (Pmode, PLUS, op2, constm1_rtx,
40401 NULL_RTX, 1, OPTAB_DIRECT);
40403 /* Add LB to size and inverse to get UB. */
40404 op2 = expand_simple_binop (Pmode, PLUS, op2, lb,
40405 op2, 1, OPTAB_DIRECT);
40406 ub = expand_simple_unop (Pmode, NOT, op2, op2, 1);
40408 if (!register_operand (lb, Pmode))
40409 lb = ix86_zero_extend_to_Pmode (lb);
40410 if (!register_operand (ub, Pmode))
40411 ub = ix86_zero_extend_to_Pmode (ub);
40413 /* We need to move bounds to memory before any computations. */
40418 m1 = assign_386_stack_local (BNDmode, SLOT_TEMP);
40419 emit_move_insn (m1, op1);
40422 /* Generate mem expression to be used for access to LB and UB. */
40423 m1h1 = adjust_address (m1, Pmode, 0);
40424 m1h2 = adjust_address (m1, Pmode, GET_MODE_SIZE (Pmode));
40426 t1 = gen_reg_rtx (Pmode);
40429 emit_move_insn (t1, m1h1);
40430 ix86_emit_move_max (t1, lb);
40431 emit_move_insn (m1h1, t1);
40433 /* Compute UB. UB is stored in 1's complement form. Therefore
40434 we also use max here. */
40435 emit_move_insn (t1, m1h2);
40436 ix86_emit_move_max (t1, ub);
40437 emit_move_insn (m1h2, t1);
40439 op2 = gen_reg_rtx (BNDmode);
40440 emit_move_insn (op2, m1);
40442 return chkp_join_splitted_slot (lb, op2);
40445 case IX86_BUILTIN_BNDINT:
40447 rtx res, rh1, rh2, lb1, lb2, ub1, ub2;
40450 || GET_MODE (target) != BNDmode
40451 || !register_operand (target, BNDmode))
40452 target = gen_reg_rtx (BNDmode);
40454 arg0 = CALL_EXPR_ARG (exp, 0);
40455 arg1 = CALL_EXPR_ARG (exp, 1);
40457 op0 = expand_normal (arg0);
40458 op1 = expand_normal (arg1);
40460 res = assign_386_stack_local (BNDmode, SLOT_TEMP);
40461 rh1 = adjust_address (res, Pmode, 0);
40462 rh2 = adjust_address (res, Pmode, GET_MODE_SIZE (Pmode));
40464 /* Put first bounds to temporaries. */
40465 lb1 = gen_reg_rtx (Pmode);
40466 ub1 = gen_reg_rtx (Pmode);
40469 emit_move_insn (lb1, adjust_address (op0, Pmode, 0));
40470 emit_move_insn (ub1, adjust_address (op0, Pmode,
40471 GET_MODE_SIZE (Pmode)));
40475 emit_move_insn (res, op0);
40476 emit_move_insn (lb1, rh1);
40477 emit_move_insn (ub1, rh2);
40480 /* Put second bounds to temporaries. */
40481 lb2 = gen_reg_rtx (Pmode);
40482 ub2 = gen_reg_rtx (Pmode);
40485 emit_move_insn (lb2, adjust_address (op1, Pmode, 0));
40486 emit_move_insn (ub2, adjust_address (op1, Pmode,
40487 GET_MODE_SIZE (Pmode)));
40491 emit_move_insn (res, op1);
40492 emit_move_insn (lb2, rh1);
40493 emit_move_insn (ub2, rh2);
40497 ix86_emit_move_max (lb1, lb2);
40498 emit_move_insn (rh1, lb1);
40500 /* Compute UB. UB is stored in 1's complement form. Therefore
40501 we also use max here. */
40502 ix86_emit_move_max (ub1, ub2);
40503 emit_move_insn (rh2, ub1);
40505 emit_move_insn (target, res);
40510 case IX86_BUILTIN_SIZEOF:
40516 || GET_MODE (target) != Pmode
40517 || !register_operand (target, Pmode))
40518 target = gen_reg_rtx (Pmode);
40520 arg0 = CALL_EXPR_ARG (exp, 0);
40521 gcc_assert (TREE_CODE (arg0) == VAR_DECL);
40523 name = DECL_ASSEMBLER_NAME (arg0);
40524 symbol = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (name));
40526 emit_insn (Pmode == SImode
40527 ? gen_move_size_reloc_si (target, symbol)
40528 : gen_move_size_reloc_di (target, symbol));
40533 case IX86_BUILTIN_BNDLOWER:
40538 || GET_MODE (target) != Pmode
40539 || !register_operand (target, Pmode))
40540 target = gen_reg_rtx (Pmode);
40542 arg0 = CALL_EXPR_ARG (exp, 0);
40543 op0 = expand_normal (arg0);
40545 /* We need to move bounds to memory first. */
40550 mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
40551 emit_move_insn (mem, op0);
40554 /* Generate mem expression to access LB and load it. */
40555 hmem = adjust_address (mem, Pmode, 0);
40556 emit_move_insn (target, hmem);
40561 case IX86_BUILTIN_BNDUPPER:
40563 rtx mem, hmem, res;
40566 || GET_MODE (target) != Pmode
40567 || !register_operand (target, Pmode))
40568 target = gen_reg_rtx (Pmode);
40570 arg0 = CALL_EXPR_ARG (exp, 0);
40571 op0 = expand_normal (arg0);
40573 /* We need to move bounds to memory first. */
40578 mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
40579 emit_move_insn (mem, op0);
40582 /* Generate mem expression to access UB. */
40583 hmem = adjust_address (mem, Pmode, GET_MODE_SIZE (Pmode));
40585 /* We need to inverse all bits of UB. */
40586 res = expand_simple_unop (Pmode, NOT, hmem, target, 1);
40589 emit_move_insn (target, res);
40594 case IX86_BUILTIN_MASKMOVQ:
40595 case IX86_BUILTIN_MASKMOVDQU:
40596 icode = (fcode == IX86_BUILTIN_MASKMOVQ
40597 ? CODE_FOR_mmx_maskmovq
40598 : CODE_FOR_sse2_maskmovdqu);
40599 /* Note the arg order is different from the operand order. */
40600 arg1 = CALL_EXPR_ARG (exp, 0);
40601 arg2 = CALL_EXPR_ARG (exp, 1);
40602 arg0 = CALL_EXPR_ARG (exp, 2);
40603 op0 = expand_normal (arg0);
40604 op1 = expand_normal (arg1);
40605 op2 = expand_normal (arg2);
40606 mode0 = insn_data[icode].operand[0].mode;
40607 mode1 = insn_data[icode].operand[1].mode;
40608 mode2 = insn_data[icode].operand[2].mode;
40610 op0 = ix86_zero_extend_to_Pmode (op0);
40611 op0 = gen_rtx_MEM (mode1, op0);
40613 if (!insn_data[icode].operand[0].predicate (op0, mode0))
40614 op0 = copy_to_mode_reg (mode0, op0);
40615 if (!insn_data[icode].operand[1].predicate (op1, mode1))
40616 op1 = copy_to_mode_reg (mode1, op1);
40617 if (!insn_data[icode].operand[2].predicate (op2, mode2))
40618 op2 = copy_to_mode_reg (mode2, op2);
40619 pat = GEN_FCN (icode) (op0, op1, op2);
40625 case IX86_BUILTIN_LDMXCSR:
40626 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
40627 target = assign_386_stack_local (SImode, SLOT_TEMP);
40628 emit_move_insn (target, op0);
40629 emit_insn (gen_sse_ldmxcsr (target));
40632 case IX86_BUILTIN_STMXCSR:
40633 target = assign_386_stack_local (SImode, SLOT_TEMP);
40634 emit_insn (gen_sse_stmxcsr (target));
40635 return copy_to_mode_reg (SImode, target);
40637 case IX86_BUILTIN_CLFLUSH:
40638 arg0 = CALL_EXPR_ARG (exp, 0);
40639 op0 = expand_normal (arg0);
40640 icode = CODE_FOR_sse2_clflush;
40641 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
40642 op0 = ix86_zero_extend_to_Pmode (op0);
40644 emit_insn (gen_sse2_clflush (op0));
40647 case IX86_BUILTIN_CLWB:
40648 arg0 = CALL_EXPR_ARG (exp, 0);
40649 op0 = expand_normal (arg0);
40650 icode = CODE_FOR_clwb;
40651 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
40652 op0 = ix86_zero_extend_to_Pmode (op0);
40654 emit_insn (gen_clwb (op0));
40657 case IX86_BUILTIN_CLFLUSHOPT:
40658 arg0 = CALL_EXPR_ARG (exp, 0);
40659 op0 = expand_normal (arg0);
40660 icode = CODE_FOR_clflushopt;
40661 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
40662 op0 = ix86_zero_extend_to_Pmode (op0);
40664 emit_insn (gen_clflushopt (op0));
40667 case IX86_BUILTIN_MONITOR:
40668 case IX86_BUILTIN_MONITORX:
40669 arg0 = CALL_EXPR_ARG (exp, 0);
40670 arg1 = CALL_EXPR_ARG (exp, 1);
40671 arg2 = CALL_EXPR_ARG (exp, 2);
40672 op0 = expand_normal (arg0);
40673 op1 = expand_normal (arg1);
40674 op2 = expand_normal (arg2);
40676 op0 = ix86_zero_extend_to_Pmode (op0);
40678 op1 = copy_to_mode_reg (SImode, op1);
40680 op2 = copy_to_mode_reg (SImode, op2);
40682 emit_insn (fcode == IX86_BUILTIN_MONITOR
40683 ? ix86_gen_monitor (op0, op1, op2)
40684 : ix86_gen_monitorx (op0, op1, op2));
40687 case IX86_BUILTIN_MWAIT:
40688 arg0 = CALL_EXPR_ARG (exp, 0);
40689 arg1 = CALL_EXPR_ARG (exp, 1);
40690 op0 = expand_normal (arg0);
40691 op1 = expand_normal (arg1);
40693 op0 = copy_to_mode_reg (SImode, op0);
40695 op1 = copy_to_mode_reg (SImode, op1);
40696 emit_insn (gen_sse3_mwait (op0, op1));
40699 case IX86_BUILTIN_MWAITX:
40700 arg0 = CALL_EXPR_ARG (exp, 0);
40701 arg1 = CALL_EXPR_ARG (exp, 1);
40702 arg2 = CALL_EXPR_ARG (exp, 2);
40703 op0 = expand_normal (arg0);
40704 op1 = expand_normal (arg1);
40705 op2 = expand_normal (arg2);
40707 op0 = copy_to_mode_reg (SImode, op0);
40709 op1 = copy_to_mode_reg (SImode, op1);
40711 op2 = copy_to_mode_reg (SImode, op2);
40712 emit_insn (gen_mwaitx (op0, op1, op2));
40715 case IX86_BUILTIN_CLZERO:
40716 arg0 = CALL_EXPR_ARG (exp, 0);
40717 op0 = expand_normal (arg0);
40719 op0 = ix86_zero_extend_to_Pmode (op0);
40720 emit_insn (ix86_gen_clzero (op0));
40723 case IX86_BUILTIN_VEC_INIT_V2SI:
40724 case IX86_BUILTIN_VEC_INIT_V4HI:
40725 case IX86_BUILTIN_VEC_INIT_V8QI:
40726 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
40728 case IX86_BUILTIN_VEC_EXT_V2DF:
40729 case IX86_BUILTIN_VEC_EXT_V2DI:
40730 case IX86_BUILTIN_VEC_EXT_V4SF:
40731 case IX86_BUILTIN_VEC_EXT_V4SI:
40732 case IX86_BUILTIN_VEC_EXT_V8HI:
40733 case IX86_BUILTIN_VEC_EXT_V2SI:
40734 case IX86_BUILTIN_VEC_EXT_V4HI:
40735 case IX86_BUILTIN_VEC_EXT_V16QI:
40736 return ix86_expand_vec_ext_builtin (exp, target);
40738 case IX86_BUILTIN_VEC_SET_V2DI:
40739 case IX86_BUILTIN_VEC_SET_V4SF:
40740 case IX86_BUILTIN_VEC_SET_V4SI:
40741 case IX86_BUILTIN_VEC_SET_V8HI:
40742 case IX86_BUILTIN_VEC_SET_V4HI:
40743 case IX86_BUILTIN_VEC_SET_V16QI:
40744 return ix86_expand_vec_set_builtin (exp);
40746 case IX86_BUILTIN_INFQ:
40747 case IX86_BUILTIN_HUGE_VALQ:
40749 REAL_VALUE_TYPE inf;
40753 tmp = const_double_from_real_value (inf, mode);
40755 tmp = validize_mem (force_const_mem (mode, tmp));
40758 target = gen_reg_rtx (mode);
40760 emit_move_insn (target, tmp);
40764 case IX86_BUILTIN_RDPMC:
40765 case IX86_BUILTIN_RDTSC:
40766 case IX86_BUILTIN_RDTSCP:
40768 op0 = gen_reg_rtx (DImode);
40769 op1 = gen_reg_rtx (DImode);
40771 if (fcode == IX86_BUILTIN_RDPMC)
40773 arg0 = CALL_EXPR_ARG (exp, 0);
40774 op2 = expand_normal (arg0);
40775 if (!register_operand (op2, SImode))
40776 op2 = copy_to_mode_reg (SImode, op2);
40778 insn = (TARGET_64BIT
40779 ? gen_rdpmc_rex64 (op0, op1, op2)
40780 : gen_rdpmc (op0, op2));
40783 else if (fcode == IX86_BUILTIN_RDTSC)
40785 insn = (TARGET_64BIT
40786 ? gen_rdtsc_rex64 (op0, op1)
40787 : gen_rdtsc (op0));
40792 op2 = gen_reg_rtx (SImode);
40794 insn = (TARGET_64BIT
40795 ? gen_rdtscp_rex64 (op0, op1, op2)
40796 : gen_rdtscp (op0, op2));
40799 arg0 = CALL_EXPR_ARG (exp, 0);
40800 op4 = expand_normal (arg0);
40801 if (!address_operand (op4, VOIDmode))
40803 op4 = convert_memory_address (Pmode, op4);
40804 op4 = copy_addr_to_reg (op4);
40806 emit_move_insn (gen_rtx_MEM (SImode, op4), op2);
40811 /* mode is VOIDmode if __builtin_rd* has been called
40813 if (mode == VOIDmode)
40815 target = gen_reg_rtx (mode);
40820 op1 = expand_simple_binop (DImode, ASHIFT, op1, GEN_INT (32),
40821 op1, 1, OPTAB_DIRECT);
40822 op0 = expand_simple_binop (DImode, IOR, op0, op1,
40823 op0, 1, OPTAB_DIRECT);
40826 emit_move_insn (target, op0);
40829 case IX86_BUILTIN_FXSAVE:
40830 case IX86_BUILTIN_FXRSTOR:
40831 case IX86_BUILTIN_FXSAVE64:
40832 case IX86_BUILTIN_FXRSTOR64:
40833 case IX86_BUILTIN_FNSTENV:
40834 case IX86_BUILTIN_FLDENV:
40838 case IX86_BUILTIN_FXSAVE:
40839 icode = CODE_FOR_fxsave;
40841 case IX86_BUILTIN_FXRSTOR:
40842 icode = CODE_FOR_fxrstor;
40844 case IX86_BUILTIN_FXSAVE64:
40845 icode = CODE_FOR_fxsave64;
40847 case IX86_BUILTIN_FXRSTOR64:
40848 icode = CODE_FOR_fxrstor64;
40850 case IX86_BUILTIN_FNSTENV:
40851 icode = CODE_FOR_fnstenv;
40853 case IX86_BUILTIN_FLDENV:
40854 icode = CODE_FOR_fldenv;
40857 gcc_unreachable ();
40860 arg0 = CALL_EXPR_ARG (exp, 0);
40861 op0 = expand_normal (arg0);
40863 if (!address_operand (op0, VOIDmode))
40865 op0 = convert_memory_address (Pmode, op0);
40866 op0 = copy_addr_to_reg (op0);
40868 op0 = gen_rtx_MEM (mode0, op0);
40870 pat = GEN_FCN (icode) (op0);
40875 case IX86_BUILTIN_XSAVE:
40876 case IX86_BUILTIN_XRSTOR:
40877 case IX86_BUILTIN_XSAVE64:
40878 case IX86_BUILTIN_XRSTOR64:
40879 case IX86_BUILTIN_XSAVEOPT:
40880 case IX86_BUILTIN_XSAVEOPT64:
40881 case IX86_BUILTIN_XSAVES:
40882 case IX86_BUILTIN_XRSTORS:
40883 case IX86_BUILTIN_XSAVES64:
40884 case IX86_BUILTIN_XRSTORS64:
40885 case IX86_BUILTIN_XSAVEC:
40886 case IX86_BUILTIN_XSAVEC64:
40887 arg0 = CALL_EXPR_ARG (exp, 0);
40888 arg1 = CALL_EXPR_ARG (exp, 1);
40889 op0 = expand_normal (arg0);
40890 op1 = expand_normal (arg1);
40892 if (!address_operand (op0, VOIDmode))
40894 op0 = convert_memory_address (Pmode, op0);
40895 op0 = copy_addr_to_reg (op0);
40897 op0 = gen_rtx_MEM (BLKmode, op0);
40899 op1 = force_reg (DImode, op1);
40903 op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
40904 NULL, 1, OPTAB_DIRECT);
40907 case IX86_BUILTIN_XSAVE:
40908 icode = CODE_FOR_xsave_rex64;
40910 case IX86_BUILTIN_XRSTOR:
40911 icode = CODE_FOR_xrstor_rex64;
40913 case IX86_BUILTIN_XSAVE64:
40914 icode = CODE_FOR_xsave64;
40916 case IX86_BUILTIN_XRSTOR64:
40917 icode = CODE_FOR_xrstor64;
40919 case IX86_BUILTIN_XSAVEOPT:
40920 icode = CODE_FOR_xsaveopt_rex64;
40922 case IX86_BUILTIN_XSAVEOPT64:
40923 icode = CODE_FOR_xsaveopt64;
40925 case IX86_BUILTIN_XSAVES:
40926 icode = CODE_FOR_xsaves_rex64;
40928 case IX86_BUILTIN_XRSTORS:
40929 icode = CODE_FOR_xrstors_rex64;
40931 case IX86_BUILTIN_XSAVES64:
40932 icode = CODE_FOR_xsaves64;
40934 case IX86_BUILTIN_XRSTORS64:
40935 icode = CODE_FOR_xrstors64;
40937 case IX86_BUILTIN_XSAVEC:
40938 icode = CODE_FOR_xsavec_rex64;
40940 case IX86_BUILTIN_XSAVEC64:
40941 icode = CODE_FOR_xsavec64;
40944 gcc_unreachable ();
40947 op2 = gen_lowpart (SImode, op2);
40948 op1 = gen_lowpart (SImode, op1);
40949 pat = GEN_FCN (icode) (op0, op1, op2);
40955 case IX86_BUILTIN_XSAVE:
40956 icode = CODE_FOR_xsave;
40958 case IX86_BUILTIN_XRSTOR:
40959 icode = CODE_FOR_xrstor;
40961 case IX86_BUILTIN_XSAVEOPT:
40962 icode = CODE_FOR_xsaveopt;
40964 case IX86_BUILTIN_XSAVES:
40965 icode = CODE_FOR_xsaves;
40967 case IX86_BUILTIN_XRSTORS:
40968 icode = CODE_FOR_xrstors;
40970 case IX86_BUILTIN_XSAVEC:
40971 icode = CODE_FOR_xsavec;
40974 gcc_unreachable ();
40976 pat = GEN_FCN (icode) (op0, op1);
40983 case IX86_BUILTIN_LLWPCB:
40984 arg0 = CALL_EXPR_ARG (exp, 0);
40985 op0 = expand_normal (arg0);
40986 icode = CODE_FOR_lwp_llwpcb;
40987 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
40988 op0 = ix86_zero_extend_to_Pmode (op0);
40989 emit_insn (gen_lwp_llwpcb (op0));
40992 case IX86_BUILTIN_SLWPCB:
40993 icode = CODE_FOR_lwp_slwpcb;
40995 || !insn_data[icode].operand[0].predicate (target, Pmode))
40996 target = gen_reg_rtx (Pmode);
40997 emit_insn (gen_lwp_slwpcb (target));
41000 case IX86_BUILTIN_BEXTRI32:
41001 case IX86_BUILTIN_BEXTRI64:
41002 arg0 = CALL_EXPR_ARG (exp, 0);
41003 arg1 = CALL_EXPR_ARG (exp, 1);
41004 op0 = expand_normal (arg0);
41005 op1 = expand_normal (arg1);
41006 icode = (fcode == IX86_BUILTIN_BEXTRI32
41007 ? CODE_FOR_tbm_bextri_si
41008 : CODE_FOR_tbm_bextri_di);
41009 if (!CONST_INT_P (op1))
41011 error ("last argument must be an immediate");
41016 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
41017 unsigned char lsb_index = INTVAL (op1) & 0xFF;
41018 op1 = GEN_INT (length);
41019 op2 = GEN_INT (lsb_index);
41020 pat = GEN_FCN (icode) (target, op0, op1, op2);
41026 case IX86_BUILTIN_RDRAND16_STEP:
41027 icode = CODE_FOR_rdrandhi_1;
41031 case IX86_BUILTIN_RDRAND32_STEP:
41032 icode = CODE_FOR_rdrandsi_1;
41036 case IX86_BUILTIN_RDRAND64_STEP:
41037 icode = CODE_FOR_rdranddi_1;
41041 op0 = gen_reg_rtx (mode0);
41042 emit_insn (GEN_FCN (icode) (op0));
41044 arg0 = CALL_EXPR_ARG (exp, 0);
41045 op1 = expand_normal (arg0);
41046 if (!address_operand (op1, VOIDmode))
41048 op1 = convert_memory_address (Pmode, op1);
41049 op1 = copy_addr_to_reg (op1);
41051 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
41053 op1 = gen_reg_rtx (SImode);
41054 emit_move_insn (op1, CONST1_RTX (SImode));
41056 /* Emit SImode conditional move. */
41057 if (mode0 == HImode)
41059 op2 = gen_reg_rtx (SImode);
41060 emit_insn (gen_zero_extendhisi2 (op2, op0));
41062 else if (mode0 == SImode)
41065 op2 = gen_rtx_SUBREG (SImode, op0, 0);
41068 || !register_operand (target, SImode))
41069 target = gen_reg_rtx (SImode);
41071 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
41073 emit_insn (gen_rtx_SET (target,
41074 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
41077 case IX86_BUILTIN_RDSEED16_STEP:
41078 icode = CODE_FOR_rdseedhi_1;
41082 case IX86_BUILTIN_RDSEED32_STEP:
41083 icode = CODE_FOR_rdseedsi_1;
41087 case IX86_BUILTIN_RDSEED64_STEP:
41088 icode = CODE_FOR_rdseeddi_1;
41092 op0 = gen_reg_rtx (mode0);
41093 emit_insn (GEN_FCN (icode) (op0));
41095 arg0 = CALL_EXPR_ARG (exp, 0);
41096 op1 = expand_normal (arg0);
41097 if (!address_operand (op1, VOIDmode))
41099 op1 = convert_memory_address (Pmode, op1);
41100 op1 = copy_addr_to_reg (op1);
41102 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
41104 op2 = gen_reg_rtx (QImode);
41106 pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG),
41108 emit_insn (gen_rtx_SET (op2, pat));
41111 || !register_operand (target, SImode))
41112 target = gen_reg_rtx (SImode);
41114 emit_insn (gen_zero_extendqisi2 (target, op2));
41117 case IX86_BUILTIN_SBB32:
41118 icode = CODE_FOR_subborrowsi;
41122 case IX86_BUILTIN_SBB64:
41123 icode = CODE_FOR_subborrowdi;
41127 case IX86_BUILTIN_ADDCARRYX32:
41128 icode = CODE_FOR_addcarrysi;
41132 case IX86_BUILTIN_ADDCARRYX64:
41133 icode = CODE_FOR_addcarrydi;
41137 arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in. */
41138 arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1. */
41139 arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2. */
41140 arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out. */
41142 op1 = expand_normal (arg0);
41143 op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1));
41145 op2 = expand_normal (arg1);
41146 if (!register_operand (op2, mode0))
41147 op2 = copy_to_mode_reg (mode0, op2);
41149 op3 = expand_normal (arg2);
41150 if (!register_operand (op3, mode0))
41151 op3 = copy_to_mode_reg (mode0, op3);
41153 op4 = expand_normal (arg3);
41154 if (!address_operand (op4, VOIDmode))
41156 op4 = convert_memory_address (Pmode, op4);
41157 op4 = copy_addr_to_reg (op4);
41160 /* Generate CF from input operand. */
41161 emit_insn (gen_addqi3_cconly_overflow (op1, constm1_rtx));
41163 /* Generate instruction that consumes CF. */
41164 op0 = gen_reg_rtx (mode0);
41166 op1 = gen_rtx_REG (CCCmode, FLAGS_REG);
41167 pat = gen_rtx_LTU (mode0, op1, const0_rtx);
41168 emit_insn (GEN_FCN (icode) (op0, op2, op3, op1, pat));
41170 /* Return current CF value. */
41172 target = gen_reg_rtx (QImode);
41174 PUT_MODE (pat, QImode);
41175 emit_insn (gen_rtx_SET (target, pat));
41177 /* Store the result. */
41178 emit_move_insn (gen_rtx_MEM (mode0, op4), op0);
41182 case IX86_BUILTIN_READ_FLAGS:
41183 emit_insn (gen_push (gen_rtx_REG (word_mode, FLAGS_REG)));
41186 || target == NULL_RTX
41187 || !nonimmediate_operand (target, word_mode)
41188 || GET_MODE (target) != word_mode)
41189 target = gen_reg_rtx (word_mode);
41191 emit_insn (gen_pop (target));
41194 case IX86_BUILTIN_WRITE_FLAGS:
41196 arg0 = CALL_EXPR_ARG (exp, 0);
41197 op0 = expand_normal (arg0);
41198 if (!general_no_elim_operand (op0, word_mode))
41199 op0 = copy_to_mode_reg (word_mode, op0);
41201 emit_insn (gen_push (op0));
41202 emit_insn (gen_pop (gen_rtx_REG (word_mode, FLAGS_REG)));
41205 case IX86_BUILTIN_KORTESTC16:
41206 icode = CODE_FOR_kortestchi;
41211 case IX86_BUILTIN_KORTESTZ16:
41212 icode = CODE_FOR_kortestzhi;
41217 arg0 = CALL_EXPR_ARG (exp, 0); /* Mask reg src1. */
41218 arg1 = CALL_EXPR_ARG (exp, 1); /* Mask reg src2. */
41219 op0 = expand_normal (arg0);
41220 op1 = expand_normal (arg1);
41222 op0 = copy_to_reg (op0);
41223 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
41224 op1 = copy_to_reg (op1);
41225 op1 = simplify_gen_subreg (mode0, op1, GET_MODE (op1), 0);
41227 target = gen_reg_rtx (QImode);
41228 emit_insn (gen_rtx_SET (target, const0_rtx));
41230 /* Emit kortest. */
41231 emit_insn (GEN_FCN (icode) (op0, op1));
41232 /* And use setcc to return result from flags. */
41233 ix86_expand_setcc (target, EQ,
41234 gen_rtx_REG (mode1, FLAGS_REG), const0_rtx);
41237 case IX86_BUILTIN_GATHERSIV2DF:
41238 icode = CODE_FOR_avx2_gathersiv2df;
41240 case IX86_BUILTIN_GATHERSIV4DF:
41241 icode = CODE_FOR_avx2_gathersiv4df;
41243 case IX86_BUILTIN_GATHERDIV2DF:
41244 icode = CODE_FOR_avx2_gatherdiv2df;
41246 case IX86_BUILTIN_GATHERDIV4DF:
41247 icode = CODE_FOR_avx2_gatherdiv4df;
41249 case IX86_BUILTIN_GATHERSIV4SF:
41250 icode = CODE_FOR_avx2_gathersiv4sf;
41252 case IX86_BUILTIN_GATHERSIV8SF:
41253 icode = CODE_FOR_avx2_gathersiv8sf;
41255 case IX86_BUILTIN_GATHERDIV4SF:
41256 icode = CODE_FOR_avx2_gatherdiv4sf;
41258 case IX86_BUILTIN_GATHERDIV8SF:
41259 icode = CODE_FOR_avx2_gatherdiv8sf;
41261 case IX86_BUILTIN_GATHERSIV2DI:
41262 icode = CODE_FOR_avx2_gathersiv2di;
41264 case IX86_BUILTIN_GATHERSIV4DI:
41265 icode = CODE_FOR_avx2_gathersiv4di;
41267 case IX86_BUILTIN_GATHERDIV2DI:
41268 icode = CODE_FOR_avx2_gatherdiv2di;
41270 case IX86_BUILTIN_GATHERDIV4DI:
41271 icode = CODE_FOR_avx2_gatherdiv4di;
41273 case IX86_BUILTIN_GATHERSIV4SI:
41274 icode = CODE_FOR_avx2_gathersiv4si;
41276 case IX86_BUILTIN_GATHERSIV8SI:
41277 icode = CODE_FOR_avx2_gathersiv8si;
41279 case IX86_BUILTIN_GATHERDIV4SI:
41280 icode = CODE_FOR_avx2_gatherdiv4si;
41282 case IX86_BUILTIN_GATHERDIV8SI:
41283 icode = CODE_FOR_avx2_gatherdiv8si;
41285 case IX86_BUILTIN_GATHERALTSIV4DF:
41286 icode = CODE_FOR_avx2_gathersiv4df;
41288 case IX86_BUILTIN_GATHERALTDIV8SF:
41289 icode = CODE_FOR_avx2_gatherdiv8sf;
41291 case IX86_BUILTIN_GATHERALTSIV4DI:
41292 icode = CODE_FOR_avx2_gathersiv4di;
41294 case IX86_BUILTIN_GATHERALTDIV8SI:
41295 icode = CODE_FOR_avx2_gatherdiv8si;
41297 case IX86_BUILTIN_GATHER3SIV16SF:
41298 icode = CODE_FOR_avx512f_gathersiv16sf;
41300 case IX86_BUILTIN_GATHER3SIV8DF:
41301 icode = CODE_FOR_avx512f_gathersiv8df;
41303 case IX86_BUILTIN_GATHER3DIV16SF:
41304 icode = CODE_FOR_avx512f_gatherdiv16sf;
41306 case IX86_BUILTIN_GATHER3DIV8DF:
41307 icode = CODE_FOR_avx512f_gatherdiv8df;
41309 case IX86_BUILTIN_GATHER3SIV16SI:
41310 icode = CODE_FOR_avx512f_gathersiv16si;
41312 case IX86_BUILTIN_GATHER3SIV8DI:
41313 icode = CODE_FOR_avx512f_gathersiv8di;
41315 case IX86_BUILTIN_GATHER3DIV16SI:
41316 icode = CODE_FOR_avx512f_gatherdiv16si;
41318 case IX86_BUILTIN_GATHER3DIV8DI:
41319 icode = CODE_FOR_avx512f_gatherdiv8di;
41321 case IX86_BUILTIN_GATHER3ALTSIV8DF:
41322 icode = CODE_FOR_avx512f_gathersiv8df;
41324 case IX86_BUILTIN_GATHER3ALTDIV16SF:
41325 icode = CODE_FOR_avx512f_gatherdiv16sf;
41327 case IX86_BUILTIN_GATHER3ALTSIV8DI:
41328 icode = CODE_FOR_avx512f_gathersiv8di;
41330 case IX86_BUILTIN_GATHER3ALTDIV16SI:
41331 icode = CODE_FOR_avx512f_gatherdiv16si;
41333 case IX86_BUILTIN_GATHER3SIV2DF:
41334 icode = CODE_FOR_avx512vl_gathersiv2df;
41336 case IX86_BUILTIN_GATHER3SIV4DF:
41337 icode = CODE_FOR_avx512vl_gathersiv4df;
41339 case IX86_BUILTIN_GATHER3DIV2DF:
41340 icode = CODE_FOR_avx512vl_gatherdiv2df;
41342 case IX86_BUILTIN_GATHER3DIV4DF:
41343 icode = CODE_FOR_avx512vl_gatherdiv4df;
41345 case IX86_BUILTIN_GATHER3SIV4SF:
41346 icode = CODE_FOR_avx512vl_gathersiv4sf;
41348 case IX86_BUILTIN_GATHER3SIV8SF:
41349 icode = CODE_FOR_avx512vl_gathersiv8sf;
41351 case IX86_BUILTIN_GATHER3DIV4SF:
41352 icode = CODE_FOR_avx512vl_gatherdiv4sf;
41354 case IX86_BUILTIN_GATHER3DIV8SF:
41355 icode = CODE_FOR_avx512vl_gatherdiv8sf;
41357 case IX86_BUILTIN_GATHER3SIV2DI:
41358 icode = CODE_FOR_avx512vl_gathersiv2di;
41360 case IX86_BUILTIN_GATHER3SIV4DI:
41361 icode = CODE_FOR_avx512vl_gathersiv4di;
41363 case IX86_BUILTIN_GATHER3DIV2DI:
41364 icode = CODE_FOR_avx512vl_gatherdiv2di;
41366 case IX86_BUILTIN_GATHER3DIV4DI:
41367 icode = CODE_FOR_avx512vl_gatherdiv4di;
41369 case IX86_BUILTIN_GATHER3SIV4SI:
41370 icode = CODE_FOR_avx512vl_gathersiv4si;
41372 case IX86_BUILTIN_GATHER3SIV8SI:
41373 icode = CODE_FOR_avx512vl_gathersiv8si;
41375 case IX86_BUILTIN_GATHER3DIV4SI:
41376 icode = CODE_FOR_avx512vl_gatherdiv4si;
41378 case IX86_BUILTIN_GATHER3DIV8SI:
41379 icode = CODE_FOR_avx512vl_gatherdiv8si;
41381 case IX86_BUILTIN_GATHER3ALTSIV4DF:
41382 icode = CODE_FOR_avx512vl_gathersiv4df;
41384 case IX86_BUILTIN_GATHER3ALTDIV8SF:
41385 icode = CODE_FOR_avx512vl_gatherdiv8sf;
41387 case IX86_BUILTIN_GATHER3ALTSIV4DI:
41388 icode = CODE_FOR_avx512vl_gathersiv4di;
41390 case IX86_BUILTIN_GATHER3ALTDIV8SI:
41391 icode = CODE_FOR_avx512vl_gatherdiv8si;
41393 case IX86_BUILTIN_SCATTERSIV16SF:
41394 icode = CODE_FOR_avx512f_scattersiv16sf;
41396 case IX86_BUILTIN_SCATTERSIV8DF:
41397 icode = CODE_FOR_avx512f_scattersiv8df;
41399 case IX86_BUILTIN_SCATTERDIV16SF:
41400 icode = CODE_FOR_avx512f_scatterdiv16sf;
41402 case IX86_BUILTIN_SCATTERDIV8DF:
41403 icode = CODE_FOR_avx512f_scatterdiv8df;
41405 case IX86_BUILTIN_SCATTERSIV16SI:
41406 icode = CODE_FOR_avx512f_scattersiv16si;
41408 case IX86_BUILTIN_SCATTERSIV8DI:
41409 icode = CODE_FOR_avx512f_scattersiv8di;
41411 case IX86_BUILTIN_SCATTERDIV16SI:
41412 icode = CODE_FOR_avx512f_scatterdiv16si;
41414 case IX86_BUILTIN_SCATTERDIV8DI:
41415 icode = CODE_FOR_avx512f_scatterdiv8di;
41417 case IX86_BUILTIN_SCATTERSIV8SF:
41418 icode = CODE_FOR_avx512vl_scattersiv8sf;
41420 case IX86_BUILTIN_SCATTERSIV4SF:
41421 icode = CODE_FOR_avx512vl_scattersiv4sf;
41423 case IX86_BUILTIN_SCATTERSIV4DF:
41424 icode = CODE_FOR_avx512vl_scattersiv4df;
41426 case IX86_BUILTIN_SCATTERSIV2DF:
41427 icode = CODE_FOR_avx512vl_scattersiv2df;
41429 case IX86_BUILTIN_SCATTERDIV8SF:
41430 icode = CODE_FOR_avx512vl_scatterdiv8sf;
41432 case IX86_BUILTIN_SCATTERDIV4SF:
41433 icode = CODE_FOR_avx512vl_scatterdiv4sf;
41435 case IX86_BUILTIN_SCATTERDIV4DF:
41436 icode = CODE_FOR_avx512vl_scatterdiv4df;
41438 case IX86_BUILTIN_SCATTERDIV2DF:
41439 icode = CODE_FOR_avx512vl_scatterdiv2df;
41441 case IX86_BUILTIN_SCATTERSIV8SI:
41442 icode = CODE_FOR_avx512vl_scattersiv8si;
41444 case IX86_BUILTIN_SCATTERSIV4SI:
41445 icode = CODE_FOR_avx512vl_scattersiv4si;
41447 case IX86_BUILTIN_SCATTERSIV4DI:
41448 icode = CODE_FOR_avx512vl_scattersiv4di;
41450 case IX86_BUILTIN_SCATTERSIV2DI:
41451 icode = CODE_FOR_avx512vl_scattersiv2di;
41453 case IX86_BUILTIN_SCATTERDIV8SI:
41454 icode = CODE_FOR_avx512vl_scatterdiv8si;
41456 case IX86_BUILTIN_SCATTERDIV4SI:
41457 icode = CODE_FOR_avx512vl_scatterdiv4si;
41459 case IX86_BUILTIN_SCATTERDIV4DI:
41460 icode = CODE_FOR_avx512vl_scatterdiv4di;
41462 case IX86_BUILTIN_SCATTERDIV2DI:
41463 icode = CODE_FOR_avx512vl_scatterdiv2di;
41465 case IX86_BUILTIN_GATHERPFDPD:
41466 icode = CODE_FOR_avx512pf_gatherpfv8sidf;
41467 goto vec_prefetch_gen;
41468 case IX86_BUILTIN_SCATTERALTSIV8DF:
41469 icode = CODE_FOR_avx512f_scattersiv8df;
41471 case IX86_BUILTIN_SCATTERALTDIV16SF:
41472 icode = CODE_FOR_avx512f_scatterdiv16sf;
41474 case IX86_BUILTIN_SCATTERALTSIV8DI:
41475 icode = CODE_FOR_avx512f_scattersiv8di;
41477 case IX86_BUILTIN_SCATTERALTDIV16SI:
41478 icode = CODE_FOR_avx512f_scatterdiv16si;
41480 case IX86_BUILTIN_GATHERPFDPS:
41481 icode = CODE_FOR_avx512pf_gatherpfv16sisf;
41482 goto vec_prefetch_gen;
41483 case IX86_BUILTIN_GATHERPFQPD:
41484 icode = CODE_FOR_avx512pf_gatherpfv8didf;
41485 goto vec_prefetch_gen;
41486 case IX86_BUILTIN_GATHERPFQPS:
41487 icode = CODE_FOR_avx512pf_gatherpfv8disf;
41488 goto vec_prefetch_gen;
41489 case IX86_BUILTIN_SCATTERPFDPD:
41490 icode = CODE_FOR_avx512pf_scatterpfv8sidf;
41491 goto vec_prefetch_gen;
41492 case IX86_BUILTIN_SCATTERPFDPS:
41493 icode = CODE_FOR_avx512pf_scatterpfv16sisf;
41494 goto vec_prefetch_gen;
41495 case IX86_BUILTIN_SCATTERPFQPD:
41496 icode = CODE_FOR_avx512pf_scatterpfv8didf;
41497 goto vec_prefetch_gen;
41498 case IX86_BUILTIN_SCATTERPFQPS:
41499 icode = CODE_FOR_avx512pf_scatterpfv8disf;
41500 goto vec_prefetch_gen;
41504 rtx (*gen) (rtx, rtx);
41506 arg0 = CALL_EXPR_ARG (exp, 0);
41507 arg1 = CALL_EXPR_ARG (exp, 1);
41508 arg2 = CALL_EXPR_ARG (exp, 2);
41509 arg3 = CALL_EXPR_ARG (exp, 3);
41510 arg4 = CALL_EXPR_ARG (exp, 4);
41511 op0 = expand_normal (arg0);
41512 op1 = expand_normal (arg1);
41513 op2 = expand_normal (arg2);
41514 op3 = expand_normal (arg3);
41515 op4 = expand_normal (arg4);
41516 /* Note the arg order is different from the operand order. */
41517 mode0 = insn_data[icode].operand[1].mode;
41518 mode2 = insn_data[icode].operand[3].mode;
41519 mode3 = insn_data[icode].operand[4].mode;
41520 mode4 = insn_data[icode].operand[5].mode;
41522 if (target == NULL_RTX
41523 || GET_MODE (target) != insn_data[icode].operand[0].mode
41524 || !insn_data[icode].operand[0].predicate (target,
41525 GET_MODE (target)))
41526 subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
41528 subtarget = target;
41532 case IX86_BUILTIN_GATHER3ALTSIV8DF:
41533 case IX86_BUILTIN_GATHER3ALTSIV8DI:
41534 half = gen_reg_rtx (V8SImode);
41535 if (!nonimmediate_operand (op2, V16SImode))
41536 op2 = copy_to_mode_reg (V16SImode, op2);
41537 emit_insn (gen_vec_extract_lo_v16si (half, op2));
41540 case IX86_BUILTIN_GATHER3ALTSIV4DF:
41541 case IX86_BUILTIN_GATHER3ALTSIV4DI:
41542 case IX86_BUILTIN_GATHERALTSIV4DF:
41543 case IX86_BUILTIN_GATHERALTSIV4DI:
41544 half = gen_reg_rtx (V4SImode);
41545 if (!nonimmediate_operand (op2, V8SImode))
41546 op2 = copy_to_mode_reg (V8SImode, op2);
41547 emit_insn (gen_vec_extract_lo_v8si (half, op2));
41550 case IX86_BUILTIN_GATHER3ALTDIV16SF:
41551 case IX86_BUILTIN_GATHER3ALTDIV16SI:
41552 half = gen_reg_rtx (mode0);
41553 if (mode0 == V8SFmode)
41554 gen = gen_vec_extract_lo_v16sf;
41556 gen = gen_vec_extract_lo_v16si;
41557 if (!nonimmediate_operand (op0, GET_MODE (op0)))
41558 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
41559 emit_insn (gen (half, op0));
41561 if (GET_MODE (op3) != VOIDmode)
41563 if (!nonimmediate_operand (op3, GET_MODE (op3)))
41564 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
41565 emit_insn (gen (half, op3));
41569 case IX86_BUILTIN_GATHER3ALTDIV8SF:
41570 case IX86_BUILTIN_GATHER3ALTDIV8SI:
41571 case IX86_BUILTIN_GATHERALTDIV8SF:
41572 case IX86_BUILTIN_GATHERALTDIV8SI:
41573 half = gen_reg_rtx (mode0);
41574 if (mode0 == V4SFmode)
41575 gen = gen_vec_extract_lo_v8sf;
41577 gen = gen_vec_extract_lo_v8si;
41578 if (!nonimmediate_operand (op0, GET_MODE (op0)))
41579 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
41580 emit_insn (gen (half, op0));
41582 if (GET_MODE (op3) != VOIDmode)
41584 if (!nonimmediate_operand (op3, GET_MODE (op3)))
41585 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
41586 emit_insn (gen (half, op3));
41594 /* Force memory operand only with base register here. But we
41595 don't want to do it on memory operand for other builtin
41597 op1 = ix86_zero_extend_to_Pmode (op1);
41599 if (!insn_data[icode].operand[1].predicate (op0, mode0))
41600 op0 = copy_to_mode_reg (mode0, op0);
41601 if (!insn_data[icode].operand[2].predicate (op1, Pmode))
41602 op1 = copy_to_mode_reg (Pmode, op1);
41603 if (!insn_data[icode].operand[3].predicate (op2, mode2))
41604 op2 = copy_to_mode_reg (mode2, op2);
41606 op3 = fixup_modeless_constant (op3, mode3);
41608 if (GET_MODE (op3) == mode3 || GET_MODE (op3) == VOIDmode)
41610 if (!insn_data[icode].operand[4].predicate (op3, mode3))
41611 op3 = copy_to_mode_reg (mode3, op3);
41615 op3 = copy_to_reg (op3);
41616 op3 = simplify_gen_subreg (mode3, op3, GET_MODE (op3), 0);
41618 if (!insn_data[icode].operand[5].predicate (op4, mode4))
41620 error ("the last argument must be scale 1, 2, 4, 8");
41624 /* Optimize. If mask is known to have all high bits set,
41625 replace op0 with pc_rtx to signal that the instruction
41626 overwrites the whole destination and doesn't use its
41627 previous contents. */
41630 if (TREE_CODE (arg3) == INTEGER_CST)
41632 if (integer_all_onesp (arg3))
41635 else if (TREE_CODE (arg3) == VECTOR_CST)
41637 unsigned int negative = 0;
41638 for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
41640 tree cst = VECTOR_CST_ELT (arg3, i);
41641 if (TREE_CODE (cst) == INTEGER_CST
41642 && tree_int_cst_sign_bit (cst))
41644 else if (TREE_CODE (cst) == REAL_CST
41645 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
41648 if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
41651 else if (TREE_CODE (arg3) == SSA_NAME
41652 && TREE_CODE (TREE_TYPE (arg3)) == VECTOR_TYPE)
41654 /* Recognize also when mask is like:
41655 __v2df src = _mm_setzero_pd ();
41656 __v2df mask = _mm_cmpeq_pd (src, src);
41658 __v8sf src = _mm256_setzero_ps ();
41659 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
41660 as that is a cheaper way to load all ones into
41661 a register than having to load a constant from
41663 gimple *def_stmt = SSA_NAME_DEF_STMT (arg3);
41664 if (is_gimple_call (def_stmt))
41666 tree fndecl = gimple_call_fndecl (def_stmt);
41668 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
41669 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
41671 case IX86_BUILTIN_CMPPD:
41672 case IX86_BUILTIN_CMPPS:
41673 case IX86_BUILTIN_CMPPD256:
41674 case IX86_BUILTIN_CMPPS256:
41675 if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
41678 case IX86_BUILTIN_CMPEQPD:
41679 case IX86_BUILTIN_CMPEQPS:
41680 if (initializer_zerop (gimple_call_arg (def_stmt, 0))
41681 && initializer_zerop (gimple_call_arg (def_stmt,
41692 pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
41699 case IX86_BUILTIN_GATHER3DIV16SF:
41700 if (target == NULL_RTX)
41701 target = gen_reg_rtx (V8SFmode);
41702 emit_insn (gen_vec_extract_lo_v16sf (target, subtarget));
41704 case IX86_BUILTIN_GATHER3DIV16SI:
41705 if (target == NULL_RTX)
41706 target = gen_reg_rtx (V8SImode);
41707 emit_insn (gen_vec_extract_lo_v16si (target, subtarget));
41709 case IX86_BUILTIN_GATHER3DIV8SF:
41710 case IX86_BUILTIN_GATHERDIV8SF:
41711 if (target == NULL_RTX)
41712 target = gen_reg_rtx (V4SFmode);
41713 emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
41715 case IX86_BUILTIN_GATHER3DIV8SI:
41716 case IX86_BUILTIN_GATHERDIV8SI:
41717 if (target == NULL_RTX)
41718 target = gen_reg_rtx (V4SImode);
41719 emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
41722 target = subtarget;
41728 arg0 = CALL_EXPR_ARG (exp, 0);
41729 arg1 = CALL_EXPR_ARG (exp, 1);
41730 arg2 = CALL_EXPR_ARG (exp, 2);
41731 arg3 = CALL_EXPR_ARG (exp, 3);
41732 arg4 = CALL_EXPR_ARG (exp, 4);
41733 op0 = expand_normal (arg0);
41734 op1 = expand_normal (arg1);
41735 op2 = expand_normal (arg2);
41736 op3 = expand_normal (arg3);
41737 op4 = expand_normal (arg4);
41738 mode1 = insn_data[icode].operand[1].mode;
41739 mode2 = insn_data[icode].operand[2].mode;
41740 mode3 = insn_data[icode].operand[3].mode;
41741 mode4 = insn_data[icode].operand[4].mode;
41743 /* Scatter instruction stores operand op3 to memory with
41744 indices from op2 and scale from op4 under writemask op1.
41745 If index operand op2 has more elements then source operand
41746 op3 one need to use only its low half. And vice versa. */
41749 case IX86_BUILTIN_SCATTERALTSIV8DF:
41750 case IX86_BUILTIN_SCATTERALTSIV8DI:
41751 half = gen_reg_rtx (V8SImode);
41752 if (!nonimmediate_operand (op2, V16SImode))
41753 op2 = copy_to_mode_reg (V16SImode, op2);
41754 emit_insn (gen_vec_extract_lo_v16si (half, op2));
41757 case IX86_BUILTIN_SCATTERALTDIV16SF:
41758 case IX86_BUILTIN_SCATTERALTDIV16SI:
41759 half = gen_reg_rtx (mode3);
41760 if (mode3 == V8SFmode)
41761 gen = gen_vec_extract_lo_v16sf;
41763 gen = gen_vec_extract_lo_v16si;
41764 if (!nonimmediate_operand (op3, GET_MODE (op3)))
41765 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
41766 emit_insn (gen (half, op3));
41773 /* Force memory operand only with base register here. But we
41774 don't want to do it on memory operand for other builtin
41776 op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
41778 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
41779 op0 = copy_to_mode_reg (Pmode, op0);
41781 op1 = fixup_modeless_constant (op1, mode1);
41783 if (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
41785 if (!insn_data[icode].operand[1].predicate (op1, mode1))
41786 op1 = copy_to_mode_reg (mode1, op1);
41790 op1 = copy_to_reg (op1);
41791 op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
41794 if (!insn_data[icode].operand[2].predicate (op2, mode2))
41795 op2 = copy_to_mode_reg (mode2, op2);
41797 if (!insn_data[icode].operand[3].predicate (op3, mode3))
41798 op3 = copy_to_mode_reg (mode3, op3);
41800 if (!insn_data[icode].operand[4].predicate (op4, mode4))
41802 error ("the last argument must be scale 1, 2, 4, 8");
41806 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
41814 arg0 = CALL_EXPR_ARG (exp, 0);
41815 arg1 = CALL_EXPR_ARG (exp, 1);
41816 arg2 = CALL_EXPR_ARG (exp, 2);
41817 arg3 = CALL_EXPR_ARG (exp, 3);
41818 arg4 = CALL_EXPR_ARG (exp, 4);
41819 op0 = expand_normal (arg0);
41820 op1 = expand_normal (arg1);
41821 op2 = expand_normal (arg2);
41822 op3 = expand_normal (arg3);
41823 op4 = expand_normal (arg4);
41824 mode0 = insn_data[icode].operand[0].mode;
41825 mode1 = insn_data[icode].operand[1].mode;
41826 mode3 = insn_data[icode].operand[3].mode;
41827 mode4 = insn_data[icode].operand[4].mode;
41829 op0 = fixup_modeless_constant (op0, mode0);
41831 if (GET_MODE (op0) == mode0
41832 || (GET_MODE (op0) == VOIDmode && op0 != constm1_rtx))
41834 if (!insn_data[icode].operand[0].predicate (op0, mode0))
41835 op0 = copy_to_mode_reg (mode0, op0);
41837 else if (op0 != constm1_rtx)
41839 op0 = copy_to_reg (op0);
41840 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
41843 if (!insn_data[icode].operand[1].predicate (op1, mode1))
41844 op1 = copy_to_mode_reg (mode1, op1);
41846 /* Force memory operand only with base register here. But we
41847 don't want to do it on memory operand for other builtin
41849 op2 = force_reg (Pmode, convert_to_mode (Pmode, op2, 1));
41851 if (!insn_data[icode].operand[2].predicate (op2, Pmode))
41852 op2 = copy_to_mode_reg (Pmode, op2);
41854 if (!insn_data[icode].operand[3].predicate (op3, mode3))
41856 error ("the forth argument must be scale 1, 2, 4, 8");
41860 if (!insn_data[icode].operand[4].predicate (op4, mode4))
41862 error ("incorrect hint operand");
41866 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
41874 case IX86_BUILTIN_XABORT:
41875 icode = CODE_FOR_xabort;
41876 arg0 = CALL_EXPR_ARG (exp, 0);
41877 op0 = expand_normal (arg0);
41878 mode0 = insn_data[icode].operand[0].mode;
41879 if (!insn_data[icode].operand[0].predicate (op0, mode0))
41881 error ("the xabort's argument must be an 8-bit immediate");
41884 emit_insn (gen_xabort (op0));
41891 for (i = 0, d = bdesc_special_args;
41892 i < ARRAY_SIZE (bdesc_special_args);
41894 if (d->code == fcode)
41895 return ix86_expand_special_args_builtin (d, exp, target);
41897 for (i = 0, d = bdesc_args;
41898 i < ARRAY_SIZE (bdesc_args);
41900 if (d->code == fcode)
41903 case IX86_BUILTIN_FABSQ:
41904 case IX86_BUILTIN_COPYSIGNQ:
41906 /* Emit a normal call if SSE isn't available. */
41907 return expand_call (exp, target, ignore);
41909 return ix86_expand_args_builtin (d, exp, target);
41912 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
41913 if (d->code == fcode)
41914 return ix86_expand_sse_comi (d, exp, target);
41916 for (i = 0, d = bdesc_round_args; i < ARRAY_SIZE (bdesc_round_args); i++, d++)
41917 if (d->code == fcode)
41918 return ix86_expand_round_builtin (d, exp, target);
41920 for (i = 0, d = bdesc_pcmpestr;
41921 i < ARRAY_SIZE (bdesc_pcmpestr);
41923 if (d->code == fcode)
41924 return ix86_expand_sse_pcmpestr (d, exp, target);
41926 for (i = 0, d = bdesc_pcmpistr;
41927 i < ARRAY_SIZE (bdesc_pcmpistr);
41929 if (d->code == fcode)
41930 return ix86_expand_sse_pcmpistr (d, exp, target);
41932 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
41933 if (d->code == fcode)
41934 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
41935 (enum ix86_builtin_func_type)
41936 d->flag, d->comparison);
41938 gcc_unreachable ();
41941 /* This returns the target-specific builtin with code CODE if
41942 current_function_decl has visibility on this builtin, which is checked
41943 using isa flags. Returns NULL_TREE otherwise. */
41945 static tree ix86_get_builtin (enum ix86_builtins code)
41947 struct cl_target_option *opts;
41948 tree target_tree = NULL_TREE;
41950 /* Determine the isa flags of current_function_decl. */
41952 if (current_function_decl)
41953 target_tree = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl);
/* No target attribute on the current function: fall back to the
   command-line (default) target options node.  */
41955 if (target_tree == NULL)
41956 target_tree = target_option_default_node;
41958 opts = TREE_TARGET_OPTION (target_tree);
/* Expose the builtin only when at least one ISA bit it requires is
   enabled for this function; callers otherwise get NULL_TREE.  */
41960 if (ix86_builtins_isa[(int) code].isa & opts->x_ix86_isa_flags)
41961 return ix86_builtin_decl (code, true);
41966 /* Return function decl for target specific builtin
41967 for given MPX builtin passed in FCODE. */
41969 ix86_builtin_mpx_function (unsigned fcode)
/* Translate each generic Pointer Bounds Checker (CHKP) builtin code
   into the corresponding x86 MPX builtin decl stored in
   ix86_builtins[].  */
41973 case BUILT_IN_CHKP_BNDMK:
41974 return ix86_builtins[IX86_BUILTIN_BNDMK];
41976 case BUILT_IN_CHKP_BNDSTX:
41977 return ix86_builtins[IX86_BUILTIN_BNDSTX];
41979 case BUILT_IN_CHKP_BNDLDX:
41980 return ix86_builtins[IX86_BUILTIN_BNDLDX];
41982 case BUILT_IN_CHKP_BNDCL:
41983 return ix86_builtins[IX86_BUILTIN_BNDCL];
41985 case BUILT_IN_CHKP_BNDCU:
41986 return ix86_builtins[IX86_BUILTIN_BNDCU];
41988 case BUILT_IN_CHKP_BNDRET:
41989 return ix86_builtins[IX86_BUILTIN_BNDRET];
41991 case BUILT_IN_CHKP_INTERSECT:
41992 return ix86_builtins[IX86_BUILTIN_BNDINT];
41994 case BUILT_IN_CHKP_NARROW:
41995 return ix86_builtins[IX86_BUILTIN_BNDNARROW];
41997 case BUILT_IN_CHKP_SIZEOF:
41998 return ix86_builtins[IX86_BUILTIN_SIZEOF];
42000 case BUILT_IN_CHKP_EXTRACT_LOWER:
42001 return ix86_builtins[IX86_BUILTIN_BNDLOWER];
42003 case BUILT_IN_CHKP_EXTRACT_UPPER:
42004 return ix86_builtins[IX86_BUILTIN_BNDUPPER];
/* Any other CHKP code has no MPX counterpart here.  */
42010 gcc_unreachable ();
42013 /* Helper function for ix86_load_bounds and ix86_store_bounds.
42015 Return an address to be used to load/store bounds for pointer
42018 SLOT_NO is an integer constant holding number of a target
42019 dependent special slot to be used in case SLOT is not a memory.
42021 SPECIAL_BASE is a pointer to be used as a base of fake address
42022 to access special slots in Bounds Table. SPECIAL_BASE[-1],
42023 SPECIAL_BASE[-2] etc. will be used as fake pointer locations. */
42026 ix86_get_arg_address_for_bt (rtx slot, rtx slot_no, rtx special_base)
42030 /* NULL slot means we pass bounds for pointer not passed to the
42031 function at all. Register slot means we pass pointer in a
42032 register. In both these cases bounds are passed via Bounds
42033 Table. Since we do not have actual pointer stored in memory,
42034 we have to use fake addresses to access Bounds Table. We
42035 start with (special_base - sizeof (void*)) and decrease this
42036 address by pointer size to get addresses for other slots. */
42037 if (!slot || REG_P (slot))
/* SLOT_NO must be a CONST_INT when the fake-address scheme is used;
   slot N maps to SPECIAL_BASE - (N + 1) * sizeof (void *).  */
42039 gcc_assert (CONST_INT_P (slot_no));
42040 addr = plus_constant (Pmode, special_base,
42041 -(INTVAL (slot_no) + 1) * GET_MODE_SIZE (Pmode));
42043 /* If pointer is passed in a memory then its address is used to
42044 access Bounds Table. */
42045 else if (MEM_P (slot))
42047 addr = XEXP (slot, 0);
/* BNDLDX/BNDSTX address operands must be registers; force to one.  */
42048 if (!register_operand (addr, Pmode))
42049 addr = copy_addr_to_reg (addr);
/* SLOT must be NULL, a REG, or a MEM — anything else is a caller bug.  */
42052 gcc_unreachable ();
42057 /* Expand pass uses this hook to load bounds for function parameter
42058 PTR passed in SLOT in case its bounds are not passed in a register.
42060 If SLOT is a memory, then bounds are loaded as for regular pointer
42061 loaded from memory. PTR may be NULL in case SLOT is a memory.
42062 In such case value of PTR (if required) may be loaded from SLOT.
42064 If SLOT is NULL or a register then SLOT_NO is an integer constant
42065 holding number of the target dependent special slot which should be
42066 used to obtain bounds.
42068 Return loaded bounds. */
42071 ix86_load_bounds (rtx slot, rtx ptr, rtx slot_no)
42073 rtx reg = gen_reg_rtx (BNDmode);
42076 /* Get address to be used to access Bounds Table. Special slots start
42077 at the location of return address of the current function. */
42078 addr = ix86_get_arg_address_for_bt (slot, slot_no, arg_pointer_rtx);
42080 /* Load pointer value from a memory if we don't have it. */
42083 gcc_assert (MEM_P (slot));
42084 ptr = copy_addr_to_reg (slot);
/* BNDLDX requires the pointer operand in a Pmode register.  */
42087 if (!register_operand (ptr, Pmode))
42088 ptr = ix86_zero_extend_to_Pmode (ptr);
/* Emit the 64- or 32-bit MPX bounds-load depending on BNDmode.  */
42090 emit_insn (BNDmode == BND64mode
42091 ? gen_bnd64_ldx (reg, addr, ptr)
42092 : gen_bnd32_ldx (reg, addr, ptr));
42097 /* Expand pass uses this hook to store BOUNDS for call argument PTR
42098 passed in SLOT in case BOUNDS are not passed in a register.
42100 If SLOT is a memory, then BOUNDS are stored as for regular pointer
42101 stored in memory. PTR may be NULL in case SLOT is a memory.
42102 In such case value of PTR (if required) may be loaded from SLOT.
42104 If SLOT is NULL or a register then SLOT_NO is an integer constant
42105 holding number of the target dependent special slot which should be
42106 used to store BOUNDS. */
42109 ix86_store_bounds (rtx ptr, rtx slot, rtx bounds, rtx slot_no)
42113 /* Get address to be used to access Bounds Table. Special slots start
42114 at the location of return address of a called function. */
42115 addr = ix86_get_arg_address_for_bt (slot, slot_no, stack_pointer_rtx);
42117 /* Load pointer value from a memory if we don't have it. */
42120 gcc_assert (MEM_P (slot));
42121 ptr = copy_addr_to_reg (slot);
/* BNDSTX requires the pointer operand in a Pmode register.  */
42124 if (!register_operand (ptr, Pmode))
42125 ptr = ix86_zero_extend_to_Pmode (ptr);
/* BOUNDS must already be in a bounds mode; force into a BND register.  */
42127 gcc_assert (POINTER_BOUNDS_MODE_P (GET_MODE (bounds)));
42128 if (!register_operand (bounds, BNDmode))
42129 bounds = copy_to_mode_reg (BNDmode, bounds);
/* Emit the 64- or 32-bit MPX bounds-store depending on BNDmode.  */
42131 emit_insn (BNDmode == BND64mode
42132 ? gen_bnd64_stx (addr, ptr, bounds)
42133 : gen_bnd32_stx (addr, ptr, bounds));
42136 /* Load and return bounds returned by function in SLOT. */
42139 ix86_load_returned_bounds (rtx slot)
/* Returned bounds arrive in a hard bounds register; copy them into a
   fresh pseudo so later passes can freely use the value.  */
42143 gcc_assert (REG_P (slot));
42144 res = gen_reg_rtx (BNDmode);
42145 emit_move_insn (res, slot);
42150 /* Store BOUNDS returned by function into SLOT. */
42153 ix86_store_returned_bounds (rtx slot, rtx bounds)
/* SLOT is the hard bounds register designated for the return value;
   a plain move is all that is needed.  */
42155 gcc_assert (REG_P (slot));
42156 emit_move_insn (slot, bounds);
42159 /* Returns a function decl for a vectorized version of the combined function
42160 with combined_fn code FN and the result vector type TYPE, or NULL_TREE
42161 if it is not available. */
42164 ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
42167 machine_mode in_mode, out_mode;
/* Both types must be vectors; scalar requests are never matched here.  */
42170 if (TREE_CODE (type_out) != VECTOR_TYPE
42171 || TREE_CODE (type_in) != VECTOR_TYPE)
/* Element mode and lane count of each side drive the selection below.  */
42174 out_mode = TYPE_MODE (TREE_TYPE (type_out));
42175 out_n = TYPE_VECTOR_SUBPARTS (type_out);
42176 in_mode = TYPE_MODE (TREE_TYPE (type_in));
42177 in_n = TYPE_VECTOR_SUBPARTS (type_in);
/* exp2f: only a 16-lane AVX-512ER variant exists
   (presumably under CASE_CFN_EXP2 — case label lost in extraction).  */
42182 if (out_mode == SFmode && in_mode == SFmode)
42184 if (out_n == 16 && in_n == 16)
42185 return ix86_get_builtin (IX86_BUILTIN_EXP2PS);
/* floor -> int conversions (ifloor family, presumably).  */
42192 /* The round insn does not trap on denormals. */
42193 if (flag_trapping_math || !TARGET_ROUND)
42196 if (out_mode == SImode && in_mode == DFmode)
42198 if (out_n == 4 && in_n == 2)
42199 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX);
42200 else if (out_n == 8 && in_n == 4)
42201 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256);
42202 else if (out_n == 16 && in_n == 8)
42203 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512);
42205 if (out_mode == SImode && in_mode == SFmode)
42207 if (out_n == 4 && in_n == 4)
42208 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX);
42209 else if (out_n == 8 && in_n == 8)
42210 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX256);
/* ceil -> int conversions (iceil family, presumably).  */
42217 /* The round insn does not trap on denormals. */
42218 if (flag_trapping_math || !TARGET_ROUND)
42221 if (out_mode == SImode && in_mode == DFmode)
42223 if (out_n == 4 && in_n == 2)
42224 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX);
42225 else if (out_n == 8 && in_n == 4)
42226 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256);
42227 else if (out_n == 16 && in_n == 8)
42228 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512);
42230 if (out_mode == SImode && in_mode == SFmode)
42232 if (out_n == 4 && in_n == 4)
42233 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX);
42234 else if (out_n == 8 && in_n == 8)
42235 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX256);
/* lrint-style float -> int conversions (irint family, presumably).  */
42242 if (out_mode == SImode && in_mode == DFmode)
42244 if (out_n == 4 && in_n == 2)
42245 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX);
42246 else if (out_n == 8 && in_n == 4)
42247 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX256);
42249 if (out_mode == SImode && in_mode == SFmode)
42251 if (out_n == 4 && in_n == 4)
42252 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ);
42253 else if (out_n == 8 && in_n == 8)
42254 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ256);
/* round-away-from-zero -> int conversions (iround family, presumably).  */
42261 /* The round insn does not trap on denormals. */
42262 if (flag_trapping_math || !TARGET_ROUND)
42265 if (out_mode == SImode && in_mode == DFmode)
42267 if (out_n == 4 && in_n == 2)
42268 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX);
42269 else if (out_n == 8 && in_n == 4)
42270 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256);
42271 else if (out_n == 16 && in_n == 8)
42272 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512);
42274 if (out_mode == SImode && in_mode == SFmode)
42276 if (out_n == 4 && in_n == 4)
42277 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX);
42278 else if (out_n == 8 && in_n == 8)
42279 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256);
/* floor on floating result (CASE_CFN_FLOOR, presumably).  */
42284 /* The round insn does not trap on denormals. */
42285 if (flag_trapping_math || !TARGET_ROUND)
42288 if (out_mode == DFmode && in_mode == DFmode)
42290 if (out_n == 2 && in_n == 2)
42291 return ix86_get_builtin (IX86_BUILTIN_FLOORPD);
42292 else if (out_n == 4 && in_n == 4)
42293 return ix86_get_builtin (IX86_BUILTIN_FLOORPD256);
42295 if (out_mode == SFmode && in_mode == SFmode)
42297 if (out_n == 4 && in_n == 4)
42298 return ix86_get_builtin (IX86_BUILTIN_FLOORPS);
42299 else if (out_n == 8 && in_n == 8)
42300 return ix86_get_builtin (IX86_BUILTIN_FLOORPS256);
/* ceil on floating result (CASE_CFN_CEIL, presumably).  */
42305 /* The round insn does not trap on denormals. */
42306 if (flag_trapping_math || !TARGET_ROUND)
42309 if (out_mode == DFmode && in_mode == DFmode)
42311 if (out_n == 2 && in_n == 2)
42312 return ix86_get_builtin (IX86_BUILTIN_CEILPD);
42313 else if (out_n == 4 && in_n == 4)
42314 return ix86_get_builtin (IX86_BUILTIN_CEILPD256);
42316 if (out_mode == SFmode && in_mode == SFmode)
42318 if (out_n == 4 && in_n == 4)
42319 return ix86_get_builtin (IX86_BUILTIN_CEILPS);
42320 else if (out_n == 8 && in_n == 8)
42321 return ix86_get_builtin (IX86_BUILTIN_CEILPS256);
/* trunc on floating result (CASE_CFN_TRUNC, presumably).  */
42326 /* The round insn does not trap on denormals. */
42327 if (flag_trapping_math || !TARGET_ROUND)
42330 if (out_mode == DFmode && in_mode == DFmode)
42332 if (out_n == 2 && in_n == 2)
42333 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD);
42334 else if (out_n == 4 && in_n == 4)
42335 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD256);
42337 if (out_mode == SFmode && in_mode == SFmode)
42339 if (out_n == 4 && in_n == 4)
42340 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS);
42341 else if (out_n == 8 && in_n == 8)
42342 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS256);
/* rint on floating result (CASE_CFN_RINT, presumably).  */
42347 /* The round insn does not trap on denormals. */
42348 if (flag_trapping_math || !TARGET_ROUND)
42351 if (out_mode == DFmode && in_mode == DFmode)
42353 if (out_n == 2 && in_n == 2)
42354 return ix86_get_builtin (IX86_BUILTIN_RINTPD);
42355 else if (out_n == 4 && in_n == 4)
42356 return ix86_get_builtin (IX86_BUILTIN_RINTPD256);
42358 if (out_mode == SFmode && in_mode == SFmode)
42360 if (out_n == 4 && in_n == 4)
42361 return ix86_get_builtin (IX86_BUILTIN_RINTPS);
42362 else if (out_n == 8 && in_n == 8)
42363 return ix86_get_builtin (IX86_BUILTIN_RINTPS256);
/* fused multiply-add (CASE_CFN_FMA, presumably).  */
42368 if (out_mode == DFmode && in_mode == DFmode)
42370 if (out_n == 2 && in_n == 2)
42371 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD);
42372 if (out_n == 4 && in_n == 4)
42373 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD256);
42375 if (out_mode == SFmode && in_mode == SFmode)
42377 if (out_n == 4 && in_n == 4)
42378 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS);
42379 if (out_n == 8 && in_n == 8)
42380 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS256);
42388 /* Dispatch to a handler for a vectorization library. */
42389 if (ix86_veclib_handler)
42390 return ix86_veclib_handler (combined_fn (fn), type_out, type_in);
42395 /* Handler for an SVML-style interface to
42396 a library with vectorized intrinsics. */
42399 ix86_veclibabi_svml (combined_fn fn, tree type_out, tree type_in)
42402 tree fntype, new_fndecl, args;
42405 machine_mode el_mode, in_mode;
42408 /* The SVML is suitable for unsafe math only. */
42409 if (!flag_unsafe_math_optimizations)
42412 el_mode = TYPE_MODE (TREE_TYPE (type_out));
42413 n = TYPE_VECTOR_SUBPARTS (type_out);
42414 in_mode = TYPE_MODE (TREE_TYPE (type_in));
42415 in_n = TYPE_VECTOR_SUBPARTS (type_in);
42416 if (el_mode != in_mode
/* SVML entry points exist only for 2 x double and 4 x float.  */
42440 if ((el_mode != DFmode || n != 2)
42441 && (el_mode != SFmode || n != 4))
42449 tree fndecl = mathfn_built_in (TREE_TYPE (type_in), fn);
42450 bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
/* log/logf are special-cased because SVML spells them "Ln".
   Other names are derived from the scalar builtin name: bname+10
   skips the "__builtin_" prefix (assumed 10 chars — confirm).  */
42452 if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOGF)
42453 strcpy (name, "vmlsLn4");
42454 else if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOG)
42455 strcpy (name, "vmldLn2");
42458 sprintf (name, "vmls%s", bname+10);
/* Single-precision scalar names end in 'f'; replace it with the
   lane count '4' to form the SVML name.  */
42459 name[strlen (name)-1] = '4';
42462 sprintf (name, "vmld%s2", bname+10);
42464 /* Convert to uppercase. */
/* Build the vector signature: unary or binary depending on the
   scalar builtin's argument list.  */
42468 for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
42472 fntype = build_function_type_list (type_out, type_in, NULL);
42474 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
42476 /* Build a function declaration for the vectorized function. */
42477 new_fndecl = build_decl (BUILTINS_LOCATION,
42478 FUNCTION_DECL, get_identifier (name), fntype);
42479 TREE_PUBLIC (new_fndecl) = 1;
42480 DECL_EXTERNAL (new_fndecl) = 1;
/* Pure math routines: no virtual operands, safe to treat as readonly.  */
42481 DECL_IS_NOVOPS (new_fndecl) = 1;
42482 TREE_READONLY (new_fndecl) = 1;
42487 /* Handler for an ACML-style interface to
42488 a library with vectorized intrinsics. */
/* NOTE(review): lossy extract — return type, early-return lines and the
   per-mode suffix assignments ("__vrd2_" / "__vrs4_") are missing.  */
42491 ix86_veclibabi_acml (combined_fn fn, tree type_out, tree type_in)
/* Template "__vr.._": the two dots are patched per element mode/width
   elsewhere (lines not present in this extract).  */
42493 char name[20] = "__vr.._";
42494 tree fntype, new_fndecl, args;
42497 machine_mode el_mode, in_mode;
42500 /* The ACML is 64bits only and suitable for unsafe math only as
42501 it does not correctly support parts of IEEE with the required
42502 precision such as denormals. */
42504 || !flag_unsafe_math_optimizations)
42507 el_mode = TYPE_MODE (TREE_TYPE (type_out));
42508 n = TYPE_VECTOR_SUBPARTS (type_out);
42509 in_mode = TYPE_MODE (TREE_TYPE (type_in));
42510 in_n = TYPE_VECTOR_SUBPARTS (type_in);
42511 if (el_mode != in_mode
/* Only V2DF and V4SF variants are provided by ACML.  */
42523 if (el_mode == DFmode && n == 2)
42528 else if (el_mode == SFmode && n == 4)
42541 tree fndecl = mathfn_built_in (TREE_TYPE (type_in), fn);
42542 bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
/* Append scalar name minus the "__builtin_" prefix (bname+10) after
   the 7-char "__vr.._" template.  */
42543 sprintf (name + 7, "%s", bname+10);
42546 for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
42550 fntype = build_function_type_list (type_out, type_in, NULL);
42552 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
42554 /* Build a function declaration for the vectorized function. */
42555 new_fndecl = build_decl (BUILTINS_LOCATION,
42556 FUNCTION_DECL, get_identifier (name), fntype);
42557 TREE_PUBLIC (new_fndecl) = 1;
42558 DECL_EXTERNAL (new_fndecl) = 1;
42559 DECL_IS_NOVOPS (new_fndecl) = 1;
42560 TREE_READONLY (new_fndecl) = 1;
42565 /* Returns a decl of a function that implements gather load with
42566 memory type MEM_VECTYPE and index type INDEX_VECTYPE and SCALE.
42567 Return NULL_TREE if it is not available. */
/* NOTE(review): lossy extract — the TARGET_AVX guard, switch case
   labels (V2DFmode etc.), default case and several returns are
   missing; only comments are added here.  */
42570 ix86_vectorize_builtin_gather (const_tree mem_vectype,
42571 const_tree index_type, int scale)
42574 enum ix86_builtins code;
/* Gather indices must be SImode or DImode integers/pointers.  */
42579 if ((TREE_CODE (index_type) != INTEGER_TYPE
42580 && !POINTER_TYPE_P (index_type))
42581 || (TYPE_MODE (index_type) != SImode
42582 && TYPE_MODE (index_type) != DImode))
42585 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
/* Narrow unsigned indices would be zero-extended by the ABI but the
   insn sign-extends, so refuse them.  */
42588 /* v*gather* insn sign extends index to pointer mode. */
42589 if (TYPE_PRECISION (index_type) < POINTER_SIZE
42590 && TYPE_UNSIGNED (index_type))
/* Scale must be a power of two (1/2/4/8 per the ISA).  */
42595 || (scale & (scale - 1)) != 0)
/* Select the builtin: SI vs DI index crossed with the vector mode,
   preferring the AVX512VL "GATHER3" forms when available.  */
42598 si = TYPE_MODE (index_type) == SImode;
42599 switch (TYPE_MODE (mem_vectype))
42602 if (TARGET_AVX512VL)
42603 code = si ? IX86_BUILTIN_GATHER3SIV2DF : IX86_BUILTIN_GATHER3DIV2DF;
42605 code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
42608 if (TARGET_AVX512VL)
42609 code = si ? IX86_BUILTIN_GATHER3ALTSIV4DF : IX86_BUILTIN_GATHER3DIV4DF;
42611 code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
42614 if (TARGET_AVX512VL)
42615 code = si ? IX86_BUILTIN_GATHER3SIV2DI : IX86_BUILTIN_GATHER3DIV2DI;
42617 code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
42620 if (TARGET_AVX512VL)
42621 code = si ? IX86_BUILTIN_GATHER3ALTSIV4DI : IX86_BUILTIN_GATHER3DIV4DI;
42623 code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
42626 if (TARGET_AVX512VL)
42627 code = si ? IX86_BUILTIN_GATHER3SIV4SF : IX86_BUILTIN_GATHER3DIV4SF;
42629 code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
42632 if (TARGET_AVX512VL)
42633 code = si ? IX86_BUILTIN_GATHER3SIV8SF : IX86_BUILTIN_GATHER3ALTDIV8SF;
42635 code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
42638 if (TARGET_AVX512VL)
42639 code = si ? IX86_BUILTIN_GATHER3SIV4SI : IX86_BUILTIN_GATHER3DIV4SI;
42641 code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
42644 if (TARGET_AVX512VL)
42645 code = si ? IX86_BUILTIN_GATHER3SIV8SI : IX86_BUILTIN_GATHER3ALTDIV8SI;
42647 code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
/* 512-bit modes require full AVX-512F.  */
42650 if (TARGET_AVX512F)
42651 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DF : IX86_BUILTIN_GATHER3DIV8DF;
42656 if (TARGET_AVX512F)
42657 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DI : IX86_BUILTIN_GATHER3DIV8DI;
42662 if (TARGET_AVX512F)
42663 code = si ? IX86_BUILTIN_GATHER3SIV16SF : IX86_BUILTIN_GATHER3ALTDIV16SF;
42668 if (TARGET_AVX512F)
42669 code = si ? IX86_BUILTIN_GATHER3SIV16SI : IX86_BUILTIN_GATHER3ALTDIV16SI;
42677 return ix86_get_builtin (code);
42680 /* Returns a decl of a function that implements scatter store with
42681 register type VECTYPE and index type INDEX_TYPE and SCALE.
42682 Return NULL_TREE if it is not available. */
/* NOTE(review): lossy extract — switch case labels, default case and
   the early NULL_TREE returns are missing from the text below.  */
42685 ix86_vectorize_builtin_scatter (const_tree vectype,
42686 const_tree index_type, int scale)
42689 enum ix86_builtins code;
/* Scatter exists only with AVX-512F.  */
42691 if (!TARGET_AVX512F)
/* Same index-type constraints as the gather hook above.  */
42694 if ((TREE_CODE (index_type) != INTEGER_TYPE
42695 && !POINTER_TYPE_P (index_type))
42696 || (TYPE_MODE (index_type) != SImode
42697 && TYPE_MODE (index_type) != DImode))
42700 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
42703 /* v*scatter* insn sign extends index to pointer mode. */
42704 if (TYPE_PRECISION (index_type) < POINTER_SIZE
42705 && TYPE_UNSIGNED (index_type))
42708 /* Scale can be 1, 2, 4 or 8. */
42711 || (scale & (scale - 1)) != 0)
/* Pick the builtin from index width x vector mode (512-bit only).  */
42714 si = TYPE_MODE (index_type) == SImode;
42715 switch (TYPE_MODE (vectype))
42718 code = si ? IX86_BUILTIN_SCATTERALTSIV8DF : IX86_BUILTIN_SCATTERDIV8DF;
42721 code = si ? IX86_BUILTIN_SCATTERALTSIV8DI : IX86_BUILTIN_SCATTERDIV8DI;
42724 code = si ? IX86_BUILTIN_SCATTERSIV16SF : IX86_BUILTIN_SCATTERALTDIV16SF;
42727 code = si ? IX86_BUILTIN_SCATTERSIV16SI : IX86_BUILTIN_SCATTERALTDIV16SI;
/* NOTE(review): gather uses ix86_get_builtin () while this indexes
   ix86_builtins[] directly — presumably intentional, but worth
   confirming against the full source.  */
42733 return ix86_builtins[code];
42736 /* Return true if it is safe to use the rsqrt optabs to optimize
/* NOTE(review): the function signature line is missing from this
   extract; the body below gates rsqrt use on SSE math plus the
   fast-math-style flags that make the approximation acceptable.  */
42742 return (TARGET_SSE_MATH
42743 && flag_finite_math_only
42744 && !flag_trapping_math
42745 && flag_unsafe_math_optimizations);
42748 /* Returns a code for a target-specific builtin that implements
42749 reciprocal of the function, or NULL_TREE if not available. */
/* NOTE(review): lossy extract — return type, switch braces and the
   default/NULL_TREE case are missing.  Maps SQRTPS builtins to their
   reciprocal-sqrt (RSQRTPS) counterparts.  */
42752 ix86_builtin_reciprocal (tree fndecl)
42754 switch (DECL_FUNCTION_CODE (fndecl))
42756 /* Vectorized version of sqrt to rsqrt conversion. */
42757 case IX86_BUILTIN_SQRTPS_NR:
42758 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR);
42760 case IX86_BUILTIN_SQRTPS_NR256:
42761 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256);
42768 /* Helper for avx_vpermilps256_operand et al. This is also used by
42769 the expansion functions to turn the parallel back into a mask.
42770 The return value is 0 for no match and the imm8+1 for a match. */
/* NOTE(review): lossy extract — the mode switch labels (V8DFmode,
   V4DFmode, ...), several range checks, "return 0" failure paths and
   the final "return mask + 1" are missing from the text.  */
42773 avx_vpermilp_parallel (rtx par, machine_mode mode)
42775 unsigned i, nelt = GET_MODE_NUNITS (mode);
42777 unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */
/* The parallel must have exactly one element per vector lane.  */
42779 if (XVECLEN (par, 0) != (int) nelt)
42782 /* Validate that all of the elements are constants, and not totally
42783 out of range. Copy the data into an integral array to make the
42784 subsequent checks easier. */
42785 for (i = 0; i < nelt; ++i)
42787 rtx er = XVECEXP (par, 0, i);
42788 unsigned HOST_WIDE_INT ei;
42790 if (!CONST_INT_P (er))
42801 /* In the 512-bit DFmode case, we can only move elements within
42802 a 128-bit lane. First fill the second part of the mask,
/* Each 2-element group must permute within its own 128-bit lane;
   the local index (ipar[i] - base) is folded into the imm8.  */
42804 for (i = 4; i < 6; ++i)
42806 if (ipar[i] < 4 || ipar[i] >= 6)
42808 mask |= (ipar[i] - 4) << i;
42810 for (i = 6; i < 8; ++i)
42814 mask |= (ipar[i] - 6) << i;
42819 /* In the 256-bit DFmode case, we can only move elements within
42821 for (i = 0; i < 2; ++i)
42825 mask |= ipar[i] << i;
42827 for (i = 2; i < 4; ++i)
42831 mask |= (ipar[i] - 2) << i;
42836 /* In 512 bit SFmode case, permutation in the upper 256 bits
42837 must mirror the permutation in the lower 256-bits. */
42838 for (i = 0; i < 8; ++i)
42839 if (ipar[i] + 8 != ipar[i + 8])
42844 /* In 256 bit SFmode case, we have full freedom of
42845 movement within the low 128-bit lane, but the high 128-bit
42846 lane must mirror the exact same pattern. */
42847 for (i = 0; i < 4; ++i)
42848 if (ipar[i] + 4 != ipar[i + 4])
42855 /* In the 128-bit case, we've full freedom in the placement of
42856 the elements from the source operand. */
42857 for (i = 0; i < nelt; ++i)
42858 mask |= ipar[i] << (i * (nelt / 2));
42862 gcc_unreachable ();
42865 /* Make sure success has a non-zero value by adding one. */
42869 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
42870 the expansion functions to turn the parallel back into a mask.
42871 The return value is 0 for no match and the imm8+1 for a match. */
/* NOTE(review): lossy extract — failure "return 0" lines and the
   final "return mask + 1" are missing from the text below.  */
42874 avx_vperm2f128_parallel (rtx par, machine_mode mode)
42876 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
42878 unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
42880 if (XVECLEN (par, 0) != (int) nelt)
42883 /* Validate that all of the elements are constants, and not totally
42884 out of range. Copy the data into an integral array to make the
42885 subsequent checks easier. */
42886 for (i = 0; i < nelt; ++i)
42888 rtx er = XVECEXP (par, 0, i);
42889 unsigned HOST_WIDE_INT ei;
42891 if (!CONST_INT_P (er))
/* Indices select from the 2*nelt elements of the two inputs.  */
42894 if (ei >= 2 * nelt)
/* Each half of the permute must be a run of consecutive elements.  */
42899 /* Validate that the halves of the permute are halves. */
42900 for (i = 0; i < nelt2 - 1; ++i)
42901 if (ipar[i] + 1 != ipar[i + 1])
42903 for (i = nelt2; i < nelt - 1; ++i)
42904 if (ipar[i] + 1 != ipar[i + 1])
42907 /* Reconstruct the mask. */
/* Each selected half contributes a nibble of the imm8.  */
42908 for (i = 0; i < 2; ++i)
42910 unsigned e = ipar[i * nelt2];
42914 mask |= e << (i * 4);
42917 /* Make sure success has a non-zero value by adding one. */
42921 /* Return a register priority for hard reg REGNO. */
/* NOTE(review): lossy extract — return type and the numeric priority
   return lines are missing; only the selection conditions remain.  */
42923 ix86_register_priority (int hard_regno)
42925 /* ebp and r13 as the base always wants a displacement, r12 as the
42926 base always wants an index. So discourage their usage in an
42928 if (hard_regno == R12_REG || hard_regno == R13_REG)
42930 if (hard_regno == BP_REG)
/* REX-prefixed registers cost an extra encoding byte.  */
42932 /* New x86-64 int registers result in bigger code size. Discourage
42934 if (FIRST_REX_INT_REG <= hard_regno && hard_regno <= LAST_REX_INT_REG)
42936 /* New x86-64 SSE registers result in bigger code size. Discourage
42938 if (FIRST_REX_SSE_REG <= hard_regno && hard_regno <= LAST_REX_SSE_REG)
42940 /* Usage of AX register results in smaller code. Prefer it. */
42941 if (hard_regno == AX_REG)
42946 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
42948 Put float CONST_DOUBLE in the constant pool instead of fp regs.
42949 QImode must go into class Q_REGS.
42950 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
42951 movdf to do mem-to-mem moves through integer regs. */
/* NOTE(review): lossy extract — return type, several return statements
   (NO_REGS / regclass fall-throughs) and some condition lines are
   missing.  */
42954 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
42956 machine_mode mode = GET_MODE (x);
42958 /* We're only allowed to return a subclass of CLASS. Many of the
42959 following checks fail for NO_REGS, so eliminate that early. */
42960 if (regclass == NO_REGS)
42963 /* All classes can load zeros. */
42964 if (x == CONST0_RTX (mode))
42967 /* Force constants into memory if we are loading a (nonzero) constant into
42968 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
42969 instructions to load from a constant. */
42971 && (MAYBE_MMX_CLASS_P (regclass)
42972 || MAYBE_SSE_CLASS_P (regclass)
42973 || MAYBE_MASK_CLASS_P (regclass)))
42976 /* Prefer SSE regs only, if we can use them for math. */
42977 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
42978 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
42980 /* Floating-point constants need more complex checks. */
42981 if (CONST_DOUBLE_P (x))
42983 /* General regs can load everything. */
42984 if (reg_class_subset_p (regclass, GENERAL_REGS))
42987 /* Floats can load 0 and 1 plus some others. Note that we eliminated
42988 zero above. We only want to wind up preferring 80387 registers if
42989 we plan on doing computation with them. */
42991 && standard_80387_constant_p (x) > 0)
/* Strip the SSE half out of mixed x87/SSE classes for these
   pool-loadable x87 constants.  */
42993 /* Limit class to non-sse. */
42994 if (regclass == FLOAT_SSE_REGS)
42996 if (regclass == FP_TOP_SSE_REGS)
42998 if (regclass == FP_SECOND_SSE_REGS)
42999 return FP_SECOND_REG;
43000 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
43007 /* Generally when we see PLUS here, it's the function invariant
43008 (plus soft-fp const_int). Which can only be computed into general
43010 if (GET_CODE (x) == PLUS)
43011 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
43013 /* QImode constants are easy to load, but non-constant QImode data
43014 must go into Q_REGS. */
43015 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
43017 if (reg_class_subset_p (regclass, Q_REGS))
43019 if (reg_class_subset_p (Q_REGS, regclass))
43027 /* Discourage putting floating-point values in SSE registers unless
43028 SSE math is being used, and likewise for the 387 registers. */
/* NOTE(review): lossy extract — return type and the final "return
   regclass" fall-through are missing.  */
43030 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
43032 machine_mode mode = GET_MODE (x);
43034 /* Restrict the output reload class to the register bank that we are doing
43035 math on. If we would like not to return a subset of CLASS, reject this
43036 alternative: if reload cannot do this, it will still use its choice. */
/* NOTE(review): mode is re-assigned to the same value as the
   initializer above — looks redundant but is kept verbatim.  */
43037 mode = GET_MODE (x);
43038 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
43039 return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
43041 if (X87_FLOAT_MODE_P (mode))
/* Strip the SSE part from mixed x87/SSE classes.  */
43043 if (regclass == FP_TOP_SSE_REGS)
43045 else if (regclass == FP_SECOND_SSE_REGS)
43046 return FP_SECOND_REG;
43048 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
/* Implement TARGET_SECONDARY_RELOAD.  NOTE(review): lossy extract —
   return type, braces, the MEM_P guard on X and the trailing
   "return NO_REGS" are missing from the text below.  */
43055 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
43056 machine_mode mode, secondary_reload_info *sri)
43058 /* Double-word spills from general registers to non-offsettable memory
43059 references (zero-extended addresses) require special handling. */
43062 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
43063 && INTEGER_CLASS_P (rclass)
43064 && !offsettable_memref_p (x))
/* Route through the reload_noff_{load,store} expander patterns.  */
43067 ? CODE_FOR_reload_noff_load
43068 : CODE_FOR_reload_noff_store);
43069 /* Add the cost of moving address to a temporary. */
43070 sri->extra_cost = 1;
43075 /* QImode spills from non-QI registers require
43076 intermediate register on 32bit targets. */
43078 && (MAYBE_MASK_CLASS_P (rclass)
43079 || (!TARGET_64BIT && !in_p
43080 && INTEGER_CLASS_P (rclass)
43081 && MAYBE_NON_Q_CLASS_P (rclass))))
/* Resolve pseudo/subreg operands to a hard register number when
   possible, so the memory check below sees the real operand.  */
43090 if (regno >= FIRST_PSEUDO_REGISTER || SUBREG_P (x))
43091 regno = true_regnum (x);
43093 /* Return Q_REGS if the operand is in memory. */
43098 /* This condition handles corner case where an expression involving
43099 pointers gets vectorized. We're trying to use the address of a
43100 stack slot as a vector initializer.
43102 (set (reg:V2DI 74 [ vect_cst_.2 ])
43103 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
43105 Eventually frame gets turned into sp+offset like this:
43107 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
43108 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
43109 (const_int 392 [0x188]))))
43111 That later gets turned into:
43113 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
43114 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
43115 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
43117 We'll have the following reload recorded:
43119 Reload 0: reload_in (DI) =
43120 (plus:DI (reg/f:DI 7 sp)
43121 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
43122 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
43123 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
43124 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
43125 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
43126 reload_reg_rtx: (reg:V2DI 22 xmm1)
43128 Which isn't going to work since SSE instructions can't handle scalar
43129 additions. Returning GENERAL_REGS forces the addition into integer
43130 register and reload can handle subsequent reloads without problems. */
43132 if (in_p && GET_CODE (x) == PLUS
43133 && SSE_CLASS_P (rclass)
43134 && SCALAR_INT_MODE_P (mode))
43135 return GENERAL_REGS;
43140 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
/* NOTE(review): only a fragment of this function survives in the
   extract — the signature body, most case labels and the return
   values are missing.  Single-register classes such as these are
   reported as likely spilled.  */
43143 ix86_class_likely_spilled_p (reg_class_t rclass)
43154 case SSE_FIRST_REG:
43156 case FP_SECOND_REG:
43167 /* If we are copying between general and FP registers, we need a memory
43168 location. The same is true for SSE and MMX registers.
43170 To optimize register_move_cost performance, allow inline variant.
43172 The macro can't work reliably when one of the CLASSES is class containing
43173 registers from multiple units (SSE, MMX, integer). We avoid this by never
43174 combining those units in single alternative in the machine description.
43175 Ensure that this constraint holds to avoid unexpected surprises.
43177 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
43178 enforce these sanity checks. */
/* NOTE(review): lossy extract — the "static bool"/"inline" specifier
   line, braces and several true/false returns are missing.  */
43181 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
43182 machine_mode mode, int strict)
43184 if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
/* Sanity: reject "maybe" classes that mix units — see head comment.  */
43186 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
43187 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
43188 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
43189 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
43190 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
43191 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
43193 gcc_assert (!strict || lra_in_progress);
43197 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
43200 /* Between mask and general, we have moves no larger than word size. */
43201 if ((MAYBE_MASK_CLASS_P (class1) != MAYBE_MASK_CLASS_P (class2))
43202 && (GET_MODE_SIZE (mode) > UNITS_PER_WORD))
43205 /* ??? This is a lie. We do have moves between mmx/general, and for
43206 mmx/sse2. But by saying we need secondary memory we discourage the
43207 register allocator from using the mmx registers unless needed. */
43208 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
43211 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
43213 /* SSE1 doesn't have any direct moves from other classes. */
43217 /* If the target says that inter-unit moves are more expensive
43218 than moving through memory, then don't generate them. */
43219 if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
43220 || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
43223 /* Between SSE and general, we have moves no larger than word size. */
43224 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
/* Out-of-line wrapper around inline_secondary_memory_needed; used as
   the target's SECONDARY_MEMORY_NEEDED implementation.  (Return type
   line missing from this extract.)  */
43232 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
43233 machine_mode mode, int strict)
43235 return inline_secondary_memory_needed (class1, class2, mode, strict);
43238 /* Implement the TARGET_CLASS_MAX_NREGS hook.
43240 On the 80386, this is the size of MODE in words,
43241 except in the FP regs, where a single reg is always enough. */
/* NOTE(review): lossy extract — the non-integer-class return paths
   (single FP reg, complex-mode handling) are incomplete below.  */
43243 static unsigned char
43244 ix86_class_max_nregs (reg_class_t rclass, machine_mode mode)
43246 if (MAYBE_INTEGER_CLASS_P (rclass))
/* XFmode/XCmode occupy 2/4 words on 64-bit, 3/6 on 32-bit.  */
43248 if (mode == XFmode)
43249 return (TARGET_64BIT ? 2 : 3);
43250 else if (mode == XCmode)
43251 return (TARGET_64BIT ? 4 : 6);
43253 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
43257 if (COMPLEX_MODE_P (mode))
43264 /* Return true if the registers in CLASS cannot represent the change from
43265 modes FROM to TO. */
/* NOTE(review): lossy extract — return type, the same-size early
   return, the small-mode check body and final returns are missing.  */
43268 ix86_cannot_change_mode_class (machine_mode from, machine_mode to,
43269 enum reg_class regclass)
43274 /* x87 registers can't do subreg at all, as all values are reformatted
43275 to extended precision. */
43276 if (MAYBE_FLOAT_CLASS_P (regclass))
43279 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
43281 int from_size = GET_MODE_SIZE (from);
43282 int to_size = GET_MODE_SIZE (to);
43284 /* Vector registers do not support QI or HImode loads. If we don't
43285 disallow a change to these modes, reload will assume it's ok to
43286 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
43287 the vec_dupv4hi pattern. */
43291 /* Further, we cannot allow word_mode subregs of full vector modes.
43292 Otherwise the middle-end will assume it's ok to store to
43293 (subreg:DI (reg:TI 100) 0) in order to modify only the low 64 bits
43294 of the 128-bit register. However, after reload the subreg will
43295 be dropped leaving a plain DImode store. This is indistinguishable
43296 from a "normal" DImode move, and so we're justified to use movsd,
43297 which modifies the entire 128-bit register. */
43298 if (to_size == UNITS_PER_WORD && from_size > UNITS_PER_WORD)
43305 /* Return the cost of moving data of mode M between a
43306 register and memory. A value of 2 is the default; this cost is
43307 relative to those in `REGISTER_MOVE_COST'.
43309 This function is used extensively by register_move_cost that is used to
43310 build tables at startup. Make it inline in this case.
43311 When IN is 2, return maximum of in and out move cost.
43313 If moving between registers and memory is more expensive than
43314 between two registers, you should define this macro to express the
43317 Model also increased moving costs of QImode registers in non
/* NOTE(review): lossy extract — the "index" computation for each
   class, switch case labels and several braces are missing.  */
43321 inline_memory_move_cost (machine_mode mode, enum reg_class regclass,
/* x87 costs: indexed fp_load/fp_store tables; IN==2 takes the max of
   both directions.  */
43325 if (FLOAT_CLASS_P (regclass))
43343 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
43344 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
/* SSE costs, indexed by operand size.  */
43346 if (SSE_CLASS_P (regclass))
43349 switch (GET_MODE_SIZE (mode))
43364 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
43365 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
/* MMX costs, indexed by operand size.  */
43367 if (MMX_CLASS_P (regclass))
43370 switch (GET_MODE_SIZE (mode))
43382 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
43383 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
/* Integer registers, by size; byte ops get special partial-register
   treatment below.  */
43385 switch (GET_MODE_SIZE (mode))
43388 if (Q_CLASS_P (regclass) || TARGET_64BIT)
43391 return ix86_cost->int_store[0];
43392 if (TARGET_PARTIAL_REG_DEPENDENCY
43393 && optimize_function_for_speed_p (cfun))
43394 cost = ix86_cost->movzbl_load;
43396 cost = ix86_cost->int_load[0];
43398 return MAX (cost, ix86_cost->int_store[0]);
/* Non-Q classes on 32-bit: byte store modeled with a +4 penalty.  */
43404 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
43406 return ix86_cost->movzbl_load;
43408 return ix86_cost->int_store[0] + 4;
43413 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
43414 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
43416 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
43417 if (mode == TFmode)
43420 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
43422 cost = ix86_cost->int_load[2];
43424 cost = ix86_cost->int_store[2];
43425 return cost * CEIL ((int) GET_MODE_SIZE (mode), UNITS_PER_WORD);
/* TARGET_MEMORY_MOVE_COST hook: thin wrapper that normalizes the bool
   IN to the 0/1 encoding inline_memory_move_cost expects.  (Return
   type line missing from this extract.)  */
43430 ix86_memory_move_cost (machine_mode mode, reg_class_t regclass,
43433 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
43437 /* Return the cost of moving data from a register in class CLASS1 to
43438 one in class CLASS2.
43440 It is not required that the cost always equal 2 when FROM is the same as TO;
43441 on some machines it is expensive to move between registers if they are not
43442 general registers. */
/* NOTE(review): lossy extract — return type, the "cost" declaration,
   several returns and the final integer-class fall-through are
   missing.  */
43445 ix86_register_move_cost (machine_mode mode, reg_class_t class1_i,
43446 reg_class_t class2_i)
43448 enum reg_class class1 = (enum reg_class) class1_i;
43449 enum reg_class class2 = (enum reg_class) class2_i;
43451 /* In case we require secondary memory, compute cost of the store followed
43452 by load. In order to avoid bad register allocation choices, we need
43453 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
43455 if (inline_secondary_memory_needed (class1, class2, mode, 0))
/* IN==2 asks inline_memory_move_cost for max(load, store).  */
43459 cost += inline_memory_move_cost (mode, class1, 2);
43460 cost += inline_memory_move_cost (mode, class2, 2);
43462 /* In case of copying from general_purpose_register we may emit multiple
43463 stores followed by single load causing memory size mismatch stall.
43464 Count this as arbitrarily high cost of 20. */
43465 if (targetm.class_max_nregs (class1, mode)
43466 > targetm.class_max_nregs (class2, mode))
43469 /* In the case of FP/MMX moves, the registers actually overlap, and we
43470 have to switch modes in order to treat them differently. */
43471 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
43472 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
43478 /* Moves between SSE/MMX and integer unit are expensive. */
43479 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
43480 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
43482 /* ??? By keeping returned value relatively high, we limit the number
43483 of moves between integer and MMX/SSE registers for all targets.
43484 Additionally, high value prevents problem with x86_modes_tieable_p(),
43485 where integer modes in MMX/SSE registers are not tieable
43486 because of missing QImode and HImode moves to, from or between
43487 MMX/SSE registers. */
43488 return MAX (8, ix86_cost->mmxsse_to_integer);
43490 if (MAYBE_FLOAT_CLASS_P (class1))
43491 return ix86_cost->fp_move;
43492 if (MAYBE_SSE_CLASS_P (class1))
43493 return ix86_cost->sse_move;
43494 if (MAYBE_MMX_CLASS_P (class1))
43495 return ix86_cost->mmx_move;
43499 /* Return TRUE if hard register REGNO can hold a value of machine-mode
/* NOTE(review): lossy extract — return type, several true/false
   returns and closing braces are missing.  Per-register-file checks
   follow: flags, stack (x87), mask, bound, SSE, MMX, then GPRs.  */
43503 ix86_hard_regno_mode_ok (int regno, machine_mode mode)
43505 /* Flags and only flags can only hold CCmode values. */
43506 if (CC_REGNO_P (regno))
43507 return GET_MODE_CLASS (mode) == MODE_CC;
43508 if (GET_MODE_CLASS (mode) == MODE_CC
43509 || GET_MODE_CLASS (mode) == MODE_RANDOM
43510 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
43512 if (STACK_REGNO_P (regno))
43513 return VALID_FP_MODE_P (mode);
/* Mask regs take the basic mask modes; byte/word masks additionally
   require AVX512BW.  */
43514 if (MASK_REGNO_P (regno))
43515 return (VALID_MASK_REG_MODE (mode)
43516 || (TARGET_AVX512BW
43517 && VALID_MASK_AVX512BW_MODE (mode)));
43518 if (BND_REGNO_P (regno))
43519 return VALID_BND_REG_MODE (mode);
43520 if (SSE_REGNO_P (regno))
43522 /* We implement the move patterns for all vector modes into and
43523 out of SSE registers, even when no operation instructions
43526 /* For AVX-512 we allow, regardless of regno:
43528 - any of 512-bit wide vector mode
43529 - any scalar mode. */
43532 || VALID_AVX512F_REG_MODE (mode)
43533 || VALID_AVX512F_SCALAR_MODE (mode)))
43536 /* TODO check for QI/HI scalars. */
43537 /* AVX512VL allows sse regs16+ for 128/256 bit modes. */
43538 if (TARGET_AVX512VL
43541 || VALID_AVX256_REG_MODE (mode)
43542 || VALID_AVX512VL_128_REG_MODE (mode)))
43545 /* xmm16-xmm31 are only available for AVX-512. */
43546 if (EXT_REX_SSE_REGNO_P (regno))
43549 /* OImode and AVX modes are available only when AVX is enabled. */
43550 return ((TARGET_AVX
43551 && VALID_AVX256_REG_OR_OI_MODE (mode))
43552 || VALID_SSE_REG_MODE (mode)
43553 || VALID_SSE2_REG_MODE (mode)
43554 || VALID_MMX_REG_MODE (mode)
43555 || VALID_MMX_REG_MODE_3DNOW (mode));
43557 if (MMX_REGNO_P (regno))
43559 /* We implement the move patterns for 3DNOW modes even in MMX mode,
43560 so if the register is available at all, then we can move data of
43561 the given mode into or out of it. */
43562 return (VALID_MMX_REG_MODE (mode)
43563 || VALID_MMX_REG_MODE_3DNOW (mode));
43566 if (mode == QImode)
43568 /* Take care for QImode values - they can be in non-QI regs,
43569 but then they do cause partial register stalls. */
43570 if (ANY_QI_REGNO_P (regno))
43572 if (!TARGET_PARTIAL_REG_STALL)
43574 /* LRA checks if the hard register is OK for the given mode.
43575 QImode values can live in non-QI regs, so we allow all
43577 if (lra_in_progress)
43579 return !can_create_pseudo_p ();
43581 /* We handle both integer and floats in the general purpose registers. */
43582 else if (VALID_INT_MODE_P (mode))
43584 else if (VALID_FP_MODE_P (mode))
43586 else if (VALID_DFP_MODE_P (mode))
43588 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
43589 on to use that value in smaller contexts, this can easily force a
43590 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
43591 supporting DImode, allow it. */
43592 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
43598 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
43599 tieable integer mode. */
/* NOTE(review): lossy extract — the switch statement, case labels and
   the default return are missing; only two return expressions remain
   (HI/QI gated on no partial-reg stall, SI/DI on TARGET_64BIT —
   presumably; confirm against full source).  */
43602 ix86_tieable_integer_mode_p (machine_mode mode)
43611 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
43614 return TARGET_64BIT;
43621 /* Return true if MODE1 is accessible in a register that can hold MODE2
43622 without copying. That is, all register classes that can hold MODE2
43623 can also hold MODE1. */
/* NOTE(review): lossy extract — return type and the final "return
   false" fall-through are missing.  */
43626 ix86_modes_tieable_p (machine_mode mode1, machine_mode mode2)
43628 if (mode1 == mode2)
43631 if (ix86_tieable_integer_mode_p (mode1)
43632 && ix86_tieable_integer_mode_p (mode2))
43635 /* MODE2 being XFmode implies fp stack or general regs, which means we
43636 can tie any smaller floating point modes to it. Note that we do not
43637 tie this with TFmode. */
43638 if (mode2 == XFmode)
43639 return mode1 == SFmode || mode1 == DFmode;
43641 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
43642 that we can tie it with SFmode. */
43643 if (mode2 == DFmode)
43644 return mode1 == SFmode;
43646 /* If MODE2 is only appropriate for an SSE register, then tie with
43647 any other mode acceptable to SSE registers. */
/* Vector sizes are matched per width (32/16 bytes) against what the
   first SSE register accepts.  */
43648 if (GET_MODE_SIZE (mode2) == 32
43649 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
43650 return (GET_MODE_SIZE (mode1) == 32
43651 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
43652 if (GET_MODE_SIZE (mode2) == 16
43653 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
43654 return (GET_MODE_SIZE (mode1) == 16
43655 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
43657 /* If MODE2 is appropriate for an MMX register, then tie
43658 with any other mode acceptable to MMX registers. */
43659 if (GET_MODE_SIZE (mode2) == 8
43660 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
43661 return (GET_MODE_SIZE (mode1) == 8
43662 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
43667 /* Return the cost of moving between two registers of mode MODE. */
/* NOTE(review): lossy extract — return type, switch case labels for
   the first groups and "break" lines are missing.  The function picks
   the widest per-unit chunk UNITS the target can move natively, then
   charges one insn per chunk.  */
43670 ix86_set_reg_reg_cost (machine_mode mode)
43672 unsigned int units = UNITS_PER_WORD;
43674 switch (GET_MODE_CLASS (mode))
43680 units = GET_MODE_SIZE (CCmode);
/* Scalar float: moved whole when some FP unit supports the mode.  */
43684 if ((TARGET_SSE && mode == TFmode)
43685 || (TARGET_80387 && mode == XFmode)
43686 || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
43687 || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
43688 units = GET_MODE_SIZE (mode);
43691 case MODE_COMPLEX_FLOAT:
43692 if ((TARGET_SSE && mode == TCmode)
43693 || (TARGET_80387 && mode == XCmode)
43694 || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
43695 || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
43696 units = GET_MODE_SIZE (mode);
43699 case MODE_VECTOR_INT:
43700 case MODE_VECTOR_FLOAT:
/* Vector modes: whole-register move when the widest matching ISA
   extension is enabled.  */
43701 if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
43702 || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
43703 || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
43704 || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
43705 || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
43706 units = GET_MODE_SIZE (mode);
43709 /* Return the cost of moving between two registers of mode MODE,
43710 assuming that the move will be in pieces of at most UNITS bytes. */
43711 return COSTS_N_INSNS (CEIL (GET_MODE_SIZE (mode), units));
43714 /* Compute a (partial) cost for rtx X. Return true if the complete
43715 cost has been computed, and false if subexpressions should be
43716 scanned. In either case, *TOTAL contains the cost result. */
/* Implementation of the TARGET_RTX_COSTS hook for x86.  SPEED selects the
   speed cost table (ix86_cost) vs. the size table (ix86_size_cost);
   OUTER_CODE/OPNO describe the context X appears in and are forwarded to
   recursive rtx_cost calls.  */
43719 ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
43720 int *total, bool speed)
43723 enum rtx_code code = GET_CODE (x);
43724 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
43725 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
/* SET of a register from a register or zero: delegate to the reg-reg
   move helper above.  */
43730 if (register_operand (SET_DEST (x), VOIDmode)
43731 && reg_or_0_operand (SET_SRC (x), VOIDmode))
43733 *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
/* Constants: grade by how expensive they are to materialize as
   immediates under the 64-bit encoding rules and PIC.  */
43742 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
43744 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
43746 else if (flag_pic && SYMBOLIC_CONST (x)
43748 && (GET_CODE (x) == LABEL_REF
43749 || (GET_CODE (x) == SYMBOL_REF
43750 && SYMBOL_REF_LOCAL_P (x))))
43751 /* Use 0 cost for CONST to improve its propagation. */
43752 && (TARGET_64BIT || GET_CODE (x) != CONST))
43758 case CONST_WIDE_INT:
/* Floating constants: standard_80387_constant_p recognizes constants
   loadable by a single x87 instruction (fldz/fld1/...).  */
43763 switch (standard_80387_constant_p (x))
43768 default: /* Other constants */
43775 if (SSE_FLOAT_MODE_P (mode))
43778 switch (standard_sse_constant_p (x))
43782 case 1: /* 0: xor eliminates false dependency */
43785 default: /* -1: cmp contains false dependency */
43790 /* Fall back to (MEM (SYMBOL_REF)), since that's where
43791 it'll probably end up. Add a penalty for size. */
43792 *total = (COSTS_N_INSNS (1)
43793 + (flag_pic != 0 && !TARGET_64BIT)
43794 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
43798 /* The zero extensions is often completely free on x86_64, so make
43799 it as cheap as possible. */
43800 if (TARGET_64BIT && mode == DImode
43801 && GET_MODE (XEXP (x, 0)) == SImode)
43803 else if (TARGET_ZERO_EXTEND_WITH_AND)
43804 *total = cost->add;
43806 *total = cost->movzx;
43810 *total = cost->movsx;
/* Shift by a small constant of a narrow integer: may be done with
   add (shift by 1) or lea (shift by 2 or 3) when cheaper.  */
43814 if (SCALAR_INT_MODE_P (mode)
43815 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
43816 && CONST_INT_P (XEXP (x, 1)))
43818 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
43821 *total = cost->add;
43824 if ((value == 2 || value == 3)
43825 && cost->lea <= cost->shift_const)
43827 *total = cost->lea;
43837 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
43839 /* ??? Should be SSE vector operation cost. */
43840 /* At least for published AMD latencies, this really is the same
43841 as the latency for a simple fpu operation like fabs. */
43842 /* V*QImode is emulated with 1-11 insns. */
43843 if (mode == V16QImode || mode == V32QImode)
43846 if (TARGET_XOP && mode == V16QImode)
43848 /* For XOP we use vpshab, which requires a broadcast of the
43849 value to the variable shift insn. For constants this
43850 means a V16Q const in mem; even when we can perform the
43851 shift with one insn set the cost to prefer paddb. */
43852 if (CONSTANT_P (XEXP (x, 1)))
43854 *total = (cost->fabs
43855 + rtx_cost (XEXP (x, 0), mode, code, 0, speed)
43856 + (speed ? 2 : COSTS_N_BYTES (16)));
43861 else if (TARGET_SSSE3)
43863 *total = cost->fabs * count;
43866 *total = cost->fabs;
/* Double-word scalar shifts: emulated with shift pairs; variable
   counts are costlier, and an AND-masked count is recognized as the
   cheap two-shift form.  */
43868 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
43870 if (CONST_INT_P (XEXP (x, 1)))
43872 if (INTVAL (XEXP (x, 1)) > 32)
43873 *total = cost->shift_const + COSTS_N_INSNS (2);
43875 *total = cost->shift_const * 2;
43879 if (GET_CODE (XEXP (x, 1)) == AND)
43880 *total = cost->shift_var * 2;
43882 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
43887 if (CONST_INT_P (XEXP (x, 1)))
43888 *total = cost->shift_const;
43889 else if (SUBREG_P (XEXP (x, 1))
43890 && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND)
43892 /* Return the cost after shift-and truncation. */
43893 *total = cost->shift_var;
43897 *total = cost->shift_var;
/* FMA: only reachable with an FMA-capable ISA enabled.  */
43905 gcc_assert (FLOAT_MODE_P (mode));
43906 gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
43908 /* ??? SSE scalar/vector cost should be used here. */
43909 /* ??? Bald assumption that fma has the same cost as fmul. */
43910 *total = cost->fmul;
43911 *total += rtx_cost (XEXP (x, 1), mode, FMA, 1, speed);
43913 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
43915 if (GET_CODE (sub) == NEG)
43916 sub = XEXP (sub, 0);
43917 *total += rtx_cost (sub, mode, FMA, 0, speed);
43920 if (GET_CODE (sub) == NEG)
43921 sub = XEXP (sub, 0);
43922 *total += rtx_cost (sub, mode, FMA, 2, speed);
/* MULT: float multiplies all use fmul; vector-int multiplies are
   costed per the emulation sequences noted below.  */
43927 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
43929 /* ??? SSE scalar cost should be used here. */
43930 *total = cost->fmul;
43933 else if (X87_FLOAT_MODE_P (mode))
43935 *total = cost->fmul;
43938 else if (FLOAT_MODE_P (mode))
43940 /* ??? SSE vector cost should be used here. */
43941 *total = cost->fmul;
43944 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
43946 /* V*QImode is emulated with 7-13 insns. */
43947 if (mode == V16QImode || mode == V32QImode)
43950 if (TARGET_XOP && mode == V16QImode)
43952 else if (TARGET_SSSE3)
43954 *total = cost->fmul * 2 + cost->fabs * extra;
43956 /* V*DImode is emulated with 5-8 insns. */
43957 else if (mode == V2DImode || mode == V4DImode)
43959 if (TARGET_XOP && mode == V2DImode)
43960 *total = cost->fmul * 2 + cost->fabs * 3;
43962 *total = cost->fmul * 3 + cost->fabs * 5;
43964 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
43965 insns, including two PMULUDQ. */
43966 else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
43967 *total = cost->fmul * 2 + cost->fabs * 5;
43969 *total = cost->fmul;
/* Scalar integer multiply: cost is mult_init plus mult_bit per set
   bit of a constant multiplier (nbits counted by clearing the lowest
   set bit), with special handling for widening multiplies.  */
43974 rtx op0 = XEXP (x, 0);
43975 rtx op1 = XEXP (x, 1);
43977 if (CONST_INT_P (XEXP (x, 1)))
43979 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
43980 for (nbits = 0; value != 0; value &= value - 1)
43984 /* This is arbitrary. */
43987 /* Compute costs correctly for widening multiplication. */
43988 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
43989 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
43990 == GET_MODE_SIZE (mode))
43992 int is_mulwiden = 0;
43993 machine_mode inner_mode = GET_MODE (op0);
43995 if (GET_CODE (op0) == GET_CODE (op1))
43996 is_mulwiden = 1, op1 = XEXP (op1, 0);
43997 else if (CONST_INT_P (op1))
43999 if (GET_CODE (op0) == SIGN_EXTEND)
44000 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
44003 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
44007 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
44010 *total = (cost->mult_init[MODE_INDEX (mode)]
44011 + nbits * cost->mult_bit
44012 + rtx_cost (op0, mode, outer_code, opno, speed)
44013 + rtx_cost (op1, mode, outer_code, opno, speed));
/* Division: fdiv for any float flavor, divide[] table by mode index
   for integers.  */
44022 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
44023 /* ??? SSE cost should be used here. */
44024 *total = cost->fdiv;
44025 else if (X87_FLOAT_MODE_P (mode))
44026 *total = cost->fdiv;
44027 else if (FLOAT_MODE_P (mode))
44028 /* ??? SSE vector cost should be used here. */
44029 *total = cost->fdiv;
44031 *total = cost->divide[MODE_INDEX (mode)];
/* PLUS on a word-size integer: recognize the lea-representable shapes
   (reg + scaled-index [+ const]) and charge cost->lea plus operands.  */
44035 if (GET_MODE_CLASS (mode) == MODE_INT
44036 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
44038 if (GET_CODE (XEXP (x, 0)) == PLUS
44039 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
44040 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
44041 && CONSTANT_P (XEXP (x, 1)))
44043 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
44044 if (val == 2 || val == 4 || val == 8)
44046 *total = cost->lea;
44047 *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
44048 outer_code, opno, speed);
44049 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
44050 outer_code, opno, speed);
44051 *total += rtx_cost (XEXP (x, 1), mode,
44052 outer_code, opno, speed);
44056 else if (GET_CODE (XEXP (x, 0)) == MULT
44057 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
44059 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
44060 if (val == 2 || val == 4 || val == 8)
44062 *total = cost->lea;
44063 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
44064 outer_code, opno, speed);
44065 *total += rtx_cost (XEXP (x, 1), mode,
44066 outer_code, opno, speed);
44070 else if (GET_CODE (XEXP (x, 0)) == PLUS)
44072 *total = cost->lea;
44073 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
44074 outer_code, opno, speed);
44075 *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
44076 outer_code, opno, speed);
44077 *total += rtx_cost (XEXP (x, 1), mode,
44078 outer_code, opno, speed);
/* Float add/sub: fadd regardless of SSE/x87/vector flavor.  */
44085 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
44087 /* ??? SSE cost should be used here. */
44088 *total = cost->fadd;
44091 else if (X87_FLOAT_MODE_P (mode))
44093 *total = cost->fadd;
44096 else if (FLOAT_MODE_P (mode))
44098 /* ??? SSE vector cost should be used here. */
44099 *total = cost->fadd;
/* Multi-word integer add/sub: two adds, doubling the operand cost of
   any operand that is not already DImode.  */
44107 if (GET_MODE_CLASS (mode) == MODE_INT
44108 && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
44110 *total = (cost->add * 2
44111 + (rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed)
44112 << (GET_MODE (XEXP (x, 0)) != DImode))
44113 + (rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed)
44114 << (GET_MODE (XEXP (x, 1)) != DImode)));
/* Float negation: fchs in all flavors.  */
44120 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
44122 /* ??? SSE cost should be used here. */
44123 *total = cost->fchs;
44126 else if (X87_FLOAT_MODE_P (mode))
44128 *total = cost->fchs;
44131 else if (FLOAT_MODE_P (mode))
44133 /* ??? SSE vector cost should be used here. */
44134 *total = cost->fchs;
44140 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
44142 /* ??? Should be SSE vector operation cost. */
44143 /* At least for published AMD latencies, this really is the same
44144 as the latency for a simple fpu operation like fabs. */
44145 *total = cost->fabs;
44147 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
44148 *total = cost->add * 2;
44150 *total = cost->add;
/* Comparison of a single extracted bit against zero maps onto
   test[bwl]; cost it like an AND with constant 1.  */
44154 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
44155 && XEXP (XEXP (x, 0), 1) == const1_rtx
44156 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
44157 && XEXP (x, 1) == const0_rtx)
44159 /* This kind of construct is implemented using test[bwl].
44160 Treat it as if we had an AND. */
44161 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
44162 *total = (cost->add
44163 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, outer_code,
44165 + rtx_cost (const1_rtx, mode, outer_code, opno, speed));
44169 /* The embedded comparison operand is completely free. */
44170 if (!general_operand (XEXP (x, 0), GET_MODE (XEXP (x, 0)))
44171 && XEXP (x, 1) == const0_rtx)
44177 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
/* ABS and SQRT: single fabs/fsqrt cost in every float flavor.  */
44182 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
44183 /* ??? SSE cost should be used here. */
44184 *total = cost->fabs;
44185 else if (X87_FLOAT_MODE_P (mode))
44186 *total = cost->fabs;
44187 else if (FLOAT_MODE_P (mode))
44188 /* ??? SSE vector cost should be used here. */
44189 *total = cost->fabs;
44193 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
44194 /* ??? SSE cost should be used here. */
44195 *total = cost->fsqrt;
44196 else if (X87_FLOAT_MODE_P (mode))
44197 *total = cost->fsqrt;
44198 else if (FLOAT_MODE_P (mode))
44199 /* ??? SSE vector cost should be used here. */
44200 *total = cost->fsqrt;
/* Thread-pointer UNSPEC and the vector shuffle family.  */
44204 if (XINT (x, 1) == UNSPEC_TP)
44210 case VEC_DUPLICATE:
44211 /* ??? Assume all of these vector manipulation patterns are
44212 recognizable. In which case they all pretty much have the
44214 *total = cost->fabs;
44217 mask = XEXP (x, 2);
44218 /* This is masked instruction, assume the same cost,
44219 as nonmasked variant. */
44220 if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
44221 *total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed);
44223 *total = cost->fabs;
/* Counter used to generate unique local labels (L<n>$lz, LPC$<n>) for
   Darwin/Mach-O lazy symbol stubs.  */
44233 static int current_machopic_label_num;
44235 /* Given a symbol name and its associated stub, write out the
44236 definition of the stub. */
/* Emits the Mach-O stub for SYMB into FILE: the stub section entry, the
   binder helper (unless the AT&T-style stub is used, which needs none),
   and the lazy-pointer entry.  32-bit only — asserted below.  */
44239 machopic_output_stub (FILE *file, const char *symb, const char *stub)
44241 unsigned int length;
44242 char *binder_name, *symbol_name, lazy_ptr_name[32];
44243 int label = ++current_machopic_label_num;
44245 /* For 64-bit we shouldn't get here. */
44246 gcc_assert (!TARGET_64BIT);
44248 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
44249 symb = targetm.strip_name_encoding (symb);
44251 length = strlen (stub);
44252 binder_name = XALLOCAVEC (char, length + 32);
44253 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
44255 length = strlen (symb);
44256 symbol_name = XALLOCAVEC (char, length + 32);
44257 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
44259 sprintf (lazy_ptr_name, "L%d$lz", label);
/* Pick the stub section matching the code model: ATT-style, pure PIC,
   or plain (non-PIC) stubs each live in their own section.  */
44261 if (MACHOPIC_ATT_STUB)
44262 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
44263 else if (MACHOPIC_PURE)
44264 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
44266 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
44268 fprintf (file, "%s:\n", stub);
44269 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
44271 if (MACHOPIC_ATT_STUB)
/* hlt padding; dyld rewrites this stub in place.  */
44273 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
44275 else if (MACHOPIC_PURE)
44278 /* 25-byte PIC stub using "CALL get_pc_thunk". */
44279 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
44280 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
44281 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
44282 label, lazy_ptr_name, label);
44283 fprintf (file, "\tjmp\t*%%ecx\n");
44286 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
44288 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
44289 it needs no stub-binding-helper. */
44290 if (MACHOPIC_ATT_STUB)
44293 fprintf (file, "%s:\n", binder_name);
44297 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
44298 fprintf (file, "\tpushl\t%%ecx\n");
44301 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
44303 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
44305 /* N.B. Keep the correspondence of these
44306 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
44307 old-pic/new-pic/non-pic stubs; altering this will break
44308 compatibility with existing dylibs. */
44311 /* 25-byte PIC stub using "CALL get_pc_thunk". */
44312 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
44315 /* 16-byte -mdynamic-no-pic stub. */
44316 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
/* Emit the lazy pointer: initially points at the binder so the first
   call resolves the symbol.  */
44318 fprintf (file, "%s:\n", lazy_ptr_name);
44319 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
44320 fprintf (file, ASM_LONG "%s\n", binder_name);
44322 #endif /* TARGET_MACHO */
44324 /* Order the registers for register allocator. */
/* Fills reg_alloc_order[]: call-clobbered GPRs first, then call-saved
   GPRs, then x87 (early when doing x87 math, late otherwise), SSE,
   REX/extended SSE, mask, MPX bound, and MMX registers; remaining slots
   are zero-filled.  */
44327 x86_order_regs_for_local_alloc (void)
44332 /* First allocate the local general purpose registers. */
44333 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
44334 if (GENERAL_REGNO_P (i) && call_used_regs[i])
44335 reg_alloc_order [pos++] = i;
44337 /* Global general purpose registers. */
44338 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
44339 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
44340 reg_alloc_order [pos++] = i;
44342 /* x87 registers come first in case we are doing FP math
44344 if (!TARGET_SSE_MATH)
44345 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
44346 reg_alloc_order [pos++] = i;
44348 /* SSE registers. */
44349 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
44350 reg_alloc_order [pos++] = i;
44351 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
44352 reg_alloc_order [pos++] = i;
44354 /* Extended REX SSE registers. */
44355 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
44356 reg_alloc_order [pos++] = i;
44358 /* Mask register. */
44359 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
44360 reg_alloc_order [pos++] = i;
44362 /* MPX bound registers. */
44363 for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
44364 reg_alloc_order [pos++] = i;
44366 /* x87 registers. */
/* When SSE math is in use, x87 registers are deprioritized to the end.  */
44367 if (TARGET_SSE_MATH)
44368 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
44369 reg_alloc_order [pos++] = i;
44371 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
44372 reg_alloc_order [pos++] = i;
44374 /* Initialize the rest of array as we do not allocate some registers
44376 while (pos < FIRST_PSEUDO_REGISTER)
44377 reg_alloc_order [pos++] = 0;
44380 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
44381 in struct attribute_spec handler. */
/* Validates the attribute: it must be on a function type, is 32-bit
   only, and its single argument must be the integer constant 0 or 1.
   On any violation a -Wattributes warning is issued and the attribute
   is dropped via *no_add_attrs.  */
44383 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
44386 bool *no_add_attrs)
44388 if (TREE_CODE (*node) != FUNCTION_TYPE
44389 && TREE_CODE (*node) != METHOD_TYPE
44390 && TREE_CODE (*node) != FIELD_DECL
44391 && TREE_CODE (*node) != TYPE_DECL)
44393 warning (OPT_Wattributes, "%qE attribute only applies to functions",
44395 *no_add_attrs = true;
44400 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
44402 *no_add_attrs = true;
44405 if (is_attribute_p ("callee_pop_aggregate_return", name))
44409 cst = TREE_VALUE (args);
44410 if (TREE_CODE (cst) != INTEGER_CST)
44412 warning (OPT_Wattributes,
44413 "%qE attribute requires an integer constant argument",
44415 *no_add_attrs = true;
44417 else if (compare_tree_int (cst, 0) != 0
44418 && compare_tree_int (cst, 1) != 0)
44420 warning (OPT_Wattributes,
44421 "argument to %qE attribute is neither zero, nor one",
44423 *no_add_attrs = true;
44432 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
44433 struct attribute_spec.handler. */
/* Rejects non-function targets with a warning, and errors when ms_abi
   and sysv_abi are combined on the same type (checked both ways).  */
44435 ix86_handle_abi_attribute (tree *node, tree name, tree, int,
44436 bool *no_add_attrs)
44438 if (TREE_CODE (*node) != FUNCTION_TYPE
44439 && TREE_CODE (*node) != METHOD_TYPE
44440 && TREE_CODE (*node) != FIELD_DECL
44441 && TREE_CODE (*node) != TYPE_DECL)
44443 warning (OPT_Wattributes, "%qE attribute only applies to functions",
44445 *no_add_attrs = true;
44449 /* Can combine regparm with all attributes but fastcall. */
44450 if (is_attribute_p ("ms_abi", name))
44452 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
44454 error ("ms_abi and sysv_abi attributes are not compatible");
44459 else if (is_attribute_p ("sysv_abi", name))
44461 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
44463 error ("ms_abi and sysv_abi attributes are not compatible");
44472 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
44473 struct attribute_spec.handler. */
/* Accepts the attribute only on record/union types (looking through a
   TYPE_DECL); warns and drops it otherwise, and also when ms_struct and
   gcc_struct are combined on the same type.  */
44475 ix86_handle_struct_attribute (tree *node, tree name, tree, int,
44476 bool *no_add_attrs)
44479 if (DECL_P (*node))
44481 if (TREE_CODE (*node) == TYPE_DECL)
44482 type = &TREE_TYPE (*node);
44487 if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
44489 warning (OPT_Wattributes, "%qE attribute ignored",
44491 *no_add_attrs = true;
44494 else if ((is_attribute_p ("ms_struct", name)
44495 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
44496 || ((is_attribute_p ("gcc_struct", name)
44497 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
44499 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
44501 *no_add_attrs = true;
/* Generic handler for attributes that apply only to function
   declarations: warns and drops the attribute on anything else.  */
44508 ix86_handle_fndecl_attribute (tree *node, tree name, tree, int,
44509 bool *no_add_attrs)
44511 if (TREE_CODE (*node) != FUNCTION_DECL)
44513 warning (OPT_Wattributes, "%qE attribute only applies to functions",
44515 *no_add_attrs = true;
/* True when RECORD_TYPE should use MS bitfield layout: either the
   target default is MS layout and the type is not marked gcc_struct,
   or the type is explicitly marked ms_struct.  */
44521 ix86_ms_bitfield_layout_p (const_tree record_type)
44523 return ((TARGET_MS_BITFIELD_LAYOUT
44524 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
44525 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
44528 /* Returns an expression indicating where the this parameter is
44529 located on entry to the FUNCTION. */
/* 64-bit: `this' is in the first (or, for aggregate-return functions,
   second) integer parameter register of the active ABI.  32-bit: a
   register for regparm/fastcall/thiscall conventions, otherwise the
   stack slot at 4(%esp).  AGGR is 1 when the return value is passed by
   hidden pointer, shifting `this' one slot.  */
44532 x86_this_parameter (tree function)
44534 tree type = TREE_TYPE (function);
44535 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
44540 const int *parm_regs;
44542 if (ix86_function_type_abi (type) == MS_ABI)
44543 parm_regs = x86_64_ms_abi_int_parameter_registers;
44545 parm_regs = x86_64_int_parameter_registers;
44546 return gen_rtx_REG (Pmode, parm_regs[aggr]);
44549 nregs = ix86_function_regparm (type, function);
44551 if (nregs > 0 && !stdarg_p (type))
44554 unsigned int ccvt = ix86_get_callcvt (type);
44556 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
44557 regno = aggr ? DX_REG : CX_REG;
44558 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
/* thiscall with aggregate return: `this' stays on the stack.  */
44562 return gen_rtx_MEM (SImode,
44563 plus_constant (Pmode, stack_pointer_rtx, 4));
44572 return gen_rtx_MEM (SImode,
44573 plus_constant (Pmode,
44574 stack_pointer_rtx, 4));
44577 return gen_rtx_REG (SImode, regno);
/* Default: first stack argument slot, offset by the hidden return
   pointer when AGGR.  */
44580 return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
44584 /* Determine whether x86_output_mi_thunk can succeed. */
/* 64-bit always works; 32-bit needs a scratch register free of the
   regparm argument registers, with extra demand when a vcall offset or
   a PIC reference to a non-local FUNCTION requires one.  */
44587 x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
44588 const_tree function)
44590 /* 64-bit can handle anything. */
44594 /* For 32-bit, everything's fine if we have one free register. */
44595 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
44598 /* Need a free register for vcall_offset. */
44602 /* Need a free register for GOT references. */
44603 if (flag_pic && !targetm.binds_local_p (function))
44606 /* Otherwise ok. */
44610 /* Output the assembler code for a thunk function. THUNK_DECL is the
44611 declaration for the thunk function itself, FUNCTION is the decl for
44612 the target function. DELTA is an immediate constant offset to be
44613 added to THIS. If VCALL_OFFSET is nonzero, the word at
44614 *(*this + vcall_offset) should be added to THIS. */
/* Implementation of TARGET_ASM_OUTPUT_MI_THUNK: emits RTL that adjusts
   the `this' pointer and sibcall-jumps to FUNCTION, then runs just
   enough of final() to print the insns into FILE.  */
44617 x86_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
44618 HOST_WIDE_INT vcall_offset, tree function)
44620 rtx this_param = x86_this_parameter (function);
44621 rtx this_reg, tmp, fnaddr;
44622 unsigned int tmp_regno;
/* Choose a scratch register that cannot hold an argument: r10 on
   64-bit; on 32-bit, avoid the register carrying `this' for the
   fastcall/thiscall conventions.  */
44626 tmp_regno = R10_REG;
44629 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
44630 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
44631 tmp_regno = AX_REG;
44632 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
44633 tmp_regno = DX_REG;
44635 tmp_regno = CX_REG;
44638 emit_note (NOTE_INSN_PROLOGUE_END);
44640 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
44641 pull it in now and let DELTA benefit. */
44642 if (REG_P (this_param))
44643 this_reg = this_param;
44644 else if (vcall_offset)
44646 /* Put the this parameter into %eax. */
44647 this_reg = gen_rtx_REG (Pmode, AX_REG);
44648 emit_move_insn (this_reg, this_param);
44651 this_reg = NULL_RTX;
44653 /* Adjust the this parameter by a fixed constant. */
44656 rtx delta_rtx = GEN_INT (delta);
44657 rtx delta_dst = this_reg ? this_reg : this_param;
/* DELTA too wide for an immediate on 64-bit: load it into the scratch
   register first.  */
44661 if (!x86_64_general_operand (delta_rtx, Pmode))
44663 tmp = gen_rtx_REG (Pmode, tmp_regno);
44664 emit_move_insn (tmp, delta_rtx);
44669 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
44672 /* Adjust the this parameter by a value stored in the vtable. */
44675 rtx vcall_addr, vcall_mem, this_mem;
44677 tmp = gen_rtx_REG (Pmode, tmp_regno);
/* Load the vtable pointer, zero-extending when pointers are narrower
   than Pmode (x32).  */
44679 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
44680 if (Pmode != ptr_mode)
44681 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
44682 emit_move_insn (tmp, this_mem);
44684 /* Adjust the this parameter. */
44685 vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
44687 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
44689 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
44690 emit_move_insn (tmp2, GEN_INT (vcall_offset));
44691 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
44694 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
44695 if (Pmode != ptr_mode)
44696 emit_insn (gen_addsi_1_zext (this_reg,
44697 gen_rtx_REG (ptr_mode,
44701 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
44704 /* If necessary, drop THIS back to its stack slot. */
44705 if (this_reg && this_reg != this_param)
44706 emit_move_insn (this_param, this_reg);
44708 fnaddr = XEXP (DECL_RTL (function), 0);
/* Form the callee address: direct for local/non-PIC targets, otherwise
   through the GOT (GOTPCREL on 64-bit, GOT via the 32-bit PIC register
   set up by output_set_got; Mach-O uses its indirection).  */
44711 if (!flag_pic || targetm.binds_local_p (function)
44716 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
44717 tmp = gen_rtx_CONST (Pmode, tmp);
44718 fnaddr = gen_const_mem (Pmode, tmp);
44723 if (!flag_pic || targetm.binds_local_p (function))
44726 else if (TARGET_MACHO)
44728 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
44729 fnaddr = XEXP (fnaddr, 0);
44731 #endif /* TARGET_MACHO */
44734 tmp = gen_rtx_REG (Pmode, CX_REG);
44735 output_set_got (tmp, NULL_RTX);
44737 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
44738 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
44739 fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
44740 fnaddr = gen_const_mem (Pmode, fnaddr);
44744 /* Our sibling call patterns do not allow memories, because we have no
44745 predicate that can distinguish between frame and non-frame memory.
44746 For our purposes here, we can get away with (ab)using a jump pattern,
44747 because we're going to do no optimization. */
44748 if (MEM_P (fnaddr))
44750 if (sibcall_insn_operand (fnaddr, word_mode))
44752 fnaddr = XEXP (DECL_RTL (function), 0);
44753 tmp = gen_rtx_MEM (QImode, fnaddr);
44754 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
44755 tmp = emit_call_insn (tmp);
44756 SIBLING_CALL_P (tmp) = 1;
44759 emit_jump_insn (gen_indirect_jump (fnaddr));
44763 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
44765 // CM_LARGE_PIC always uses pseudo PIC register which is
44766 // uninitialized. Since FUNCTION is local and calling it
44767 // doesn't go through PLT, we use scratch register %r11 as
44768 // PIC register and initialize it here.
44769 pic_offset_table_rtx = gen_rtx_REG (Pmode, R11_REG);
44770 ix86_init_large_pic_reg (tmp_regno);
44771 fnaddr = legitimize_pic_address (fnaddr,
44772 gen_rtx_REG (Pmode, tmp_regno));
/* If the address still does not satisfy the sibcall predicate, move it
   into the scratch register (zero-extending narrower addresses).  */
44775 if (!sibcall_insn_operand (fnaddr, word_mode))
44777 tmp = gen_rtx_REG (word_mode, tmp_regno);
44778 if (GET_MODE (fnaddr) != word_mode)
44779 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
44780 emit_move_insn (tmp, fnaddr);
44784 tmp = gen_rtx_MEM (QImode, fnaddr);
44785 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
44786 tmp = emit_call_insn (tmp);
44787 SIBLING_CALL_P (tmp) = 1;
44791 /* Emit just enough of rest_of_compilation to get the insns emitted.
44792 Note that use_thunk calls assemble_start_function et al. */
44793 insn = get_insns ();
44794 shorten_branches (insn);
44795 final_start_function (insn, file, 1);
44796 final (insn, file, 1);
44797 final_end_function ();
/* TARGET_ASM_FILE_START: emit per-file assembler prologue directives —
   .code16gcc / Darwin setup / .version / __fltused / Intel-syntax
   selection, each gated by the corresponding target option.  */
44801 x86_file_start (void)
44803 default_file_start ();
44805 fputs ("\t.code16gcc\n", asm_out_file);
44807 darwin_file_start ();
44809 if (X86_FILE_START_VERSION_DIRECTIVE)
44810 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
44811 if (X86_FILE_START_FLTUSED)
44812 fputs ("\t.global\t__fltused\n", asm_out_file);
44813 if (ix86_asm_dialect == ASM_INTEL)
44814 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
/* ADJUST_FIELD_ALIGN worker: on 32-bit non-align-double targets, cap
   the alignment of double/integer-class fields at 32 bits (the classic
   i386 struct-layout ABI); IA-MCU uses its own rule.  */
44818 x86_field_alignment (tree field, int computed)
44821 tree type = TREE_TYPE (field);
44823 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
44826 return iamcu_alignment (type, computed);
/* Look through arrays so an array of doubles is capped like a double.  */
44827 mode = TYPE_MODE (strip_array_types (type));
44828 if (mode == DFmode || mode == DCmode
44829 || GET_MODE_CLASS (mode) == MODE_INT
44830 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
44831 return MIN (32, computed);
44835 /* Print call to TARGET to FILE. */
/* With -mnop-mcount, substitute a 5-byte NOP of the same length as the
   call so the site can be patched later; the 1: label marks the site
   for __mcount_loc.  */
44838 x86_print_call_or_nop (FILE *file, const char *target)
44840 if (flag_nop_mcount)
44841 fprintf (file, "1:\tnopl 0x00(%%eax,%%eax,1)\n"); /* 5 byte nop. */
44843 fprintf (file, "1:\tcall\t%s\n", target);
44846 /* Output assembler code to FILE to increment profiler label # LABELNO
44847 for profiling a function entry. */
/* Emits the mcount call: -mfentry selects the before-prologue symbol;
   64-bit PIC goes through GOTPCREL, 32-bit PIC through @GOT(%ebx);
   optional counter setup and a __mcount_loc record for -mrecord-mcount.  */
44849 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
44851 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
44855 #ifndef NO_PROFILE_COUNTERS
44856 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
44859 if (!TARGET_PECOFF && flag_pic)
44860 fprintf (file, "1:\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
44862 x86_print_call_or_nop (file, mcount_name);
44866 #ifndef NO_PROFILE_COUNTERS
44867 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
44870 fprintf (file, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
44874 #ifndef NO_PROFILE_COUNTERS
44875 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
44878 x86_print_call_or_nop (file, mcount_name);
/* Record the call site (label 1b above) in __mcount_loc so tooling such
   as ftrace can find and patch it.  */
44881 if (flag_record_mcount)
44883 fprintf (file, "\t.section __mcount_loc, \"a\",@progbits\n");
44884 fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
44885 fprintf (file, "\t.previous\n");
44889 /* We don't have exact information about the insn sizes, but we may assume
44890 quite safely that we are informed about all 1 byte insns and memory
44891 address sizes. This is enough to eliminate unnecessary padding in
/* Returns a conservative lower bound (in bytes) on INSN's encoded size,
   used by ix86_avoid_jump_mispredicts below.  Inactive insns and the
   backend's own alignment UNSPECs count as zero.  */
44895 min_insn_size (rtx_insn *insn)
44899 if (!INSN_P (insn) || !active_insn_p (insn))
44902 /* Discard alignments we've emit and jump instructions. */
44903 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
44904 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
44907 /* Important case - calls are always 5 bytes.
44908 It is common to have many calls in the row. */
44910 && symbolic_reference_mentioned_p (PATTERN (insn))
44911 && !SIBLING_CALL_P (insn))
44913 len = get_attr_length (insn);
44917 /* For normal instructions we rely on get_attr_length being exact,
44918 with a few exceptions. */
44919 if (!JUMP_P (insn))
44921 enum attr_type type = get_attr_type (insn);
/* Inline asm has no reliable length estimate; treated specially.  */
44926 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
44927 || asm_noperands (PATTERN (insn)) >= 0)
44934 /* Otherwise trust get_attr_length. */
/* Jumps: base the bound on the address-length attribute, bumping short
   symbolic references.  */
44938 l = get_attr_length_address (insn);
44939 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
44948 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
44950 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
/* Scans the insn stream with a sliding window [START, INSN], tracking
   NBYTES (window size from min_insn_size) and NJUMPS; when four jumps
   could share one 16-byte line, a pad insn is inserted before the
   fourth.  */
44954 ix86_avoid_jump_mispredicts (void)
44956 rtx_insn *insn, *start = get_insns ();
44957 int nbytes = 0, njumps = 0;
44958 bool isjump = false;
44960 /* Look for all minimal intervals of instructions containing 4 jumps.
44961 The intervals are bounded by START and INSN. NBYTES is the total
44962 size of instructions in the interval including INSN and not including
44963 START. When the NBYTES is smaller than 16 bytes, it is possible
44964 that the end of START and INSN ends up in the same 16byte page.
44966 The smallest offset in the page INSN can start is the case where START
44967 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
44968 We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
44970 Don't consider asm goto as jump, while it can contain a jump, it doesn't
44971 have to, control transfer to label(s) can be performed through other
44972 means, and also we estimate minimum length of all asm stmts as 0. */
44973 for (insn = start; insn; insn = NEXT_INSN (insn))
44977 if (LABEL_P (insn))
44979 int align = label_to_alignment (insn);
44980 int max_skip = label_to_max_skip (insn);
44984 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
44985 already in the current 16 byte page, because otherwise
44986 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
44987 bytes to reach 16 byte boundary. */
44989 || (align <= 3 && max_skip != (1 << align) - 1))
44992 fprintf (dump_file, "Label %i with max_skip %i\n",
44993 INSN_UID (insn), max_skip);
/* An aligned label bounds how much of the current 16-byte page can
   already be occupied; shrink the window from the front accordingly.  */
44996 while (nbytes + max_skip >= 16)
44998 start = NEXT_INSN (start);
44999 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
45001 njumps--, isjump = true;
45004 nbytes -= min_insn_size (start);
45010 min_size = min_insn_size (insn);
45011 nbytes += min_size;
45013 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
45014 INSN_UID (insn), min_size);
45015 if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
/* Keep at most 4 jumps in the window; advance START past the oldest.  */
45023 start = NEXT_INSN (start);
45024 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
45026 njumps--, isjump = true;
45029 nbytes -= min_insn_size (start);
45031 gcc_assert (njumps >= 0);
45033 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
45034 INSN_UID (start), INSN_UID (insn), nbytes);
45036 if (njumps == 3 && isjump && nbytes < 16)
45038 int padsize = 15 - nbytes + min_insn_size (insn);
45041 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
45042 INSN_UID (insn), padsize);
45043 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
45049 /* AMD Athlon works faster
45050 when RET is not destination of conditional jump or directly preceded
45051 by other jump instruction. We avoid the penalty by inserting NOP just
45052 before the RET instructions in such cases. */
/* Walks every predecessor edge of the exit block; for a RET that is a
   branch target (or directly follows a jump), and for empty functions,
   replaces the plain return with the padded long-return pattern.
   Size-optimized blocks are left alone.  */
45054 ix86_pad_returns (void)
45059 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
45061 basic_block bb = e->src;
45062 rtx_insn *ret = BB_END (bb);
45064 bool replace = false;
45066 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
45067 || optimize_bb_for_size_p (bb))
/* Find the active insn or label immediately before the return.  */
45069 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
45070 if (active_insn_p (prev) || LABEL_P (prev))
45072 if (prev && LABEL_P (prev))
/* RET right after a label: pad when any non-fallthru edge targets it.  */
45077 FOR_EACH_EDGE (e, ei, bb->preds)
45078 if (EDGE_FREQUENCY (e) && e->src->index >= 0
45079 && !(e->flags & EDGE_FALLTHRU))
45087 prev = prev_active_insn (ret);
45089 && ((JUMP_P (prev) && any_condjump_p (prev))
45092 /* Empty functions get branch mispredict even when
45093 the jump destination is not visible to us. */
45094 if (!prev && !optimize_function_for_size_p (cfun))
45099 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
45105 /* Count the minimum number of instructions in BB. Return 4 if the
45106 number of instructions >= 4. */
/* NOTE(review): sparse dump — the function's return statements and
   braces are among the elided lines; code kept verbatim.  */
45109 ix86_count_insn_bb (basic_block bb)
45112 int insn_count = 0;
45114 /* Count number of instructions in this block. Return 4 if the number
45115 of instructions >= 4. */
45116 FOR_BB_INSNS (bb, insn)
45118 /* Only happen in exit blocks. */
45120 && ANY_RETURN_P (PATTERN (insn)))
/* Real instructions only: ignore debug insns and bare USE/CLOBBER
   patterns, which emit no code.  */
45123 if (NONDEBUG_INSN_P (insn)
45124 && GET_CODE (PATTERN (insn)) != USE
45125 && GET_CODE (PATTERN (insn)) != CLOBBER)
/* Saturate at 4 — callers only care whether the count is < 4.  */
45128 if (insn_count >= 4)
45137 /* Count the minimum number of instructions in code path in BB.
45138 Return 4 if the number of instructions >= 4. */
45141 ix86_count_insn (basic_block bb)
45145 int min_prev_count;
45147 /* Only bother counting instructions along paths with no
45148 more than 2 basic blocks between entry and exit. Given
45149 that BB has an edge to exit, determine if a predecessor
45150 of BB has an edge from entry. If so, compute the number
45151 of instructions in the predecessor block. If there
45152 happen to be multiple such blocks, compute the minimum. */
45153 min_prev_count = 4;
45154 FOR_EACH_EDGE (e, ei, bb->preds)
45157 edge_iterator prev_ei;
/* BB is directly reachable from function entry: no predecessor
   instructions on this path.  */
45159 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
45161 min_prev_count = 0;
/* Otherwise consider entry -> pred -> BB paths (two-block paths):
   count the predecessor's instructions and keep the minimum.  */
45164 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
45166 if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
45168 int count = ix86_count_insn_bb (e->src);
45169 if (count < min_prev_count)
45170 min_prev_count = count;
/* Only add BB's own count if some path was short enough to matter;
   the result saturates at >= 4 for the caller's "short function" test.  */
45176 if (min_prev_count < 4)
45177 min_prev_count += ix86_count_insn_bb (bb);
45179 return min_prev_count;
45182 /* Pad short function to 4 instructions. */
/* NOTE(review): sparse dump — loop/brace lines are elided here;
   code kept verbatim, comments only.  Used for TARGET_PAD_SHORT_FUNCTION
   (see the ix86_reorg fragment later in this file).  */
45185 ix86_pad_short_function (void)
45190 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
45192 rtx_insn *ret = BB_END (e->src);
45193 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
45195 int insn_count = ix86_count_insn (e->src);
45197 /* Pad short function. */
45198 if (insn_count < 4)
45200 rtx_insn *insn = ret;
45202 /* Find epilogue. */
/* Walk backwards to the NOTE_INSN_EPILOGUE_BEG note so the NOPs are
   inserted before the epilogue, not between epilogue insns.  */
45205 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
45206 insn = PREV_INSN (insn);
45211 /* Two NOPs count as one instruction. */
45212 insn_count = 2 * (4 - insn_count);
45213 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
45219 /* Fix up a Windows system unwinder issue. If an EH region falls through into
45220 the epilogue, the Windows system unwinder will apply epilogue logic and
45221 produce incorrect offsets. This can be avoided by adding a nop between
45222 the last insn that can throw and the first insn of the epilogue. */
45225 ix86_seh_fixup_eh_fallthru (void)
/* Examine each block falling into the exit block; each contains (at
   most) one epilogue whose start note we search for below.  */
45230 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
45232 rtx_insn *insn, *next;
45234 /* Find the beginning of the epilogue. */
45235 for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
45236 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
45241 /* We only care about preceding insns that can throw. */
45242 insn = prev_active_insn (insn);
45243 if (insn == NULL || !can_throw_internal (insn))
45246 /* Do not separate calls from their debug information. */
/* Skip forward over var-location / call-arg-location notes so the NOP
   lands after the debug notes attached to the throwing insn.  */
45247 for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
45249 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
45250 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION))
/* const1_rtx => a single NOP separates the last throwing insn from
   the epilogue, which is all the SEH unwinder needs.  */
45255 emit_insn_after (gen_nops (const1_rtx), insn);
45259 /* Given a register number BASE, the lowest of a group of registers, update
45260 regsets IN and OUT with the registers that should be avoided in input
45261 and output operands respectively when trying to avoid generating a modr/m
45262 byte for -fmitigate-rop. */
/* Note: HARD_REG_SET references (&) mean this file is compiled as C++,
   as modern GCC sources are despite the .c extension.  */
45265 set_rop_modrm_reg_bits (int base, HARD_REG_SET &in, HARD_REG_SET &out)
/* The first two registers of the group are risky as outputs (reg field
   of the modr/m byte), the next two as inputs (r/m field).  */
45267 SET_HARD_REG_BIT (out, base);
45268 SET_HARD_REG_BIT (out, base + 1);
45269 SET_HARD_REG_BIT (in, base + 2);
45270 SET_HARD_REG_BIT (in, base + 3);
45273 /* Called if -fmitigate_rop is in effect. Try to rewrite instructions so
45274 that certain encodings of modr/m bytes do not occur. */
/* NOTE(review): this is the most heavily elided function in this dump
   (braces, several loop headers, and some conditions are missing).
   The code is kept byte-identical; comments describe only what the
   visible lines establish.  Two phases are visible: (1) use the
   regrename machinery to rename whole chains away from risky
   registers; (2) a per-insn backward-liveness pass that patches
   remaining operands by introducing a fresh register + move.  */
45276 ix86_mitigate_rop (void)
45278 HARD_REG_SET input_risky;
45279 HARD_REG_SET output_risky;
45280 HARD_REG_SET inout_risky;
/* Build the sets of registers whose encodings produce risky modr/m
   bytes: a few fixed legacy regs plus the 4-reg groups registered via
   set_rop_modrm_reg_bits for SSE/REX/mask/bound register files.  */
45282 CLEAR_HARD_REG_SET (output_risky);
45283 CLEAR_HARD_REG_SET (input_risky);
45284 SET_HARD_REG_BIT (output_risky, AX_REG);
45285 SET_HARD_REG_BIT (output_risky, CX_REG);
45286 SET_HARD_REG_BIT (input_risky, BX_REG);
45287 SET_HARD_REG_BIT (input_risky, DX_REG);
45288 set_rop_modrm_reg_bits (FIRST_SSE_REG, input_risky, output_risky);
45289 set_rop_modrm_reg_bits (FIRST_REX_INT_REG, input_risky, output_risky);
45290 set_rop_modrm_reg_bits (FIRST_REX_SSE_REG, input_risky, output_risky);
45291 set_rop_modrm_reg_bits (FIRST_EXT_REX_SSE_REG, input_risky, output_risky);
45292 set_rop_modrm_reg_bits (FIRST_MASK_REG, input_risky, output_risky);
45293 set_rop_modrm_reg_bits (FIRST_BND_REG, input_risky, output_risky);
45294 COPY_HARD_REG_SET (inout_risky, input_risky);
45295 IOR_HARD_REG_SET (inout_risky, output_risky);
45297 df_note_add_problem ();
45298 /* Fix up what stack-regs did. */
45299 df_insn_rescan_all ();
/* Phase 1: collect rename candidates via the regrename pass.  */
45302 regrename_init (true);
45303 regrename_analyze (NULL);
45305 auto_vec<du_head_p> cands;
45307 for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
45309 if (!NONDEBUG_INSN_P (insn))
45312 if (GET_CODE (PATTERN (insn)) == USE
45313 || GET_CODE (PATTERN (insn)) == CLOBBER)
45316 extract_insn (insn);
/* Identify the two operands (opno0/opno1) forming the modr/m byte
   and skip insns whose byte is not one we need to change.  */
45319 int modrm = ix86_get_modrm_for_rop (insn, recog_data.operand,
45320 recog_data.n_operands, &opno0,
45323 if (!ix86_rop_should_change_byte_p (modrm))
45326 insn_rr_info *info = &insn_rr[INSN_UID (insn)];
45328 /* This happens when regrename has to fail a block. */
45329 if (!info->op_info)
45332 if (info->op_info[opno0].n_chains != 0)
45334 gcc_assert (info->op_info[opno0].n_chains == 1);
/* target_data_1/_2 count output/input uses of the chain; a chain is
   pushed as a candidate the first time it is seen (both zero).  */
45336 op0c = regrename_chain_from_id (info->op_info[opno0].heads[0]->id);
45337 if (op0c->target_data_1 + op0c->target_data_2 == 0
45338 && !op0c->cannot_rename)
45339 cands.safe_push (op0c);
45341 op0c->target_data_1++;
45343 if (info->op_info[opno1].n_chains != 0)
45345 gcc_assert (info->op_info[opno1].n_chains == 1);
45347 op1c = regrename_chain_from_id (info->op_info[opno1].heads[0]->id);
45348 if (op1c->target_data_1 + op1c->target_data_2 == 0
45349 && !op1c->cannot_rename)
45350 cands.safe_push (op1c);
45352 op1c->target_data_2++;
/* Try to rename each candidate chain to a non-risky register.  */
45358 FOR_EACH_VEC_ELT (cands, i, head)
45360 int old_reg, best_reg;
45361 HARD_REG_SET unavailable;
45363 CLEAR_HARD_REG_SET (unavailable);
45364 if (head->target_data_1)
45365 IOR_HARD_REG_SET (unavailable, output_risky);
45366 if (head->target_data_2)
45367 IOR_HARD_REG_SET (unavailable, input_risky);
45370 reg_class superclass = regrename_find_superclass (head, &n_uses,
45372 old_reg = head->regno;
45373 best_reg = find_rename_reg (head, superclass, &unavailable,
45375 bool ok = regrename_do_replace (head, best_reg);
45378 fprintf (dump_file, "Chain %d renamed as %s in %s\n", head->id,
45379 reg_names[best_reg], reg_class_names[superclass]);
45383 regrename_finish ();
/* Phase 2: simulate liveness backwards through each block and patch
   any insn whose modr/m byte is still risky.  */
45390 INIT_REG_SET (&live);
45392 FOR_EACH_BB_FN (bb, cfun)
45396 COPY_REG_SET (&live, DF_LR_OUT (bb));
45397 df_simulate_initialize_backwards (bb, &live);
45399 FOR_BB_INSNS_REVERSE (bb, insn)
45401 if (!NONDEBUG_INSN_P (insn))
45404 df_simulate_one_insn_backwards (bb, insn, &live);
45406 if (GET_CODE (PATTERN (insn)) == USE
45407 || GET_CODE (PATTERN (insn)) == CLOBBER)
45410 extract_insn (insn);
45411 constrain_operands_cached (insn, reload_completed);
45413 int modrm = ix86_get_modrm_for_rop (insn, recog_data.operand,
45414 recog_data.n_operands, &opno0,
45417 || !ix86_rop_should_change_byte_p (modrm)
45421 rtx oldreg = recog_data.operand[opno1];
45422 preprocess_constraints (insn);
45423 const operand_alternative *alt = which_op_alt ();
/* Bail out if any earlyclobber output overlaps the register we want
   to replace — substituting would violate the constraint.  */
45426 for (i = 0; i < recog_data.n_operands; i++)
45428 && alt[i].earlyclobber
45429 && reg_overlap_mentioned_p (recog_data.operand[i],
45433 if (i < recog_data.n_operands)
45437 fprintf (dump_file,
45438 "attempting to fix modrm byte in insn %d:"
45439 " reg %d class %s", INSN_UID (insn), REGNO (oldreg),
45440 reg_class_names[alt[opno1].cl]);
/* A replacement register must be: dead here, not the old register,
   call-used (this runs post-reload; callee-saved regs were not
   saved for it), not fixed, not output-risky, and in the operand's
   constraint class.  */
45442 HARD_REG_SET unavailable;
45443 REG_SET_TO_HARD_REG_SET (unavailable, &live);
45444 SET_HARD_REG_BIT (unavailable, REGNO (oldreg));
45445 IOR_COMPL_HARD_REG_SET (unavailable, call_used_reg_set);
45446 IOR_HARD_REG_SET (unavailable, fixed_reg_set);
45447 IOR_HARD_REG_SET (unavailable, output_risky);
45448 IOR_COMPL_HARD_REG_SET (unavailable,
45449 reg_class_contents[alt[opno1].cl]);
45451 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
45452 if (!TEST_HARD_REG_BIT (unavailable, i))
45454 if (i == FIRST_PSEUDO_REGISTER)
45457 fprintf (dump_file, ", none available\n");
45461 fprintf (dump_file, " -> %d\n", i);
/* Substitute the new register in the insn and copy the old value
   into it just before the insn.  */
45462 rtx newreg = gen_rtx_REG (recog_data.operand_mode[opno1], i);
45463 validate_change (insn, recog_data.operand_loc[opno1], newreg, false);
45464 insn = emit_insn_before (gen_move_insn (newreg, oldreg), insn);
45469 /* Implement machine specific optimizations. We implement padding of returns
45470 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
/* NOTE(review): the function header line itself (presumably
   "static void ix86_reorg (void)", the TARGET_MACHINE_DEPENDENT_REORG
   hook) is among the lines elided by this dump.  This is the driver
   that dispatches to the fix-up passes defined above.  */
45474 /* We are freeing block_for_insn in the toplev to keep compatibility
45475 with old MDEP_REORGS that are not CFG based. Recompute it now. */
45476 compute_bb_for_insn ();
45478 if (flag_mitigate_rop)
45479 ix86_mitigate_rop ();
45481 if (TARGET_SEH && current_function_has_exception_handlers ())
45482 ix86_seh_fixup_eh_fallthru ();
/* The padding passes are speed optimizations only; mutually exclusive:
   short-function padding takes priority over return padding.  */
45484 if (optimize && optimize_function_for_speed_p (cfun))
45486 if (TARGET_PAD_SHORT_FUNCTION)
45487 ix86_pad_short_function ();
45488 else if (TARGET_PAD_RETURNS)
45489 ix86_pad_returns ();
45490 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
45491 if (TARGET_FOUR_JUMP_LIMIT)
45492 ix86_avoid_jump_mispredicts ();
45497 /* Return nonzero when QImode register that must be represented via REX prefix
/* (continuation of the comment is elided by this dump) */
45500 x86_extended_QIreg_mentioned_p (rtx_insn *insn)
45503 extract_insn_cached (insn);
/* Any general register operand that is not one of the legacy
   QI-addressable registers (AL..BL) needs a REX prefix.  */
45504 for (i = 0; i < recog_data.n_operands; i++)
45505 if (GENERAL_REG_P (recog_data.operand[i])
45506 && !QI_REGNO_P (REGNO (recog_data.operand[i])))
45511 /* Return true when INSN mentions register that must be encoded using REX
/* (rest of comment elided by dump; presumably "prefix.")  */
45514 x86_extended_reg_mentioned_p (rtx insn)
45516 subrtx_iterator::array_type array;
/* Scan every sub-rtx of the pattern (or of a bare rtx) looking for a
   REX-only integer or SSE register (r8-r15 / xmm8-xmm15).  */
45517 FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST)
45519 const_rtx x = *iter;
45521 && (REX_INT_REGNO_P (REGNO (x)) || REX_SSE_REGNO_P (REGNO (x))))
45527 /* If profitable, negate (without causing overflow) integer constant
45528 of mode MODE at location LOC. Return true in this case. */
/* NOTE(review): the switch over MODE between 45541 and 45552 is elided
   by this dump; code kept verbatim.  */
45530 x86_maybe_negate_const_int (rtx *loc, machine_mode mode)
45534 if (!CONST_INT_P (*loc))
45540 /* DImode x86_64 constants must fit in 32 bits. */
45541 gcc_assert (x86_64_immediate_operand (*loc, mode));
45552 gcc_unreachable ();
45555 /* Avoid overflows. */
/* Negating the sign-bit value (e.g. INT_MIN) would overflow.  */
45556 if (mode_signbit_p (mode, *loc))
45559 val = INTVAL (*loc);
45561 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
45562 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
45563 if ((val < 0 && val != -128)
45566 *loc = GEN_INT (-val);
45573 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
45574 optabs would emit if we didn't have TFmode patterns. */
45577 x86_emit_floatuns (rtx operands[2])
45579 rtx_code_label *neglab, *donelab;
45580 rtx i0, i1, f0, in, out;
45581 machine_mode mode, inmode;
45583 inmode = GET_MODE (operands[1]);
45584 gcc_assert (inmode == SImode || inmode == DImode);
45587 in = force_reg (inmode, operands[1]);
45588 mode = GET_MODE (out);
45589 neglab = gen_label_rtx ();
45590 donelab = gen_label_rtx ();
45591 f0 = gen_reg_rtx (mode);
/* If the value is non-negative when viewed as signed, a plain signed
   conversion is correct — do it and skip to the end.  */
45593 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
45595 expand_float (out, in, 0);
45597 emit_jump_insn (gen_jump (donelab));
45600 emit_label (neglab);
/* High-bit-set case: halve the value while preserving the rounding
   bit (i0 = (in >> 1) | (in & 1)), convert, then double the result.  */
45602 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
45604 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
45606 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
45608 expand_float (f0, i0, 0);
45610 emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));
45612 emit_label (donelab);
45615 static bool canonicalize_perm (struct expand_vec_perm_d *d);
45616 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
45617 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
45618 static bool expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool);
45620 /* Get a vector mode of the same size as the original but with elements
45621 twice as wide. This is only guaranteed to apply to integral vectors. */
45623 static inline machine_mode
45624 get_mode_wider_vector (machine_mode o)
45626 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
45627 machine_mode n = GET_MODE_WIDER_MODE (o);
/* Sanity-check the genmodes ordering assumption: half the element
   count, same total size.  */
45628 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
45629 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
45633 /* A subroutine of ix86_expand_vector_init_duplicate. Tries to
45634 fill target with val via vec_duplicate. */
45637 ix86_vector_duplicate_value (machine_mode mode, rtx target, rtx val)
45643 /* First attempt to recognize VAL as-is. */
45644 dup = gen_rtx_VEC_DUPLICATE (mode, val);
45645 insn = emit_insn (gen_rtx_SET (target, dup));
45646 if (recog_memoized (insn) < 0)
45649 /* If that fails, force VAL into a register. */
/* The force_reg insns are collected into a sequence (start_sequence /
   end_sequence lines are elided by this dump) and emitted before the
   failed insn, then recognition is retried.  */
45652 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
45653 seq = get_insns ();
45656 emit_insn_before (seq, insn);
45658 ok = recog_memoized (insn) >= 0;
45664 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
45665 with all elements equal to VAR. Return true if successful. */
/* NOTE(review): the dispatching switch over MODE is almost entirely
   elided by this dump; the visible fragments are the per-mode
   strategies.  Code kept verbatim, comments only.  */
45668 ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode,
45669 rtx target, rtx val)
45693 return ix86_vector_duplicate_value (mode, target, val);
45698 if (TARGET_SSE || TARGET_3DNOW_A)
/* Strategy: view VAL as SImode, truncate to HImode and duplicate
   (presumably the V4HI pshufw path — confirm against full source).  */
45702 val = gen_lowpart (SImode, val);
45703 x = gen_rtx_TRUNCATE (HImode, val);
45704 x = gen_rtx_VEC_DUPLICATE (mode, x);
45705 emit_insn (gen_rtx_SET (target, x));
45717 return ix86_vector_duplicate_value (mode, target, val);
/* Strategy: broadcast via a permutation (pshufb-style), built with
   the expand_vec_perm machinery.  */
45721 struct expand_vec_perm_d dperm;
45725 memset (&dperm, 0, sizeof (dperm));
45726 dperm.target = target;
45727 dperm.vmode = mode;
45728 dperm.nelt = GET_MODE_NUNITS (mode);
45729 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
45730 dperm.one_operand_p = true;
45732 /* Extend to SImode using a paradoxical SUBREG. */
45733 tmp1 = gen_reg_rtx (SImode);
45734 emit_move_insn (tmp1, gen_lowpart (SImode, val));
45736 /* Insert the SImode value as low element of a V4SImode vector. */
45737 tmp2 = gen_reg_rtx (V4SImode);
45738 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
45739 emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2));
45741 ok = (expand_vec_perm_1 (&dperm)
45742 || expand_vec_perm_broadcast_1 (&dperm));
45750 return ix86_vector_duplicate_value (mode, target, val);
45757 /* Replicate the value once into the next wider mode and recurse. */
45759 machine_mode smode, wsmode, wvmode;
45762 smode = GET_MODE_INNER (mode);
45763 wvmode = get_mode_wider_vector (mode);
45764 wsmode = GET_MODE_INNER (wvmode);
/* Pack two copies of VAL into one wider scalar:
   val' = (val << bits) | val, then broadcast val' in the wider
   vector mode and reinterpret the result.  */
45766 val = convert_modes (wsmode, smode, val, true);
45767 x = expand_simple_binop (wsmode, ASHIFT, val,
45768 GEN_INT (GET_MODE_BITSIZE (smode)),
45769 NULL_RTX, 1, OPTAB_LIB_WIDEN);
45770 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
45772 x = gen_reg_rtx (wvmode);
45773 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
45775 emit_move_insn (target, gen_lowpart (GET_MODE (target), x));
45782 return ix86_vector_duplicate_value (mode, target, val);
/* 256-bit QI/HI: build a 128-bit duplicate and concatenate it with
   itself.  */
45785 machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
45786 rtx x = gen_reg_rtx (hvmode);
45788 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
45791 x = gen_rtx_VEC_CONCAT (mode, x, x);
45792 emit_insn (gen_rtx_SET (target, x));
/* 512-bit QI/HI: direct broadcast with AVX512BW, otherwise the same
   concat-of-halves trick.  */
45798 if (TARGET_AVX512BW)
45799 return ix86_vector_duplicate_value (mode, target, val);
45802 machine_mode hvmode = (mode == V32HImode ? V16HImode : V32QImode);
45803 rtx x = gen_reg_rtx (hvmode);
45805 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
45808 x = gen_rtx_VEC_CONCAT (mode, x, x);
45809 emit_insn (gen_rtx_SET (target, x));
45818 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
45819 whose ONE_VAR element is VAR, and other elements are zero. Return true
/* (comment continuation elided; presumably "if successful.")  */
45823 ix86_expand_vector_init_one_nonzero (bool mmx_ok, machine_mode mode,
45824 rtx target, rtx var, int one_var)
45826 machine_mode vsimode;
45829 bool use_vector_set = false;
/* First decide per-mode whether a vec_set into a zeroed vector is the
   best strategy (the switch's case labels are elided by this dump).  */
45834 /* For SSE4.1, we normally use vector set. But if the second
45835 element is zero and inter-unit moves are OK, we use movq
/* (comment continuation elided)  */
45837 use_vector_set = (TARGET_64BIT && TARGET_SSE4_1
45838 && !(TARGET_INTER_UNIT_MOVES_TO_VEC
45844 use_vector_set = TARGET_SSE4_1;
45847 use_vector_set = TARGET_SSE2;
45850 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
45857 use_vector_set = TARGET_AVX;
45860 /* Use ix86_expand_vector_set in 64bit mode only. */
45861 use_vector_set = TARGET_AVX && TARGET_64BIT;
45867 if (use_vector_set)
/* Zero the whole vector, then insert VAR at position ONE_VAR.  */
45869 emit_insn (gen_rtx_SET (target, CONST0_RTX (mode)));
45870 var = force_reg (GET_MODE_INNER (mode), var);
45871 ix86_expand_vector_set (mmx_ok, target, var, one_var);
/* Two-element case: VEC_CONCAT of VAR with zero.  */
45887 var = force_reg (GET_MODE_INNER (mode), var);
45888 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
45889 emit_insn (gen_rtx_SET (target, x));
/* Four-element case: place VAR in element 0 via VEC_MERGE with a zero
   vector, then shuffle it to position ONE_VAR if needed.  Use a fresh
   pseudo when TARGET is a hard register.  */
45894 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
45895 new_target = gen_reg_rtx (mode);
45897 new_target = target;
45898 var = force_reg (GET_MODE_INNER (mode), var);
45899 x = gen_rtx_VEC_DUPLICATE (mode, var);
45900 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
45901 emit_insn (gen_rtx_SET (new_target, x));
45904 /* We need to shuffle the value to the correct position, so
45905 create a new pseudo to store the intermediate result. */
45907 /* With SSE2, we can use the integer shuffle insns. */
45908 if (mode != V4SFmode && TARGET_SSE2)
/* pshufd selectors: element ONE_VAR reads lane 0 (where VAR is);
   all other elements read lane 1 (zero).  */
45910 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
45912 GEN_INT (one_var == 1 ? 0 : 1),
45913 GEN_INT (one_var == 2 ? 0 : 1),
45914 GEN_INT (one_var == 3 ? 0 : 1)));
45915 if (target != new_target)
45916 emit_move_insn (target, new_target);
45920 /* Otherwise convert the intermediate result to V4SFmode and
45921 use the SSE1 shuffle instructions. */
45922 if (mode != V4SFmode)
45924 tmp = gen_reg_rtx (V4SFmode);
45925 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
/* shufps: low two selectors index the first operand, high two
   (+4) index the second — hence the 0+4/1+4 encoding.  */
45930 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
45932 GEN_INT (one_var == 1 ? 0 : 1),
45933 GEN_INT (one_var == 2 ? 0+4 : 1+4),
45934 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
45936 if (mode != V4SFmode)
45937 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
45938 else if (tmp != target)
45939 emit_move_insn (target, tmp);
45941 else if (target != new_target)
45942 emit_move_insn (target, new_target);
/* Narrow-element fallback: widen to an SImode-element vector and
   recurse, then take the lowpart.  */
45947 vsimode = V4SImode;
45953 vsimode = V2SImode;
45959 /* Zero extend the variable element to SImode and recurse. */
45960 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
45962 x = gen_reg_rtx (vsimode);
45963 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
45965 gcc_unreachable ();
45967 emit_move_insn (target, gen_lowpart (mode, x));
45975 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
45976 consisting of the values in VALS. It is known that all elements
45977 except ONE_VAR are constants. Return true if successful. */
45980 ix86_expand_vector_init_one_var (bool mmx_ok, machine_mode mode,
45981 rtx target, rtx vals, int one_var)
45983 rtx var = XVECEXP (vals, 0, one_var);
45984 machine_mode wmode;
/* Build CONST_VEC = VALS with the variable slot zeroed, so it can be
   loaded from the constant pool and then patched.  */
45987 const_vec = copy_rtx (vals);
45988 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
45989 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
45997 /* For the two element vectors, it's just as easy to use
45998 the general case. */
46002 /* Use ix86_expand_vector_set in 64bit mode only. */
46025 /* There's no way to set one QImode entry easily. Combine
46026 the variable value with its adjacent constant value, and
46027 promote to an HImode set. */
/* `one_var ^ 1` is the partner byte of the HImode pair containing
   ONE_VAR; the two branches below handle VAR being the high vs the
   low byte of that pair (the if/else lines are elided by this dump).  */
46028 x = XVECEXP (vals, 0, one_var ^ 1);
46031 var = convert_modes (HImode, QImode, var, true);
46032 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
46033 NULL_RTX, 1, OPTAB_LIB_WIDEN);
46034 x = GEN_INT (INTVAL (x) & 0xff);
46038 var = convert_modes (HImode, QImode, var, true);
46039 x = gen_int_mode (INTVAL (x) << 8, HImode);
46041 if (x != const0_rtx)
46042 var = expand_simple_binop (HImode, IOR, var, x, var,
46043 1, OPTAB_LIB_WIDEN);
/* Reinterpret the constant vector in the HImode-element mode and set
   the combined half-word at index ONE_VAR/2.  */
46045 x = gen_reg_rtx (wmode);
46046 emit_move_insn (x, gen_lowpart (wmode, const_vec));
46047 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
46049 emit_move_insn (target, gen_lowpart (mode, x));
/* Default strategy: load the constant vector, then overwrite the one
   variable element.  */
46056 emit_move_insn (target, const_vec);
46057 ix86_expand_vector_set (mmx_ok, target, var, one_var);
46061 /* A subroutine of ix86_expand_vector_init_general. Use vector
46062 concatenate to handle the most general case: all values variable,
46063 and none identical. */
/* NOTE(review): the switch arms selecting CMODE/HMODE/GMODE for each
   (mode, n) pair are almost entirely elided by this dump — only the
   gcc_unreachable () defaults survive.  Code kept verbatim.  */
46066 ix86_expand_vector_init_concat (machine_mode mode,
46067 rtx target, rtx *ops, int n)
46069 machine_mode cmode, hmode = VOIDmode, gmode = VOIDmode;
46070 rtx first[16], second[8], third[4];
46122 gcc_unreachable ();
/* n == 2: a single VEC_CONCAT of the two (register) operands.  */
46125 if (!register_operand (ops[1], cmode))
46126 ops[1] = force_reg (cmode, ops[1]);
46127 if (!register_operand (ops[0], cmode))
46128 ops[0] = force_reg (cmode, ops[0]);
46129 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, ops[0],
46149 gcc_unreachable ();
46173 gcc_unreachable ();
46191 gcc_unreachable ();
46196 /* FIXME: We process inputs backward to help RA. PR 36222. */
/* Pair up adjacent inputs into half-width vectors via recursion,
   then concatenate pairwise up to the full width.  */
46199 for (; i > 0; i -= 2, j--)
46201 first[j] = gen_reg_rtx (cmode);
46202 v = gen_rtvec (2, ops[i - 1], ops[i]);
46203 ix86_expand_vector_init (false, first[j],
46204 gen_rtx_PARALLEL (cmode, v));
/* Three-level combine (n large enough to need quarter vectors).  */
46210 gcc_assert (hmode != VOIDmode);
46211 gcc_assert (gmode != VOIDmode);
46212 for (i = j = 0; i < n; i += 2, j++)
46214 second[j] = gen_reg_rtx (hmode);
46215 ix86_expand_vector_init_concat (hmode, second [j],
46219 for (i = j = 0; i < n; i += 2, j++)
46221 third[j] = gen_reg_rtx (gmode);
46222 ix86_expand_vector_init_concat (gmode, third[j],
46226 ix86_expand_vector_init_concat (mode, target, third, n);
/* Two-level combine.  */
46230 gcc_assert (hmode != VOIDmode);
46231 for (i = j = 0; i < n; i += 2, j++)
46233 second[j] = gen_reg_rtx (hmode);
46234 ix86_expand_vector_init_concat (hmode, second [j],
46238 ix86_expand_vector_init_concat (mode, target, second, n);
/* One-level combine.  */
46241 ix86_expand_vector_init_concat (mode, target, first, n);
46245 gcc_unreachable ();
46249 /* A subroutine of ix86_expand_vector_init_general. Use vector
46250 interleave to handle the most general case: all values variable,
46251 and none identical. */
46254 ix86_expand_vector_init_interleave (machine_mode mode,
46255 rtx target, rtx *ops, int n)
46257 machine_mode first_imode, second_imode, third_imode, inner_mode;
46260 rtx (*gen_load_even) (rtx, rtx, rtx);
46261 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
46262 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
/* Select the generator functions and the ladder of integer modes for
   MODE (the switch's case labels are elided by this dump; the two
   visible arms are presumably V8HImode and V16QImode).  */
46267 gen_load_even = gen_vec_setv8hi;
46268 gen_interleave_first_low = gen_vec_interleave_lowv4si;
46269 gen_interleave_second_low = gen_vec_interleave_lowv2di;
46270 inner_mode = HImode;
46271 first_imode = V4SImode;
46272 second_imode = V2DImode;
46273 third_imode = VOIDmode;
46276 gen_load_even = gen_vec_setv16qi;
46277 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
46278 gen_interleave_second_low = gen_vec_interleave_lowv4si;
46279 inner_mode = QImode;
46280 first_imode = V8HImode;
46281 second_imode = V4SImode;
46282 third_imode = V2DImode;
46285 gcc_unreachable ();
/* Step 1: for each output slot i, build a vector holding the pair
   (ops[2i], ops[2i+1]) in its low elements.  */
46288 for (i = 0; i < n; i++)
46290 /* Extend the odd elment to SImode using a paradoxical SUBREG. */
46291 op0 = gen_reg_rtx (SImode);
46292 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
46294 /* Insert the SImode value as low element of V4SImode vector. */
46295 op1 = gen_reg_rtx (V4SImode);
46296 op0 = gen_rtx_VEC_MERGE (V4SImode,
46297 gen_rtx_VEC_DUPLICATE (V4SImode,
46299 CONST0_RTX (V4SImode),
46301 emit_insn (gen_rtx_SET (op1, op0));
46303 /* Cast the V4SImode vector back to a vector in orignal mode. */
46304 op0 = gen_reg_rtx (mode);
46305 emit_move_insn (op0, gen_lowpart (mode, op1));
46307 /* Load even elements into the second position. */
46308 emit_insn (gen_load_even (op0,
46309 force_reg (inner_mode,
46313 /* Cast vector to FIRST_IMODE vector. */
46314 ops[i] = gen_reg_rtx (first_imode);
46315 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
46318 /* Interleave low FIRST_IMODE vectors. */
46319 for (i = j = 0; i < n; i += 2, j++)
46321 op0 = gen_reg_rtx (first_imode);
46322 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
46324 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
46325 ops[j] = gen_reg_rtx (second_imode);
46326 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
46329 /* Interleave low SECOND_IMODE vectors. */
46330 switch (second_imode)
/* V4SImode case: one more interleave level, then fall through to the
   final V2DImode interleave (note the reassignment below).  */
46333 for (i = j = 0; i < n / 2; i += 2, j++)
46335 op0 = gen_reg_rtx (second_imode);
46336 emit_insn (gen_interleave_second_low (op0, ops[i],
46339 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
46341 ops[j] = gen_reg_rtx (third_imode);
46342 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
46344 second_imode = V2DImode;
46345 gen_interleave_second_low = gen_vec_interleave_lowv2di;
/* Final interleave producing the full-width result.  */
46349 op0 = gen_reg_rtx (second_imode);
46350 emit_insn (gen_interleave_second_low (op0, ops[0],
46353 /* Cast the SECOND_IMODE vector back to a vector on original
46355 emit_insn (gen_rtx_SET (target, gen_lowpart (mode, op0)));
46359 gcc_unreachable ();
46363 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
46364 all values variable, and none identical. */
/* NOTE(review): the mode-dispatch switch labels are largely elided by
   this dump; code kept verbatim, comments only.  */
46367 ix86_expand_vector_init_general (bool mmx_ok, machine_mode mode,
46368 rtx target, rtx vals)
46370 rtx ops[64], op0, op1, op2, op3, op4, op5;
46371 machine_mode half_mode = VOIDmode;
46372 machine_mode quarter_mode = VOIDmode;
46379 if (!mmx_ok && !TARGET_SSE)
/* Wide-element modes: gather the elements and build via pairwise
   VEC_CONCAT.  */
46395 n = GET_MODE_NUNITS (mode);
46396 for (i = 0; i < n; i++)
46397 ops[i] = XVECEXP (vals, 0, i);
46398 ix86_expand_vector_init_concat (mode, target, ops, n);
/* 256-bit QI/HI: build the two 128-bit halves by interleaving, then
   concatenate.  */
46402 half_mode = V16QImode;
46406 half_mode = V8HImode;
46410 n = GET_MODE_NUNITS (mode);
46411 for (i = 0; i < n; i++)
46412 ops[i] = XVECEXP (vals, 0, i);
46413 op0 = gen_reg_rtx (half_mode);
46414 op1 = gen_reg_rtx (half_mode);
46415 ix86_expand_vector_init_interleave (half_mode, op0, ops,
46417 ix86_expand_vector_init_interleave (half_mode, op1,
46418 &ops [n >> 1], n >> 2);
46419 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, op0, op1)));
/* 512-bit QI/HI: four 128-bit quarters -> two 256-bit halves -> full
   vector, all via VEC_CONCAT.  */
46423 quarter_mode = V16QImode;
46424 half_mode = V32QImode;
46428 quarter_mode = V8HImode;
46429 half_mode = V16HImode;
46433 n = GET_MODE_NUNITS (mode);
46434 for (i = 0; i < n; i++)
46435 ops[i] = XVECEXP (vals, 0, i);
46436 op0 = gen_reg_rtx (quarter_mode);
46437 op1 = gen_reg_rtx (quarter_mode);
46438 op2 = gen_reg_rtx (quarter_mode);
46439 op3 = gen_reg_rtx (quarter_mode);
46440 op4 = gen_reg_rtx (half_mode);
46441 op5 = gen_reg_rtx (half_mode);
46442 ix86_expand_vector_init_interleave (quarter_mode, op0, ops,
46444 ix86_expand_vector_init_interleave (quarter_mode, op1,
46445 &ops [n >> 2], n >> 3);
46446 ix86_expand_vector_init_interleave (quarter_mode, op2,
46447 &ops [n >> 1], n >> 3);
46448 ix86_expand_vector_init_interleave (quarter_mode, op3,
46449 &ops [(n >> 1) | (n >> 2)], n >> 3);
46450 emit_insn (gen_rtx_SET (op4, gen_rtx_VEC_CONCAT (half_mode, op0, op1)));
46451 emit_insn (gen_rtx_SET (op5, gen_rtx_VEC_CONCAT (half_mode, op2, op3)));
46452 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, op4, op5)));
46456 if (!TARGET_SSE4_1)
46464 /* Don't use ix86_expand_vector_init_interleave if we can't
46465 move from GPR to SSE register directly. */
46466 if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
46469 n = GET_MODE_NUNITS (mode);
46470 for (i = 0; i < n; i++)
46471 ops[i] = XVECEXP (vals, 0, i);
46472 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
46480 gcc_unreachable ();
/* Fallback: pack elements into word_mode integers with shift/or,
   then assemble the vector from those words.  */
46484 int i, j, n_elts, n_words, n_elt_per_word;
46485 machine_mode inner_mode;
46486 rtx words[4], shift;
46488 inner_mode = GET_MODE_INNER (mode);
46489 n_elts = GET_MODE_NUNITS (mode);
46490 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
46491 n_elt_per_word = n_elts / n_words;
46492 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
46494 for (i = 0; i < n_words; ++i)
46496 rtx word = NULL_RTX;
/* Elements are folded in from highest to lowest so each shift makes
   room for the next (lower) element.  */
46498 for (j = 0; j < n_elt_per_word; ++j)
46500 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
46501 elt = convert_modes (word_mode, inner_mode, elt, true);
46507 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
46508 word, 1, OPTAB_LIB_WIDEN);
46509 word = expand_simple_binop (word_mode, IOR, word, elt,
46510 word, 1, OPTAB_LIB_WIDEN);
46518 emit_move_insn (target, gen_lowpart (mode, words[0]));
46519 else if (n_words == 2)
/* Clobber TARGET's pseudo first so the two half writes are not seen
   as partial defs of an uninitialized register.  */
46521 rtx tmp = gen_reg_rtx (mode);
46522 emit_clobber (tmp);
46523 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
46524 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
46525 emit_move_insn (target, tmp);
46527 else if (n_words == 4)
46529 rtx tmp = gen_reg_rtx (V4SImode);
46530 gcc_assert (word_mode == SImode);
46531 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
46532 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
46533 emit_move_insn (target, gen_lowpart (mode, tmp));
46536 gcc_unreachable ();
46540 /* Initialize vector TARGET via VALS. Suppress the use of MMX
46541 instructions unless MMX_OK is true. */
46544 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
46546 machine_mode mode = GET_MODE (target);
46547 machine_mode inner_mode = GET_MODE_INNER (mode);
46548 int n_elts = GET_MODE_NUNITS (mode);
46549 int n_var = 0, one_var = -1;
46550 bool all_same = true, all_const_zero = true;
/* Classify the initializer: count non-constant elements (remembering
   the index of one of them), and track whether everything is zero /
   everything is identical.  */
46554 for (i = 0; i < n_elts; ++i)
46556 x = XVECEXP (vals, 0, i);
46557 if (!(CONST_SCALAR_INT_P (x)
46558 || CONST_DOUBLE_P (x)
46559 || CONST_FIXED_P (x)))
46560 n_var++, one_var = i;
46561 else if (x != CONST0_RTX (inner_mode))
46562 all_const_zero = false;
46563 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
46567 /* Constants are best loaded from the constant pool. */
46570 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
46574 /* If all values are identical, broadcast the value. */
46576 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
46577 XVECEXP (vals, 0, 0)))
46580 /* Values where only one field is non-constant are best loaded from
46581 the pool and overwritten via move later. */
46585 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
46586 XVECEXP (vals, 0, one_var),
46590 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
/* Fully general fallback when no special-case expander succeeded.  */
46594 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
/* Store scalar VAL into element ELT of vector TARGET.  Depending on
   mode and enabled ISA this uses: VEC_CONCAT for two-element vectors,
   shufps/pshufd shuffle sequences for V4SF/V4SI, half-vector
   extract/modify/insert via the static gen_extract/gen_insert tables
   for 256-bit modes, AVX-512 blendm patterns, a plain VEC_MERGE, or a
   stack-temporary store/modify/reload fallback.
   NOTE(review): listing is elided; comments cover visible code only.  */
46598 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
46600 machine_mode mode = GET_MODE (target);
46601 machine_mode inner_mode = GET_MODE_INNER (mode);
46602 machine_mode half_mode;
46603 bool use_vec_merge = false;
/* Tables of lo/hi half extract and insert patterns, indexed by a
   mode index j and half index i (i <= 1, asserted below).  */
46605 static rtx (*gen_extract[6][2]) (rtx, rtx)
46607 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
46608 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
46609 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
46610 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
46611 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
46612 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
46614 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
46616 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
46617 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
46618 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
46619 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
46620 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
46621 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
/* mmode/gen_blendm are set for the AVX-512 masked-blend path below.  */
46624 machine_mode mmode = VOIDmode;
46625 rtx (*gen_blendm) (rtx, rtx, rtx, rtx);
/* Two-element vector: extract the untouched element, then CONCAT it
   with VAL in the right order.  */
46633 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
46634 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
46636 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
46638 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
46639 emit_insn (gen_rtx_SET (target, tmp));
46645 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
46649 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
46650 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
46652 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
46654 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
46655 emit_insn (gen_rtx_SET (target, tmp));
46662 /* For the two element vectors, we implement a VEC_CONCAT with
46663 the extraction of the other element. */
46665 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
46666 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
46669 op0 = val, op1 = tmp;
46671 op0 = tmp, op1 = val;
46673 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
46674 emit_insn (gen_rtx_SET (target, tmp));
46679 use_vec_merge = TARGET_SSE4_1;
46686 use_vec_merge = true;
/* V4SF without SSE4.1: shuffle VAL into place with unpcklps/shufps,
   one sequence per target element position.  */
46690 /* tmp = target = A B C D */
46691 tmp = copy_to_reg (target);
46692 /* target = A A B B */
46693 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
46694 /* target = X A B B */
46695 ix86_expand_vector_set (false, target, val, 0);
46696 /* target = A X C D */
46697 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
46698 const1_rtx, const0_rtx,
46699 GEN_INT (2+4), GEN_INT (3+4)));
46703 /* tmp = target = A B C D */
46704 tmp = copy_to_reg (target);
46705 /* tmp = X B C D */
46706 ix86_expand_vector_set (false, tmp, val, 0);
46707 /* target = A B X D */
46708 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
46709 const0_rtx, const1_rtx,
46710 GEN_INT (0+4), GEN_INT (3+4)));
46714 /* tmp = target = A B C D */
46715 tmp = copy_to_reg (target);
46716 /* tmp = X B C D */
46717 ix86_expand_vector_set (false, tmp, val, 0);
46718 /* target = A B X D */
46719 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
46720 const0_rtx, const1_rtx,
46721 GEN_INT (2+4), GEN_INT (0+4)));
46725 gcc_unreachable ();
46730 use_vec_merge = TARGET_SSE4_1;
46734 /* Element 0 handled by vec_merge below. */
46737 use_vec_merge = true;
46743 /* With SSE2, use integer shuffles to swap element 0 and ELT,
46744 store into element 0, then shuffle them back. */
46748 order[0] = GEN_INT (elt);
46749 order[1] = const1_rtx;
46750 order[2] = const2_rtx;
46751 order[3] = GEN_INT (3);
46752 order[elt] = const0_rtx;
46754 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
46755 order[1], order[2], order[3]));
46757 ix86_expand_vector_set (false, target, val, 0);
/* The same permutation is its own inverse, restoring order.  */
46759 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
46760 order[1], order[2], order[3]));
46764 /* For SSE1, we have to reuse the V4SF code. */
46765 rtx t = gen_reg_rtx (V4SFmode);
46766 ix86_expand_vector_set (false, t, gen_lowpart (SFmode, val), elt);
46767 emit_move_insn (target, gen_lowpart (mode, t));
46772 use_vec_merge = TARGET_SSE2;
46775 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
46779 use_vec_merge = TARGET_SSE4_1;
/* 256-bit modes: record the 128-bit half mode, then fall through to
   the extract-half / set / insert-half sequence below.  */
46786 half_mode = V16QImode;
46792 half_mode = V8HImode;
46798 half_mode = V4SImode;
46804 half_mode = V2DImode;
46810 half_mode = V4SFmode;
46816 half_mode = V2DFmode;
46822 /* Compute offset. */
46826 gcc_assert (i <= 1);
46828 /* Extract the half. */
46829 tmp = gen_reg_rtx (half_mode);
46830 emit_insn (gen_extract[j][i] (tmp, target));
46832 /* Put val in tmp at elt. */
46833 ix86_expand_vector_set (false, tmp, val, elt);
/* Write the modified half back into TARGET.  */
46836 emit_insn (gen_insert[j][i] (target, target, tmp));
/* 512-bit (and V32HI/V64QI) modes: select the AVX-512 blendm pattern
   for the masked-broadcast insert done after the switch.  */
46840 if (TARGET_AVX512F)
46843 gen_blendm = gen_avx512f_blendmv8df;
46848 if (TARGET_AVX512F)
46851 gen_blendm = gen_avx512f_blendmv8di;
46856 if (TARGET_AVX512F)
46859 gen_blendm = gen_avx512f_blendmv16sf;
46864 if (TARGET_AVX512F)
46867 gen_blendm = gen_avx512f_blendmv16si;
46872 if (TARGET_AVX512F && TARGET_AVX512BW)
46875 gen_blendm = gen_avx512bw_blendmv32hi;
46880 if (TARGET_AVX512F && TARGET_AVX512BW)
46883 gen_blendm = gen_avx512bw_blendmv64qi;
/* AVX-512 path: broadcast VAL, then blend it in under mask 1<<elt.  */
46891 if (mmode != VOIDmode)
46893 tmp = gen_reg_rtx (mode);
46894 emit_insn (gen_rtx_SET (tmp, gen_rtx_VEC_DUPLICATE (mode, val)));
46895 emit_insn (gen_blendm (target, tmp, target,
46897 gen_int_mode (1 << elt, mmode))));
46899 else if (use_vec_merge)
46901 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
46902 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
46903 emit_insn (gen_rtx_SET (target, tmp));
/* Last resort: spill TARGET to a stack slot, overwrite the element
   in memory, and reload the whole vector.  */
46907 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
46909 emit_move_insn (mem, target);
46911 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
46912 emit_move_insn (tmp, val);
46914 emit_move_insn (target, mem);
/* Extract element ELT of vector VEC into scalar TARGET.  Prefers a
   direct vec_select (use_vec_extr); for SSE modes it first shuffles
   the wanted element into position 0, for 256/512-bit modes it
   extracts the lo/hi 128- or 256-bit half and recurses with a masked
   element index, and otherwise falls back to a stack temporary.
   NOTE(review): listing is elided; comments cover visible code only.  */
46919 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
46921 machine_mode mode = GET_MODE (vec);
46922 machine_mode inner_mode = GET_MODE_INNER (mode);
46923 bool use_vec_extr = false;
46936 use_vec_extr = true;
46940 use_vec_extr = TARGET_SSE4_1;
/* V4SF: bring element ELT to position 0 via shufps/unpckhps.  */
46952 tmp = gen_reg_rtx (mode);
46953 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
46954 GEN_INT (elt), GEN_INT (elt),
46955 GEN_INT (elt+4), GEN_INT (elt+4)));
46959 tmp = gen_reg_rtx (mode);
46960 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
46964 gcc_unreachable ();
46967 use_vec_extr = true;
46972 use_vec_extr = TARGET_SSE4_1;
/* V4SI with SSE2: pshufd element ELT into every lane.  */
46986 tmp = gen_reg_rtx (mode);
46987 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
46988 GEN_INT (elt), GEN_INT (elt),
46989 GEN_INT (elt), GEN_INT (elt)));
46993 tmp = gen_reg_rtx (mode);
46994 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
46998 gcc_unreachable ();
47001 use_vec_extr = true;
47006 /* For SSE1, we have to reuse the V4SF code. */
47007 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
47008 gen_lowpart (V4SFmode, vec), elt);
47014 use_vec_extr = TARGET_SSE2;
47017 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
47021 use_vec_extr = TARGET_SSE4_1;
/* 256-bit modes: extract the relevant 128-bit half, then recurse
   with the element index masked down to that half.  */
47027 tmp = gen_reg_rtx (V4SFmode);
47029 emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
47031 emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
47032 ix86_expand_vector_extract (false, target, tmp, elt & 3);
47040 tmp = gen_reg_rtx (V2DFmode);
47042 emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
47044 emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
47045 ix86_expand_vector_extract (false, target, tmp, elt & 1);
47053 tmp = gen_reg_rtx (V16QImode);
47055 emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
47057 emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
47058 ix86_expand_vector_extract (false, target, tmp, elt & 15);
47066 tmp = gen_reg_rtx (V8HImode);
47068 emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
47070 emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
47071 ix86_expand_vector_extract (false, target, tmp, elt & 7);
47079 tmp = gen_reg_rtx (V4SImode);
47081 emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
47083 emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
47084 ix86_expand_vector_extract (false, target, tmp, elt & 3);
47092 tmp = gen_reg_rtx (V2DImode);
47094 emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
47096 emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
47097 ix86_expand_vector_extract (false, target, tmp, elt & 1);
/* 512-bit modes: same half-and-recurse strategy; V32HI/V64QI need
   AVX-512BW for the half extracts.  */
47103 if (TARGET_AVX512BW)
47105 tmp = gen_reg_rtx (V16HImode);
47107 emit_insn (gen_vec_extract_lo_v32hi (tmp, vec));
47109 emit_insn (gen_vec_extract_hi_v32hi (tmp, vec));
47110 ix86_expand_vector_extract (false, target, tmp, elt & 15);
47116 if (TARGET_AVX512BW)
47118 tmp = gen_reg_rtx (V32QImode);
47120 emit_insn (gen_vec_extract_lo_v64qi (tmp, vec));
47122 emit_insn (gen_vec_extract_hi_v64qi (tmp, vec));
47123 ix86_expand_vector_extract (false, target, tmp, elt & 31);
47129 tmp = gen_reg_rtx (V8SFmode);
47131 emit_insn (gen_vec_extract_lo_v16sf (tmp, vec));
47133 emit_insn (gen_vec_extract_hi_v16sf (tmp, vec));
47134 ix86_expand_vector_extract (false, target, tmp, elt & 7);
47138 tmp = gen_reg_rtx (V4DFmode);
47140 emit_insn (gen_vec_extract_lo_v8df (tmp, vec));
47142 emit_insn (gen_vec_extract_hi_v8df (tmp, vec));
47143 ix86_expand_vector_extract (false, target, tmp, elt & 3);
47147 tmp = gen_reg_rtx (V8SImode);
47149 emit_insn (gen_vec_extract_lo_v16si (tmp, vec));
47151 emit_insn (gen_vec_extract_hi_v16si (tmp, vec));
47152 ix86_expand_vector_extract (false, target, tmp, elt & 7);
47156 tmp = gen_reg_rtx (V4DImode);
47158 emit_insn (gen_vec_extract_lo_v8di (tmp, vec));
47160 emit_insn (gen_vec_extract_hi_v8di (tmp, vec));
47161 ix86_expand_vector_extract (false, target, tmp, elt & 3);
47165 /* ??? Could extract the appropriate HImode element and shift. */
/* Direct path: emit a vec_select of element ELT.  */
47172 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
47173 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
47175 /* Let the rtl optimizers know about the zero extension performed. */
47176 if (inner_mode == QImode || inner_mode == HImode)
47178 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
47179 target = gen_lowpart (SImode, target);
47182 emit_insn (gen_rtx_SET (target, tmp));
/* Fallback: spill VEC to the stack and load the element from memory.  */
47186 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
47188 emit_move_insn (mem, vec);
47190 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
47191 emit_move_insn (target, tmp);
47195 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
47196 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
47197 The upper bits of DEST are undefined, though they shouldn't cause
47198 exceptions (some bits from src or all zeros are ok). */
/* Helper for ix86_expand_reduc: moves the upper half of the I-bit
   working region of SRC into the low half of DEST, dispatching on
   GET_MODE (src).  NOTE(review): listing is elided; comments cover
   visible code only.  */
47201 emit_reduc_half (rtx dest, rtx src, int i)
47204 switch (GET_MODE (src))
47208 tem = gen_sse_movhlps (dest, src, src);
47210 tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
47211 GEN_INT (1 + 4), GEN_INT (1 + 4));
47214 tem = gen_vec_interleave_highv2df (dest, src, src);
/* 128-bit integer modes: shift the V1TI view right by I bits.  */
47220 d = gen_reg_rtx (V1TImode);
47221 tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src),
/* 256-bit FP modes: vperm2f128 for the 128-bit half, in-lane
   shuffles for smaller strides.  */
47226 tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
47228 tem = gen_avx_shufps256 (dest, src, src,
47229 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
47233 tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
47235 tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
/* 256-bit integer modes: vpermq/ti permute or V2TI shift on a
   V4DI-typed temporary, copied back to DEST at the end.  */
47243 if (GET_MODE (dest) != V4DImode)
47244 d = gen_reg_rtx (V4DImode);
47245 tem = gen_avx2_permv2ti (d, gen_lowpart (V4DImode, src),
47246 gen_lowpart (V4DImode, src),
47251 d = gen_reg_rtx (V2TImode);
47252 tem = gen_avx2_lshrv2ti3 (d, gen_lowpart (V2TImode, src),
/* 512-bit modes: shuf_i32x4 for 256/512-bit strides, pshufd for
   the smaller ones, both on a V16SI view of SRC.  */
47263 tem = gen_avx512f_shuf_i32x4_1 (gen_lowpart (V16SImode, dest),
47264 gen_lowpart (V16SImode, src),
47265 gen_lowpart (V16SImode, src),
47266 GEN_INT (0x4 + (i == 512 ? 4 : 0)),
47267 GEN_INT (0x5 + (i == 512 ? 4 : 0)),
47268 GEN_INT (0x6 + (i == 512 ? 4 : 0)),
47269 GEN_INT (0x7 + (i == 512 ? 4 : 0)),
47270 GEN_INT (0xC), GEN_INT (0xD),
47271 GEN_INT (0xE), GEN_INT (0xF),
47272 GEN_INT (0x10), GEN_INT (0x11),
47273 GEN_INT (0x12), GEN_INT (0x13),
47274 GEN_INT (0x14), GEN_INT (0x15),
47275 GEN_INT (0x16), GEN_INT (0x17));
47277 tem = gen_avx512f_pshufd_1 (gen_lowpart (V16SImode, dest),
47278 gen_lowpart (V16SImode, src),
47279 GEN_INT (i == 128 ? 0x2 : 0x1),
47283 GEN_INT (i == 128 ? 0x6 : 0x5),
47287 GEN_INT (i == 128 ? 0xA : 0x9),
47291 GEN_INT (i == 128 ? 0xE : 0xD),
47297 gcc_unreachable ();
/* Copy the differently-typed temporary back into DEST.  */
47301 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
47304 /* Expand a vector reduction. FN is the binary pattern to reduce;
47305 DEST is the destination; IN is the input vector. */
/* Repeatedly halves the working width via emit_reduc_half and
   combines halves with FN; a special SSE4.1 phminposuw shortcut
   handles V8HImode unsigned-min reduction.  */
47308 ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
47310 rtx half, dst, vec = in;
47311 machine_mode mode = GET_MODE (in);
47314 /* SSE4 has a special instruction for V8HImode UMIN reduction. */
47316 && mode == V8HImode
47317 && fn == gen_uminv8hi3
47319 emit_insn (gen_sse4_1_phminposuw (dest, in));
/* Halve the active width each iteration, combining with FN; the
   final combine (i == 2 * unit width) targets DEST directly.  */
47323 for (i = GET_MODE_BITSIZE (mode);
47324 i > GET_MODE_UNIT_BITSIZE (mode);
47327 half = gen_reg_rtx (mode);
47328 emit_reduc_half (half, vec, i);
47329 if (i == GET_MODE_UNIT_BITSIZE (mode) * 2)
47332 dst = gen_reg_rtx (mode);
47333 emit_insn (fn (dst, half, vec));
47338 /* Target hook for scalar_mode_supported_p. */
/* Decimal float modes defer to the default decimal predicate; TFmode
   is special-cased (body elided here); everything else defers to the
   generic default.  */
47340 ix86_scalar_mode_supported_p (machine_mode mode)
47342 if (DECIMAL_FLOAT_MODE_P (mode))
47343 return default_decimal_float_supported_p ();
47344 else if (mode == TFmode)
47347 return default_scalar_mode_supported_p (mode);
47350 /* Implements target hook vector_mode_supported_p. */
/* A vector mode is supported when any enabled ISA level (SSE, SSE2,
   AVX, AVX-512F, MMX, 3DNow!) validates it for its register class.  */
47352 ix86_vector_mode_supported_p (machine_mode mode)
47354 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
47356 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
47358 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
47360 if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
47362 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
47364 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
47369 /* Implement target hook libgcc_floating_mode_supported_p. */
/* TFmode availability in libgcc is configuration-dependent: excluded
   under IX86_NO_LIBGCC_TFMODE, conditional on TARGET_LONG_DOUBLE_128
   under IX86_MAYBE_NO_LIBGCC_TFMODE.  */
47371 ix86_libgcc_floating_mode_supported_p (machine_mode mode)
47381 #ifdef IX86_NO_LIBGCC_TFMODE
47383 #elif defined IX86_MAYBE_NO_LIBGCC_TFMODE
47384 return TARGET_LONG_DOUBLE_128;
47394 /* Target hook for c_mode_for_suffix. */
/* Maps a literal-suffix character to a machine mode (body elided in
   this listing — presumably 'q'/'w' suffixes; verify upstream).  */
47395 static machine_mode
47396 ix86_c_mode_for_suffix (char suffix)
47406 /* Worker function for TARGET_MD_ASM_ADJUST.
47408 We implement asm flag outputs, and maintain source compatibility
47409 with the old cc0-based compiler. */
/* Parses "=@cc<cond>" output constraints: maps each condition string
   to a (CC mode, rtx comparison code) pair, rewrites the first flag
   output to use the flags register, and emits code materializing the
   condition into each declared output.  If no flag outputs are seen,
   the flags register is simply clobbered.
   NOTE(review): listing is elided; comments cover visible code only.  */
47412 ix86_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &/*inputs*/,
47413 vec<const char *> &constraints,
47414 vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
/* The x87 status word is always clobbered by asm on this target.  */
47416 clobbers.safe_push (gen_rtx_REG (CCFPmode, FPSR_REG));
47417 SET_HARD_REG_BIT (clobbered_regs, FPSR_REG);
47419 bool saw_asm_flag = false;
47422 for (unsigned i = 0, n = outputs.length (); i < n; ++i)
47424 const char *con = constraints[i];
/* Only "=@cc..." constraints are handled here.  */
47425 if (strncmp (con, "=@cc", 4) != 0)
47428 if (strchr (con, ',') != NULL)
47430 error ("alternatives not allowed in asm flag output")
47434 bool invert = false;
/* A leading 'n' inverts the condition (e.g. "ne" handled below).  */
47436 invert = true, con++;
47438 machine_mode mode = CCmode;
47439 rtx_code code = UNKNOWN;
/* Map the condition mnemonic to CC mode + comparison code.  */
47445 mode = CCAmode, code = EQ;
47446 else if (con[1] == 'e' && con[2] == 0)
47447 mode = CCCmode, code = NE;
47451 mode = CCCmode, code = EQ;
47452 else if (con[1] == 'e' && con[2] == 0)
47453 mode = CCAmode, code = NE;
47457 mode = CCCmode, code = EQ;
47461 mode = CCZmode, code = EQ;
47465 mode = CCGCmode, code = GT;
47466 else if (con[1] == 'e' && con[2] == 0)
47467 mode = CCGCmode, code = GE;
47471 mode = CCGCmode, code = LT;
47472 else if (con[1] == 'e' && con[2] == 0)
47473 mode = CCGCmode, code = LE;
47477 mode = CCOmode, code = EQ;
47481 mode = CCPmode, code = EQ;
47485 mode = CCSmode, code = EQ;
47489 mode = CCZmode, code = EQ;
47492 if (code == UNKNOWN)
47494 error ("unknown asm flag output %qs", constraints[i]);
/* Apply the 'n' prefix by reversing the comparison.  */
47498 code = reverse_condition (code);
47500 rtx dest = outputs[i];
47503 /* This is the first asm flag output. Here we put the flags
47504 register in as the real output and adjust the condition to
47506 constraints[i] = "=Bf";
47507 outputs[i] = gen_rtx_REG (CCmode, FLAGS_REG);
47508 saw_asm_flag = true;
47512 /* We don't need the flags register as output twice. */
47513 constraints[i] = "=X";
47514 outputs[i] = gen_rtx_SCRATCH (SImode);
/* Materialize the condition from the flags register.  */
47517 rtx x = gen_rtx_REG (mode, FLAGS_REG);
47518 x = gen_rtx_fmt_ee (code, QImode, x, const0_rtx);
47520 machine_mode dest_mode = GET_MODE (dest);
47521 if (!SCALAR_INT_MODE_P (dest_mode))
47523 error ("invalid type for asm flag output");
47527 if (dest_mode == DImode && !TARGET_64BIT)
47528 dest_mode = SImode;
/* Widen the QImode condition to the output's mode.  */
47530 if (dest_mode != QImode)
47532 rtx destqi = gen_reg_rtx (QImode);
47533 emit_insn (gen_rtx_SET (destqi, x));
47535 if (TARGET_ZERO_EXTEND_WITH_AND
47536 && optimize_function_for_speed_p (cfun))
47538 x = force_reg (dest_mode, const0_rtx);
47540 emit_insn (gen_movstrictqi
47541 (gen_lowpart (QImode, x), destqi));
47544 x = gen_rtx_ZERO_EXTEND (dest_mode, destqi);
47547 if (dest_mode != GET_MODE (dest))
47549 rtx tmp = gen_reg_rtx (SImode);
47551 emit_insn (gen_rtx_SET (tmp, x));
47552 emit_insn (gen_zero_extendsidi2 (dest, tmp));
47555 emit_insn (gen_rtx_SET (dest, x));
47557 rtx_insn *seq = get_insns ();
47564 /* If we had no asm flag outputs, clobber the flags. */
47565 clobbers.safe_push (gen_rtx_REG (CCmode, FLAGS_REG));
47566 SET_HARD_REG_BIT (clobbered_regs, FLAGS_REG);
47571 /* Implements target vector targetm.asm.encode_section_info. */
/* Runs the default encoding, then marks symbols placed in the large
   data section with SYMBOL_FLAG_FAR_ADDR.  */
47573 static void ATTRIBUTE_UNUSED
47574 ix86_encode_section_info (tree decl, rtx rtl, int first)
47576 default_encode_section_info (decl, rtl, first);
47578 if (ix86_in_large_data_p (decl))
47579 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
47582 /* Worker function for REVERSE_CONDITION. */
/* FP compare modes must use the maybe-unordered reversal so NaN
   ordering is preserved; integer CC modes use the plain reversal.  */
47585 ix86_reverse_condition (enum rtx_code code, machine_mode mode)
47587 return (mode != CCFPmode && mode != CCFPUmode
47588 ? reverse_condition (code)
47589 : reverse_condition_maybe_unordered (code));
47592 /* Output code to perform an x87 FP register move, from OPERANDS[1]
/* Returns the assembler template for an x87 move.  Pop forms (fstp /
   ffreep) are chosen when the source register dies in this insn;
   XFmode stores to memory have no non-popping form, so a reload is
   appended when the value must stay on the stack.  */
47596 output_387_reg_move (rtx insn, rtx *operands)
47598 if (REG_P (operands[0]))
47600 if (REG_P (operands[1])
47601 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
47603 if (REGNO (operands[0]) == FIRST_STACK_REG)
47604 return output_387_ffreep (operands, 0);
47605 return "fstp\t%y0";
47607 if (STACK_TOP_P (operands[0]))
47608 return "fld%Z1\t%y1";
47611 else if (MEM_P (operands[0]))
47613 gcc_assert (REG_P (operands[1]));
47614 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
47615 return "fstp%Z0\t%y0";
47618 /* There is no non-popping store to memory for XFmode.
47619 So if we need one, follow the store with a load. */
47620 if (GET_MODE (operands[0]) == XFmode)
47621 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
47623 return "fst%Z0\t%y0";
47630 /* Output code to perform a conditional jump to LABEL, if C2 flag in
47631 FP status register is set. */
/* Reads the x87 status word with fnstsw, then tests the unordered
   bit either via sahf (when available/profitable) or via a direct
   testb of bit 0x04, and emits the conditional jump.  */
47634 ix86_emit_fp_unordered_jump (rtx label)
47636 rtx reg = gen_reg_rtx (HImode);
47639 emit_insn (gen_x86_fnstsw_1 (reg));
47641 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
47643 emit_insn (gen_x86_sahf_1 (reg));
47645 temp = gen_rtx_REG (CCmode, FLAGS_REG);
47646 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
47650 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
47652 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
47653 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
47656 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
47657 gen_rtx_LABEL_REF (VOIDmode, label),
47659 temp = gen_rtx_SET (pc_rtx, temp);
47661 emit_jump_insn (temp);
/* Unordered comparisons are predicted unlikely (10%).  */
47662 predict_jump (REG_BR_PROB_BASE * 10 / 100);
47665 /* Output code to perform a log1p XFmode calculation. */
/* Computes op0 = log1p (op1) on the x87: for |op1| below the
   fyl2xp1 accuracy threshold (~0.2929, i.e. sqrt(2)/2 - ... ) use
   fyl2xp1 directly; otherwise compute fyl2x (1 + op1) scaled by
   ln(2) (fldln2 constant).  */
47667 void ix86_emit_i387_log1p (rtx op0, rtx op1)
47669 rtx_code_label *label1 = gen_label_rtx ();
47670 rtx_code_label *label2 = gen_label_rtx ();
47672 rtx tmp = gen_reg_rtx (XFmode);
47673 rtx tmp2 = gen_reg_rtx (XFmode);
47676 emit_insn (gen_absxf2 (tmp, op1));
/* Branch to the fyl2x path when |op1| >= the threshold constant.  */
47677 test = gen_rtx_GE (VOIDmode, tmp,
47678 const_double_from_real_value (
47679 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
47681 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
47683 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
47684 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
47685 emit_jump (label2);
47687 emit_label (label1);
47688 emit_move_insn (tmp, CONST1_RTX (XFmode));
47689 emit_insn (gen_addxf3 (tmp, op1, tmp));
47690 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
47691 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
47693 emit_label (label2);
47696 /* Emit code for round calculation. */
/* Expands round (op1) into op0 on the x87 using the identity
   round(a) = sgn(a) * floor(fabs(a) + 0.5): take |a|, add 0.5,
   floor (via frndint or lfloor patterns depending on OUTMODE),
   then negate the result when fxam reported a negative sign bit.  */
47697 void ix86_emit_i387_round (rtx op0, rtx op1)
47699 machine_mode inmode = GET_MODE (op1);
47700 machine_mode outmode = GET_MODE (op0);
47701 rtx e1, e2, res, tmp, tmp1, half;
47702 rtx scratch = gen_reg_rtx (HImode);
47703 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
47704 rtx_code_label *jump_label = gen_label_rtx ();
47706 rtx (*gen_abs) (rtx, rtx);
47707 rtx (*gen_neg) (rtx, rtx);
/* Pick abs pattern by input FP mode.  */
47712 gen_abs = gen_abssf2;
47715 gen_abs = gen_absdf2;
47718 gen_abs = gen_absxf2;
47721 gcc_unreachable ();
/* Pick neg pattern by output mode (FP or integer).  */
47727 gen_neg = gen_negsf2;
47730 gen_neg = gen_negdf2;
47733 gen_neg = gen_negxf2;
47736 gen_neg = gen_neghi2;
47739 gen_neg = gen_negsi2;
47742 gen_neg = gen_negdi2;
47745 gcc_unreachable ();
47748 e1 = gen_reg_rtx (inmode);
47749 e2 = gen_reg_rtx (inmode);
47750 res = gen_reg_rtx (outmode);
47752 half = const_double_from_real_value (dconsthalf, inmode);
47754 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
47756 /* scratch = fxam(op1) */
47757 emit_insn (gen_rtx_SET (scratch,
47758 gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
47760 /* e1 = fabs(op1) */
47761 emit_insn (gen_abs (e1, op1));
47763 /* e2 = e1 + 0.5 */
47764 half = force_reg (inmode, half);
47765 emit_insn (gen_rtx_SET (e2, gen_rtx_PLUS (inmode, e1, half)));
47767 /* res = floor(e2) */
47768 if (inmode != XFmode)
47770 tmp1 = gen_reg_rtx (XFmode);
47772 emit_insn (gen_rtx_SET (tmp1, gen_rtx_FLOAT_EXTEND (XFmode, e2)));
/* FP output narrower than XFmode: round in XFmode, then truncate
   via an UNSPEC_TRUNC_NOOP (value already exactly representable).  */
47782 rtx tmp0 = gen_reg_rtx (XFmode);
47784 emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
47786 emit_insn (gen_rtx_SET (res,
47787 gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
47788 UNSPEC_TRUNC_NOOP)));
47792 emit_insn (gen_frndintxf2_floor (res, tmp1));
47795 emit_insn (gen_lfloorxfhi2 (res, tmp1));
47798 emit_insn (gen_lfloorxfsi2 (res, tmp1));
47801 emit_insn (gen_lfloorxfdi2 (res, tmp1));
47804 gcc_unreachable ();
47807 /* flags = signbit(a) */
47808 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));
47810 /* if (flags) then res = -res */
47811 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
47812 gen_rtx_EQ (VOIDmode, flags, const0_rtx),
47813 gen_rtx_LABEL_REF (VOIDmode, jump_label),
47815 insn = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
47816 predict_jump (REG_BR_PROB_BASE * 50 / 100);
47817 JUMP_LABEL (insn) = jump_label;
47819 emit_insn (gen_neg (res, res));
47821 emit_label (jump_label);
47822 LABEL_NUSES (jump_label) = 1;
47824 emit_move_insn (op0, res);
47827 /* Output code to perform a Newton-Rhapson approximation of a single precision
47828 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
/* One Newton-Raphson refinement of the hardware rcp estimate:
   res = a * (2*rcp(b) - b*rcp(b)^2).  512-bit modes use the
   rcp14 unspec variant.  */
47830 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode)
47832 rtx x0, x1, e0, e1;
47834 x0 = gen_reg_rtx (mode);
47835 e0 = gen_reg_rtx (mode);
47836 e1 = gen_reg_rtx (mode);
47837 x1 = gen_reg_rtx (mode);
47839 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
47841 b = force_reg (mode, b);
47843 /* x0 = rcp(b) estimate */
47844 if (mode == V16SFmode || mode == V8DFmode)
47845 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
47848 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
/* e0 = b * rcp(b)^2.  */
47852 emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, b)));
47855 emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, e0)));
/* e1 = 2 * rcp(b).  */
47858 emit_insn (gen_rtx_SET (e1, gen_rtx_PLUS (mode, x0, x0)));
/* x1 = refined reciprocal.  */
47861 emit_insn (gen_rtx_SET (x1, gen_rtx_MINUS (mode, e1, e0)));
47864 emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, a, x1)));
47867 /* Output code to perform a Newton-Rhapson approximation of a
47868 single precision floating point [reciprocal] square root. */
/* Refines the hardware rsqrt estimate by one Newton-Raphson step:
   (r)sqrt(a) ~= -0.5 * {a*x0 | x0} * (a*x0*x0 - 3), with a zero-mask
   applied to the estimate to keep sqrt(0.0) from producing NaN.
   512-bit modes use rsqrt14 and a masked compare/blend.  */
47870 void ix86_emit_swsqrtsf (rtx res, rtx a, machine_mode mode, bool recip)
47872 rtx x0, e0, e1, e2, e3, mthree, mhalf;
47876 x0 = gen_reg_rtx (mode);
47877 e0 = gen_reg_rtx (mode);
47878 e1 = gen_reg_rtx (mode);
47879 e2 = gen_reg_rtx (mode);
47880 e3 = gen_reg_rtx (mode);
/* Build the constants -3.0 and -0.5 (broadcast for vector modes).  */
47882 real_from_integer (&r, VOIDmode, -3, SIGNED);
47883 mthree = const_double_from_real_value (r, SFmode);
47885 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
47886 mhalf = const_double_from_real_value (r, SFmode);
47887 unspec = UNSPEC_RSQRT;
47889 if (VECTOR_MODE_P (mode))
47891 mthree = ix86_build_const_vector (mode, true, mthree);
47892 mhalf = ix86_build_const_vector (mode, true, mhalf);
47893 /* There is no 512-bit rsqrt. There is however rsqrt14. */
47894 if (GET_MODE_SIZE (mode) == 64)
47895 unspec = UNSPEC_RSQRT14;
47898 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
47899 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
47901 a = force_reg (mode, a);
47903 /* x0 = rsqrt(a) estimate */
47904 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
47907 /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0). */
47910 rtx zero = force_reg (mode, CONST0_RTX(mode));
47913 /* Handle masked compare. */
47914 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
47916 mask = gen_reg_rtx (HImode);
47917 /* Imm value 0x4 corresponds to not-equal comparison. */
47918 emit_insn (gen_avx512f_cmpv16sf3 (mask, zero, a, GEN_INT (0x4)));
47919 emit_insn (gen_avx512f_blendmv16sf (x0, zero, x0, mask));
47923 mask = gen_reg_rtx (mode);
47924 emit_insn (gen_rtx_SET (mask, gen_rtx_NE (mode, zero, a)));
47925 emit_insn (gen_rtx_SET (x0, gen_rtx_AND (mode, x0, mask)));
/* e1 = a * x0 * x0; e2 = e1 - 3.0 (mthree is -3, so PLUS).  */
47930 emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, a)));
47932 emit_insn (gen_rtx_SET (e1, gen_rtx_MULT (mode, e0, x0)));
47935 mthree = force_reg (mode, mthree);
47936 emit_insn (gen_rtx_SET (e2, gen_rtx_PLUS (mode, e1, mthree)));
47938 mhalf = force_reg (mode, mhalf);
47940 /* e3 = -.5 * x0 */
47941 emit_insn (gen_rtx_SET (e3, gen_rtx_MULT (mode, x0, mhalf)));
47943 /* e3 = -.5 * e0 */
47944 emit_insn (gen_rtx_SET (e3, gen_rtx_MULT (mode, e0, mhalf)));
47945 /* ret = e2 * e3 */
47946 emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, e2, e3)));
47949 #ifdef TARGET_SOLARIS
47950 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
/* Emits ".eh_frame" with the "@unwind" marker (Binutils 2.15
   requirement), routes COMDAT-group sections to the Solaris comdat
   helper, and defers everything else to the ELF default.  */
47953 i386_solaris_elf_named_section (const char *name, unsigned int flags,
47956 /* With Binutils 2.15, the "@unwind" marker must be specified on
47957 every occurrence of the ".eh_frame" section, not just the first
47960 && strcmp (name, ".eh_frame") == 0)
47962 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
47963 flags & SECTION_WRITE ? "aw" : "a");
47968 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
47970 solaris_elf_asm_comdat_section (name, flags, decl);
47975 default_elf_asm_named_section (name, flags, decl);
47977 #endif /* TARGET_SOLARIS */
47979 /* Return the mangling of TYPE if it is an extended fundamental type. */
/* Only void/boolean/integer/real main variants are candidates;
   TFmode mangles as "g" (__float128) and XFmode as "e" (long
   double / __float80), per the visible comments.  */
47981 static const char *
47982 ix86_mangle_type (const_tree type)
47984 type = TYPE_MAIN_VARIANT (type);
47986 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
47987 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
47990 switch (TYPE_MODE (type))
47993 /* __float128 is "g". */
47996 /* "long double" or __float80 is "e". */
48003 /* For 32-bit code we can save PIC register setup by using
48004 __stack_chk_fail_local hidden function instead of calling
48005 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
48006 register, so it is better to call __stack_chk_fail directly. */
/* Target hook: choose the stack-protector failure routine per the
   rationale above.  */
48008 static tree ATTRIBUTE_UNUSED
48009 ix86_stack_protect_fail (void)
48011 return TARGET_64BIT
48012 ? default_external_stack_protect_fail ()
48013 : default_hidden_stack_protect_fail ();
48016 /* Select a format to encode pointers in exception handling data. CODE
48017 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
48018 true if the symbol may be affected by dynamic relocations.
48020 ??? All x86 object file formats are capable of representing this.
48021 After all, the relocation needed is the same as for the call insn.
48022 Whether or not a particular assembler allows us to enter such, I
48023 guess we'll have to see. */
/* PIC path (elided guard): pc-relative sdata, 4-byte for small code
   models, indirect for global symbols; non-PIC small/medium models
   use udata4, otherwise an absolute pointer.  */
48025 asm_preferred_eh_data_format (int code, int global)
48029 int type = DW_EH_PE_sdata8;
48031 || ix86_cmodel == CM_SMALL_PIC
48032 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code))
48033 type = DW_EH_PE_sdata4;
48034 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
48036 if (ix86_cmodel == CM_SMALL
48037 || (ix86_cmodel == CM_MEDIUM && code))
48038 return DW_EH_PE_udata4;
48039 return DW_EH_PE_absptr;
48042 /* Expand copysign from SIGN to the positive value ABS_VALUE
48043 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
/* Builds a sign-bit mask when none was supplied (reducing a vector
   mask to scalar via vec_select for scalar modes), then computes
   RESULT = ABS_VALUE | (SIGN & signbit-mask).  */
48046 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
48048 machine_mode mode = GET_MODE (sign);
48049 rtx sgn = gen_reg_rtx (mode);
48050 if (mask == NULL_RTX)
48052 machine_mode vmode;
48054 if (mode == SFmode)
48056 else if (mode == DFmode)
48061 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
48062 if (!VECTOR_MODE_P (mode))
48064 /* We need to generate a scalar mode mask in this case. */
48065 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
48066 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
48067 mask = gen_reg_rtx (mode);
48068 emit_insn (gen_rtx_SET (mask, tmp));
/* Caller-supplied MASK masks out the sign bit, so invert it first.  */
48072 mask = gen_rtx_NOT (mode, mask);
48073 emit_insn (gen_rtx_SET (sgn, gen_rtx_AND (mode, mask, sign)));
48074 emit_insn (gen_rtx_SET (result, gen_rtx_IOR (mode, abs_value, sgn)));
48077 /* Expand fabs (OP0) and return a new rtx that holds the result. The
48078 mask for masking out the sign-bit is stored in *SMASK, if that is
/* Computes xa = op0 & ~signbit using an ix86_build_signbit_mask
   constant (reduced to scalar for SF/DF modes via vec_select).  */
48081 ix86_expand_sse_fabs (rtx op0, rtx *smask)
48083 machine_mode vmode, mode = GET_MODE (op0);
48086 xa = gen_reg_rtx (mode);
48087 if (mode == SFmode)
48089 else if (mode == DFmode)
48093 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
48094 if (!VECTOR_MODE_P (mode))
48096 /* We need to generate a scalar mode mask in this case. */
48097 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
48098 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
48099 mask = gen_reg_rtx (mode);
48100 emit_insn (gen_rtx_SET (mask, tmp));
48102 emit_insn (gen_rtx_SET (xa, gen_rtx_AND (mode, op0, mask)));
48110 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
48111 swapping the operands if SWAP_OPERANDS is true. The expanded
48112 code is a forward jump to a newly created label in case the
48113 comparison is true. The generated label rtx is returned. */
48114 static rtx_code_label *
48115 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
48116 bool swap_operands)
48118 machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
48119 rtx_code_label *label;
48123 std::swap (op0, op1);
48125 label = gen_label_rtx ();
/* Emit the FP compare into the flags register, then the conditional
   jump to LABEL.  */
48126 tmp = gen_rtx_REG (fpcmp_mode, FLAGS_REG);
48127 emit_insn (gen_rtx_SET (tmp, gen_rtx_COMPARE (fpcmp_mode, op0, op1)));
48128 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
48129 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
48130 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
48131 tmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
48132 JUMP_LABEL (tmp) = label;
48137 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
48138 using comparison code CODE. Operands are swapped for the comparison if
48139 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
48141 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
48142 bool swap_operands)
48144 rtx (*insn)(rtx, rtx, rtx, rtx);
48145 machine_mode mode = GET_MODE (op0);
48146 rtx mask = gen_reg_rtx (mode);
48149 std::swap (op0, op1);
/* cmpsd vs. cmpss, chosen by operand mode.  */
48151 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
48153 emit_insn (insn (mask, op0, op1,
48154 gen_rtx_fmt_ee (code, mode, op0, op1)));
48158 /* Generate and return a rtx of mode MODE for 2**n where n is the number
48159 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
/* 2^52 for DFmode, 2^23 for SFmode — the threshold beyond which every
   representable value is an integer.  Returned in a fresh register.  */
48161 ix86_gen_TWO52 (machine_mode mode)
48163 REAL_VALUE_TYPE TWO52r;
48166 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
48167 TWO52 = const_double_from_real_value (TWO52r, mode);
48168 TWO52 = force_reg (mode, TWO52);
48173 /* Expand SSE sequence for computing lround from OP1 storing
/* Adds copysign (nextafter (0.5, 0.0), op1) to op1 and converts to
   integer; using the value just below 0.5 avoids rounding exact
   halfway cases the wrong way under truncation.  */
48176 ix86_expand_lround (rtx op0, rtx op1)
48178 /* C code for the stuff we're doing below:
48179 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
48182 machine_mode mode = GET_MODE (op1);
48183 const struct real_format *fmt;
48184 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
48187 /* load nextafter (0.5, 0.0) */
48188 fmt = REAL_MODE_FORMAT (mode);
48189 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
48190 real_arithmetic (&pred_half, MINUS_EXPR, &dconsthalf, &half_minus_pred_half);
48192 /* adj = copysign (0.5, op1) */
48193 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
48194 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
48196 /* adj = op1 + adj */
48197 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
48199 /* op0 = (imode)adj */
48200 expand_fix (op0, adj, 0);
48203 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
48206 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
48208 /* C code for the stuff we're doing below (for do_floor):
48210 xi -= (double)xi > op1 ? 1 : 0;
48213 machine_mode fmode = GET_MODE (op1);
48214 machine_mode imode = GET_MODE (op0);
48215 rtx ireg, freg, tmp;
48216 rtx_code_label *label;
48218 /* reg = (long)op1 */
48219 ireg = gen_reg_rtx (imode);
48220 expand_fix (ireg, op1, 0);
48222 /* freg = (double)reg */
48223 freg = gen_reg_rtx (fmode);
48224 expand_float (freg, ireg, 0);
48226 /* ireg = (freg > op1) ? ireg - 1 : ireg */
/* For ceil (!do_floor) the compare operands are swapped and the
   adjustment becomes +1 instead of -1.  */
48227 label = ix86_expand_sse_compare_and_jump (UNLE,
48228 freg, op1, !do_floor);
48229 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
48230 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
48231 emit_move_insn (ireg, tmp);
48233 emit_label (label);
48234 LABEL_NUSES (label) = 1;
48236 emit_move_insn (op0, ireg);
48239 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
48240 result in OPERAND0. */
48242 ix86_expand_rint (rtx operand0, rtx operand1)
48244 /* C code for the stuff we're doing below:
48245 xa = fabs (operand1);
48246 if (!isless (xa, 2**52))
48248 xa = xa + 2**52 - 2**52;
48249 return copysign (xa, operand1);
48251 machine_mode mode = GET_MODE (operand0);
48252 rtx res, xa, TWO52, mask;
48253 rtx_code_label *label;
48255 res = gen_reg_rtx (mode);
48256 emit_move_insn (res, operand1);
48258 /* xa = abs (operand1) */
48259 xa = ix86_expand_sse_fabs (res, &mask);
48261 /* if (!isless (xa, TWO52)) goto label; */
/* Magnitudes >= 2**52 have no fractional bits, so skip the rounding.  */
48262 TWO52 = ix86_gen_TWO52 (mode);
48263 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
/* xa + 2**52 - 2**52 rounds XA to an integer via FP addition.  */
48265 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
48266 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
/* Restore the original sign (keeps -0.0 negative).  */
48268 ix86_sse_copysign_to_positive (res, xa, res, mask);
48270 emit_label (label);
48271 LABEL_NUSES (label) = 1;
48273 emit_move_insn (operand0, res);
48276 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
48279 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
48281 /* C code for the stuff we expand below.
48282 double xa = fabs (x), x2;
48283 if (!isless (xa, TWO52))
48285 xa = xa + TWO52 - TWO52;
48286 x2 = copysign (xa, x);
/* NOTE(review): the "_32" variant presumably avoids DImode conversions so it
   also works on 32-bit targets — cf. ix86_expand_rounddf_32's comment.  */
48295 machine_mode mode = GET_MODE (operand0);
48296 rtx xa, TWO52, tmp, one, res, mask;
48297 rtx_code_label *label;
48299 TWO52 = ix86_gen_TWO52 (mode);
48301 /* Temporary for holding the result, initialized to the input
48302 operand to ease control flow. */
48303 res = gen_reg_rtx (mode);
48304 emit_move_insn (res, operand1);
48306 /* xa = abs (operand1) */
48307 xa = ix86_expand_sse_fabs (res, &mask);
48309 /* if (!isless (xa, TWO52)) goto label; */
48310 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
48312 /* xa = xa + TWO52 - TWO52; */
48313 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
48314 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
48316 /* xa = copysign (xa, operand1) */
48317 ix86_sse_copysign_to_positive (xa, xa, res, mask);
48319 /* generate 1.0 or -1.0 */
48320 one = force_reg (mode,
48321 const_double_from_real_value (do_floor
48322 ? dconst1 : dconstm1, mode));
48324 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
48325 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
48326 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
48327 /* We always need to subtract here to preserve signed zero. */
/* Subtracting -1.0 (for ceil) adds 1 while keeping -0.0 intact.  */
48328 tmp = expand_simple_binop (mode, MINUS,
48329 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
48330 emit_move_insn (res, tmp);
48332 emit_label (label);
48333 LABEL_NUSES (label) = 1;
48335 emit_move_insn (operand0, res);
48338 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
48341 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
48343 /* C code for the stuff we expand below.
48344 double xa = fabs (x), x2;
48345 if (!isless (xa, TWO52))
48347 x2 = (double)(long)x;
48354 if (HONOR_SIGNED_ZEROS (mode))
48355 return copysign (x2, x);
48358 machine_mode mode = GET_MODE (operand0);
48359 rtx xa, xi, TWO52, tmp, one, res, mask;
48360 rtx_code_label *label;
48362 TWO52 = ix86_gen_TWO52 (mode);
48364 /* Temporary for holding the result, initialized to the input
48365 operand to ease control flow. */
48366 res = gen_reg_rtx (mode);
48367 emit_move_insn (res, operand1);
48369 /* xa = abs (operand1) */
48370 xa = ix86_expand_sse_fabs (res, &mask);
48372 /* if (!isless (xa, TWO52)) goto label; */
48373 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
48375 /* xa = (double)(long)x */
/* Truncate via FP->int->FP conversion; DImode needs a 64-bit cvttsd2siq,
   hence the mode choice below.  */
48376 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
48377 expand_fix (xi, res, 0);
48378 expand_float (xa, xi, 0);
48381 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
48383 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
/* For ceil (!do_floor) the compare is swapped and the correction is +1.  */
48384 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
48385 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
48386 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
48387 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
48388 emit_move_insn (res, tmp);
/* Re-apply the input's sign so -0.0 survives when signed zeros matter.  */
48390 if (HONOR_SIGNED_ZEROS (mode))
48391 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
48393 emit_label (label);
48394 LABEL_NUSES (label) = 1;
48396 emit_move_insn (operand0, res);
48399 /* Expand SSE sequence for computing round from OPERAND1 storing
48400 into OPERAND0. Sequence that works without relying on DImode truncation
48401 via cvttsd2siq that is only available on 64bit targets. */
48403 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
48405 /* C code for the stuff we expand below.
48406 double xa = fabs (x), xa2, x2;
48407 if (!isless (xa, TWO52))
48409 Using the absolute value and copying back sign makes
48410 -0.0 -> -0.0 correct.
48411 xa2 = xa + TWO52 - TWO52;
48416 else if (dxa > 0.5)
48418 x2 = copysign (xa2, x);
48421 machine_mode mode = GET_MODE (operand0);
48422 rtx xa, xa2, dxa, TWO52, tmp, half, mhalf, one, res, mask;
48423 rtx_code_label *label;
48425 TWO52 = ix86_gen_TWO52 (mode);
48427 /* Temporary for holding the result, initialized to the input
48428 operand to ease control flow. */
48429 res = gen_reg_rtx (mode);
48430 emit_move_insn (res, operand1);
48432 /* xa = abs (operand1) */
48433 xa = ix86_expand_sse_fabs (res, &mask);
48435 /* if (!isless (xa, TWO52)) goto label; */
48436 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
48438 /* xa2 = xa + TWO52 - TWO52; */
48439 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
48440 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
48442 /* dxa = xa2 - xa; */
/* DXA is the signed rounding error of the TWO52 trick; it is corrected
   against +/-0.5 below to get round-half-away-from-zero.  */
48443 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
48445 /* generate 0.5, 1.0 and -0.5 */
/* 1.0 = 0.5 + 0.5 and -0.5 = 0.5 - 1.0, built from the single constant.  */
48446 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
48447 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
48448 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
48452 tmp = gen_reg_rtx (mode);
48453 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
48454 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
48455 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
48456 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
48457 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
48458 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
48459 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
48460 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
48462 /* res = copysign (xa2, operand1) */
48463 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
48465 emit_label (label);
48466 LABEL_NUSES (label) = 1;
48468 emit_move_insn (operand0, res);
48471 /* Expand SSE sequence for computing trunc from OPERAND1 storing
48474 ix86_expand_trunc (rtx operand0, rtx operand1)
48476 /* C code for SSE variant we expand below.
48477 double xa = fabs (x), x2;
48478 if (!isless (xa, TWO52))
48480 x2 = (double)(long)x;
48481 if (HONOR_SIGNED_ZEROS (mode))
48482 return copysign (x2, x);
48485 machine_mode mode = GET_MODE (operand0);
48486 rtx xa, xi, TWO52, res, mask;
48487 rtx_code_label *label;
48489 TWO52 = ix86_gen_TWO52 (mode);
48491 /* Temporary for holding the result, initialized to the input
48492 operand to ease control flow. */
48493 res = gen_reg_rtx (mode);
48494 emit_move_insn (res, operand1);
48496 /* xa = abs (operand1) */
48497 xa = ix86_expand_sse_fabs (res, &mask);
48499 /* if (!isless (xa, TWO52)) goto label; */
/* Magnitudes >= 2**52 are already integral; skip the conversion.  */
48500 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
48502 /* x = (double)(long)x */
/* FP->int conversion truncates toward zero, which is exactly trunc.  */
48503 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
48504 expand_fix (xi, res, 0);
48505 expand_float (res, xi, 0);
48507 if (HONOR_SIGNED_ZEROS (mode))
48508 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
48510 emit_label (label);
48511 LABEL_NUSES (label) = 1;
48513 emit_move_insn (operand0, res);
48516 /* Expand SSE sequence for computing trunc from OPERAND1 storing
48519 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
48521 machine_mode mode = GET_MODE (operand0);
48522 rtx xa, mask, TWO52, one, res, smask, tmp;
48523 rtx_code_label *label;
48525 /* C code for SSE variant we expand below.
48526 double xa = fabs (x), x2;
48527 if (!isless (xa, TWO52))
48529 xa2 = xa + TWO52 - TWO52;
48533 x2 = copysign (xa2, x);
/* NOTE(review): like ix86_expand_rounddf_32, this variant avoids DImode
   conversions (cvttsd2siq), presumably for 32-bit targets — confirm.  */
48537 TWO52 = ix86_gen_TWO52 (mode);
48539 /* Temporary for holding the result, initialized to the input
48540 operand to ease control flow. */
48541 res = gen_reg_rtx (mode);
48542 emit_move_insn (res, operand1);
48544 /* xa = abs (operand1) */
48545 xa = ix86_expand_sse_fabs (res, &smask);
48547 /* if (!isless (xa, TWO52)) goto label; */
48548 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
48550 /* res = xa + TWO52 - TWO52; */
48551 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
48552 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
48553 emit_move_insn (res, tmp);
48556 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
48558 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
/* The TWO52 trick may round up; subtract 1 when it did so the result
   truncates toward zero.  */
48559 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
48560 emit_insn (gen_rtx_SET (mask, gen_rtx_AND (mode, mask, one)));
48561 tmp = expand_simple_binop (mode, MINUS,
48562 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
48563 emit_move_insn (res, tmp);
48565 /* res = copysign (res, operand1) */
48566 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
48568 emit_label (label);
48569 LABEL_NUSES (label) = 1;
48571 emit_move_insn (operand0, res);
48574 /* Expand SSE sequence for computing round from OPERAND1 storing
48577 ix86_expand_round (rtx operand0, rtx operand1)
48579 /* C code for the stuff we're doing below:
48580 double xa = fabs (x);
48581 if (!isless (xa, TWO52))
48583 xa = (double)(long)(xa + nextafter (0.5, 0.0));
48584 return copysign (xa, x);
48586 machine_mode mode = GET_MODE (operand0);
48587 rtx res, TWO52, xa, xi, half, mask;
48588 rtx_code_label *label;
48589 const struct real_format *fmt;
48590 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
48592 /* Temporary for holding the result, initialized to the input
48593 operand to ease control flow. */
48594 res = gen_reg_rtx (mode);
48595 emit_move_insn (res, operand1);
48597 TWO52 = ix86_gen_TWO52 (mode);
48598 xa = ix86_expand_sse_fabs (res, &mask);
48599 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
48601 /* load nextafter (0.5, 0.0) */
/* pred_half = 0.5 - 2**(-p-1), the predecessor of 0.5, so halfway
   values are not pushed past the next integer by the addition.  */
48602 fmt = REAL_MODE_FORMAT (mode);
48603 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
48604 real_arithmetic (&pred_half, MINUS_EXPR, &dconsthalf, &half_minus_pred_half);
48606 /* xa = xa + 0.5 */
48607 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
48608 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
48610 /* xa = (double)(int64_t)xa */
48611 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
48612 expand_fix (xi, xa, 0);
48613 expand_float (xa, xi, 0);
48615 /* res = copysign (xa, operand1) */
48616 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
48618 emit_label (label);
48619 LABEL_NUSES (label) = 1;
48621 emit_move_insn (operand0, res);
48624 /* Expand SSE sequence for computing round
48625 from OP1 storing into OP0 using sse4 round insn. */
48627 ix86_expand_round_sse4 (rtx op0, rtx op1)
48629 machine_mode mode = GET_MODE (op0);
48630 rtx e1, e2, res, half;
48631 const struct real_format *fmt;
48632 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
48633 rtx (*gen_copysign) (rtx, rtx, rtx);
48634 rtx (*gen_round) (rtx, rtx, rtx);
/* Pick SFmode or DFmode copysign/round generators.  */
48639 gen_copysign = gen_copysignsf3;
48640 gen_round = gen_sse4_1_roundsf2;
48643 gen_copysign = gen_copysigndf3;
48644 gen_round = gen_sse4_1_rounddf2;
48647 gcc_unreachable ();
48650 /* round (a) = trunc (a + copysign (0.5, a)) */
48652 /* load nextafter (0.5, 0.0) */
/* Use the predecessor of 0.5 so exact halfway cases round correctly.  */
48653 fmt = REAL_MODE_FORMAT (mode);
48654 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
48655 real_arithmetic (&pred_half, MINUS_EXPR, &dconsthalf, &half_minus_pred_half);
48656 half = const_double_from_real_value (pred_half, mode);
48658 /* e1 = copysign (0.5, op1) */
48659 e1 = gen_reg_rtx (mode);
48660 emit_insn (gen_copysign (e1, half, op1));
48662 /* e2 = op1 + e1 */
48663 e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);
48665 /* res = trunc (e2) */
48666 res = gen_reg_rtx (mode);
48667 emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));
48669 emit_move_insn (op0, res);
48673 /* Table of valid machine attributes. */
48674 static const struct attribute_spec ix86_attribute_table[] =
48676 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
48677 affects_type_identity } */
48678 /* Stdcall attribute says callee is responsible for popping arguments
48679 if they are not variable. */
48680 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
48682 /* Fastcall attribute says callee is responsible for popping arguments
48683 if they are not variable. */
48684 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
48686 /* Thiscall attribute says callee is responsible for popping arguments
48687 if they are not variable. */
48688 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
48690 /* Cdecl attribute says the callee is a normal C declaration */
48691 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
48693 /* Regparm attribute specifies how many integer arguments are to be
48694 passed in registers. */
48695 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
48697 /* Sseregparm attribute says we are using x86_64 calling conventions
48698 for FP arguments. */
48699 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
48701 /* The transactional memory builtins are implicitly regparm or fastcall
48702 depending on the ABI. Override the generic do-nothing attribute that
48703 these builtins were declared with. */
48704 { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
48706 /* force_align_arg_pointer says this function realigns the stack at entry. */
48707 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
48708 false, true, true, ix86_handle_force_align_arg_pointer_attribute, false },
/* PE/COFF (Windows) only: dll import/export and shared-section attributes.  */
48709 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
48710 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
48711 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
48712 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
48715 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
48717 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
48719 #ifdef SUBTARGET_ATTRIBUTE_TABLE
48720 SUBTARGET_ATTRIBUTE_TABLE,
48722 /* ms_abi and sysv_abi calling convention function attributes. */
48723 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
48724 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
48725 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
48727 { "callee_pop_aggregate_return", 1, 1, false, true, true,
48728 ix86_handle_callee_pop_aggregate_return, true },
/* Table terminator — the NULL-name entry must remain last.  */
48730 { NULL, 0, 0, false, false, false, NULL, false }
48733 /* Implement targetm.vectorize.builtin_vectorization_cost. */
48735 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
/* Costs come straight from the active processor cost table (ix86_cost).  */
48740 switch (type_of_cost)
48743 return ix86_cost->scalar_stmt_cost;
48746 return ix86_cost->scalar_load_cost;
48749 return ix86_cost->scalar_store_cost;
48752 return ix86_cost->vec_stmt_cost;
48755 return ix86_cost->vec_align_load_cost;
48758 return ix86_cost->vec_store_cost;
48760 case vec_to_scalar:
48761 return ix86_cost->vec_to_scalar_cost;
48763 case scalar_to_vec:
48764 return ix86_cost->scalar_to_vec_cost;
48766 case unaligned_load:
48767 case unaligned_store:
/* Unaligned stores share the unaligned-load cost entry.  */
48768 return ix86_cost->vec_unalign_load_cost;
48770 case cond_branch_taken:
48771 return ix86_cost->cond_taken_branch_cost;
48773 case cond_branch_not_taken:
48774 return ix86_cost->cond_not_taken_branch_cost;
48777 case vec_promote_demote:
48778 return ix86_cost->vec_stmt_cost;
48780 case vec_construct:
/* Constructing a vector element-by-element scales with element count.  */
48781 elements = TYPE_VECTOR_SUBPARTS (vectype);
48782 return ix86_cost->vec_stmt_cost * (elements / 2 + 1);
48785 gcc_unreachable ();
48789 /* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
48790 insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
48791 insn every time. */
/* GTY(()) so the garbage collector keeps the cached insn alive.  */
48793 static GTY(()) rtx_insn *vselect_insn;
48795 /* Initialize vselect_insn. */
48798 init_vselect_insn (void)
/* Build a maximal PARALLEL of const0_rtx placeholders; expand_vselect
   later shrinks it to the needed element count and fills in the perm.  */
48803 x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN));
48804 for (i = 0; i < MAX_VECT_LEN; ++i)
48805 XVECEXP (x, 0, i) = const0_rtx;
/* The V2DF/V4DF modes are placeholders too; callers overwrite them.  */
48806 x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx,
48808 x = gen_rtx_SET (const0_rtx, x);
48810 vselect_insn = emit_insn (x);
48814 /* Construct (set target (vec_select op0 (parallel perm))) and
48815 return true if that's a valid instruction in the active ISA. */
48818 expand_vselect (rtx target, rtx op0, const unsigned char *perm,
48819 unsigned nelt, bool testing_p)
48822 rtx x, save_vconcat;
48825 if (vselect_insn == NULL_RTX)
48826 init_vselect_insn ();
/* Patch the cached insn in place: set the selector to PERM...  */
48828 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1);
48829 PUT_NUM_ELEM (XVEC (x, 0), nelt);
48830 for (i = 0; i < nelt; ++i)
48831 XVECEXP (x, 0, i) = GEN_INT (perm[i]);
/* ...and splice in the real operand, mode and destination.  */
48832 save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
48833 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0;
48834 PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target));
48835 SET_DEST (PATTERN (vselect_insn)) = target;
/* recog_memoized tells us whether some insn pattern matches.  */
48836 icode = recog_memoized (vselect_insn);
48838 if (icode >= 0 && !testing_p)
48839 emit_insn (copy_rtx (PATTERN (vselect_insn)));
/* Restore the cached insn to its neutral state for the next caller.  */
48841 SET_DEST (PATTERN (vselect_insn)) = const0_rtx;
48842 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat;
48843 INSN_CODE (vselect_insn) = -1;
48848 /* Similar, but generate a vec_concat from op0 and op1 as well. */
48851 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
48852 const unsigned char *perm, unsigned nelt,
48855 machine_mode v2mode;
48859 if (vselect_insn == NULL_RTX)
48860 init_vselect_insn ();
/* Reuse the cached insn's VEC_CONCAT, widened to twice OP0's mode.  */
48862 v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
48863 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
48864 PUT_MODE (x, v2mode);
48867 ok = expand_vselect (target, x, perm, nelt, testing_p);
/* Reset the concat operands so the cached insn stays neutral.  */
48868 XEXP (x, 0) = const0_rtx;
48869 XEXP (x, 1) = const0_rtx;
48873 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
48874 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
48877 expand_vec_perm_blend (struct expand_vec_perm_d *d)
48879 machine_mode mmode, vmode = d->vmode;
48880 unsigned i, mask, nelt = d->nelt;
48881 rtx target, op0, op1, maskop, x;
48882 rtx rperm[32], vperm;
/* A blend needs two distinct operands by definition.  */
48884 if (d->one_operand_p)
/* Gate on the ISA/mode combinations the blend patterns support.  */
48886 if (TARGET_AVX512F && GET_MODE_SIZE (vmode) == 64
48887 && (TARGET_AVX512BW
48888 || GET_MODE_UNIT_SIZE (vmode) >= 4))
48890 else if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
48892 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
48894 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
48899 /* This is a blend, not a permute. Elements must stay in their
48900 respective lanes. */
48901 for (i = 0; i < nelt; ++i)
48903 unsigned e = d->perm[i];
48904 if (!(e == i || e == i + nelt))
48911 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
48912 decision should be extracted elsewhere, so that we only try that
48913 sequence once all budget==3 options have been tried. */
48914 target = d->target;
/* Build the immediate blend mask: bit I set means element I comes
   from the second operand.  */
48933 for (i = 0; i < nelt; ++i)
48934 mask |= (d->perm[i] >= nelt) << i;
48938 for (i = 0; i < 2; ++i)
48939 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
48944 for (i = 0; i < 4; ++i)
48945 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
48950 /* See if bytes move in pairs so we can use pblendw with
48951 an immediate argument, rather than pblendvb with a vector
48953 for (i = 0; i < 16; i += 2)
48954 if (d->perm[i] + 1 != d->perm[i + 1])
/* Fall back to pblendvb: build an all-zeros/all-ones byte selector.  */
48957 for (i = 0; i < nelt; ++i)
48958 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
48961 vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
48962 vperm = force_reg (vmode, vperm);
48964 if (GET_MODE_SIZE (vmode) == 16)
48965 emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
48967 emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
48968 if (target != d->target)
48969 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
48973 for (i = 0; i < 8; ++i)
48974 mask |= (d->perm[i * 2] >= 16) << i;
48979 target = gen_reg_rtx (vmode);
48980 op0 = gen_lowpart (vmode, op0);
48981 op1 = gen_lowpart (vmode, op1);
48985 /* See if bytes move in pairs. If not, vpblendvb must be used. */
48986 for (i = 0; i < 32; i += 2)
48987 if (d->perm[i] + 1 != d->perm[i + 1])
48989 /* See if bytes move in quadruplets. If yes, vpblendd
48990 with immediate can be used. */
48991 for (i = 0; i < 32; i += 4)
48992 if (d->perm[i] + 2 != d->perm[i + 2])
48996 /* See if bytes move the same in both lanes. If yes,
48997 vpblendw with immediate can be used. */
48998 for (i = 0; i < 16; i += 2)
48999 if (d->perm[i] + 16 != d->perm[i + 16])
49002 /* Use vpblendw. */
49003 for (i = 0; i < 16; ++i)
49004 mask |= (d->perm[i * 2] >= 32) << i;
49009 /* Use vpblendd. */
49010 for (i = 0; i < 8; ++i)
49011 mask |= (d->perm[i * 4] >= 32) << i;
49016 /* See if words move in pairs. If yes, vpblendd can be used. */
49017 for (i = 0; i < 16; i += 2)
49018 if (d->perm[i] + 1 != d->perm[i + 1])
49022 /* See if words move the same in both lanes. If not,
49023 vpblendvb must be used. */
49024 for (i = 0; i < 8; i++)
49025 if (d->perm[i] + 8 != d->perm[i + 8])
49027 /* Use vpblendvb. */
49028 for (i = 0; i < 32; ++i)
49029 rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);
49033 target = gen_reg_rtx (vmode);
49034 op0 = gen_lowpart (vmode, op0);
49035 op1 = gen_lowpart (vmode, op1);
49036 goto finish_pblendvb;
49039 /* Use vpblendw. */
49040 for (i = 0; i < 16; ++i)
49041 mask |= (d->perm[i] >= 16) << i;
49045 /* Use vpblendd. */
49046 for (i = 0; i < 8; ++i)
49047 mask |= (d->perm[i * 2] >= 16) << i;
49052 /* Use vpblendd. */
49053 for (i = 0; i < 4; ++i)
49054 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
49059 gcc_unreachable ();
/* AVX-512 blends take the mask in a mask register; the others take an
   immediate.  */
49082 if (mmode != VOIDmode)
49083 maskop = force_reg (mmode, gen_int_mode (mask, mmode));
49085 maskop = GEN_INT (mask);
49087 /* This matches five different patterns with the different modes. */
49088 x = gen_rtx_VEC_MERGE (vmode, op1, op0, maskop);
49089 x = gen_rtx_SET (target, x);
49091 if (target != d->target)
49092 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
49097 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
49098 in terms of the variable form of vpermilps.
49100 Note that we will have already failed the immediate input vpermilps,
49101 which requires that the high and low part shuffle be identical; the
49102 variable form doesn't require that. */
49105 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
49107 rtx rperm[8], vperm;
/* vpermilps variable form: AVX, V8SFmode, one input only.  */
49110 if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p)
49113 /* We can only permute within the 128-bit lane. */
49114 for (i = 0; i < 8; ++i)
49116 unsigned e = d->perm[i];
49117 if (i < 4 ? e >= 4 : e < 4)
49124 for (i = 0; i < 8; ++i)
49126 unsigned e = d->perm[i];
49128 /* Within each 128-bit lane, the elements of op0 are numbered
49129 from 0 and the elements of op1 are numbered from 4. */
49135 rperm[i] = GEN_INT (e);
/* Emit the control vector and the variable vpermilps itself.  */
49138 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
49139 vperm = force_reg (V8SImode, vperm);
49140 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
49145 /* Return true if permutation D can be performed as VMODE permutation
49149 valid_perm_using_mode_p (machine_mode vmode, struct expand_vec_perm_d *d)
49151 unsigned int i, j, chunk;
/* Both modes must be integer vectors of the same total size, and VMODE
   must have fewer (i.e. wider) elements than D's mode.  */
49153 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
49154 || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
49155 || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
49158 if (GET_MODE_NUNITS (vmode) >= d->nelt)
/* Each CHUNK-sized group must start chunk-aligned and move as a unit
   (consecutive source indices), i.e. be expressible as one wide element.  */
49161 chunk = d->nelt / GET_MODE_NUNITS (vmode);
49162 for (i = 0; i < d->nelt; i += chunk)
49163 if (d->perm[i] & (chunk - 1))
49166 for (j = 1; j < chunk; ++j)
49167 if (d->perm[i] + j != d->perm[i + j])
49173 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
49174 in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */
49177 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
49179 unsigned i, nelt, eltsz, mask;
49180 unsigned char perm[64];
49181 machine_mode vmode = V16QImode;
49182 rtx rperm[64], vperm, target, op0, op1;
/* Two-operand case: only XOP's vpperm (16 bytes) or vperm2i128 apply.  */
49186 if (!d->one_operand_p)
49188 if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
49191 && valid_perm_using_mode_p (V2TImode, d))
49196 /* Use vperm2i128 insn. The pattern uses
49197 V4DImode instead of V2TImode. */
49198 target = d->target;
49199 if (d->vmode != V4DImode)
49200 target = gen_reg_rtx (V4DImode);
49201 op0 = gen_lowpart (V4DImode, d->op0);
49202 op1 = gen_lowpart (V4DImode, d->op1);
/* Encode which 128-bit half of each operand each result half takes.  */
49204 = GEN_INT ((d->perm[0] / (nelt / 2))
49205 | ((d->perm[nelt / 2] / (nelt / 2)) * 16));
49206 emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
49207 if (target != d->target)
49208 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
49216 if (GET_MODE_SIZE (d->vmode) == 16)
49221 else if (GET_MODE_SIZE (d->vmode) == 32)
49226 /* V4DImode should be already handled through
49227 expand_vselect by vpermq instruction. */
49228 gcc_assert (d->vmode != V4DImode);
49231 if (d->vmode == V8SImode
49232 || d->vmode == V16HImode
49233 || d->vmode == V32QImode)
49235 /* First see if vpermq can be used for
49236 V8SImode/V16HImode/V32QImode. */
49237 if (valid_perm_using_mode_p (V4DImode, d))
49239 for (i = 0; i < 4; i++)
49240 perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
49243 target = gen_reg_rtx (V4DImode);
49244 if (expand_vselect (target, gen_lowpart (V4DImode, d->op0),
49247 emit_move_insn (d->target,
49248 gen_lowpart (d->vmode, target));
49254 /* Next see if vpermd can be used. */
49255 if (valid_perm_using_mode_p (V8SImode, d))
49258 /* Or if vpermps can be used. */
49259 else if (d->vmode == V8SFmode)
49262 if (vmode == V32QImode)
49264 /* vpshufb only works intra lanes, it is not
49265 possible to shuffle bytes in between the lanes. */
49266 for (i = 0; i < nelt; ++i)
49267 if ((d->perm[i] ^ i) & (nelt / 2))
49271 else if (GET_MODE_SIZE (d->vmode) == 64)
49273 if (!TARGET_AVX512BW)
49276 /* If vpermq didn't work, vpshufb won't work either. */
49277 if (d->vmode == V8DFmode || d->vmode == V8DImode)
49281 if (d->vmode == V16SImode
49282 || d->vmode == V32HImode
49283 || d->vmode == V64QImode)
49285 /* First see if vpermq can be used for
49286 V16SImode/V32HImode/V64QImode. */
49287 if (valid_perm_using_mode_p (V8DImode, d))
49289 for (i = 0; i < 8; i++)
49290 perm[i] = (d->perm[i * nelt / 8] * 8 / nelt) & 7;
49293 target = gen_reg_rtx (V8DImode);
49294 if (expand_vselect (target, gen_lowpart (V8DImode, d->op0),
49297 emit_move_insn (d->target,
49298 gen_lowpart (d->vmode, target));
49304 /* Next see if vpermd can be used. */
49305 if (valid_perm_using_mode_p (V16SImode, d))
49308 /* Or if vpermps can be used. */
49309 else if (d->vmode == V16SFmode)
49311 if (vmode == V64QImode)
49313 /* vpshufb only works intra lanes, it is not
49314 possible to shuffle bytes in between the lanes. */
49315 for (i = 0; i < nelt; ++i)
49316 if ((d->perm[i] ^ i) & (nelt / 4))
/* Build the selector for the vpermd/vpermps variants.  */
49327 if (vmode == V8SImode)
49328 for (i = 0; i < 8; ++i)
49329 rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
49330 else if (vmode == V16SImode)
49331 for (i = 0; i < 16; ++i)
49332 rperm[i] = GEN_INT ((d->perm[i * nelt / 16] * 16 / nelt) & 15);
/* Otherwise build a byte-level selector, masking each index to the
   range that the chosen pshufb variant can address.  */
49335 eltsz = GET_MODE_UNIT_SIZE (d->vmode);
49336 if (!d->one_operand_p)
49337 mask = 2 * nelt - 1;
49338 else if (vmode == V16QImode)
49340 else if (vmode == V64QImode)
49341 mask = nelt / 4 - 1;
49343 mask = nelt / 2 - 1;
49345 for (i = 0; i < nelt; ++i)
49347 unsigned j, e = d->perm[i] & mask;
49348 for (j = 0; j < eltsz; ++j)
49349 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
49353 vperm = gen_rtx_CONST_VECTOR (vmode,
49354 gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
49355 vperm = force_reg (vmode, vperm);
49357 target = d->target;
49358 if (d->vmode != vmode)
49359 target = gen_reg_rtx (vmode);
49360 op0 = gen_lowpart (vmode, d->op0);
/* Dispatch to the concrete shuffle insn for the selected mode.  */
49361 if (d->one_operand_p)
49363 if (vmode == V16QImode)
49364 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
49365 else if (vmode == V32QImode)
49366 emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
49367 else if (vmode == V64QImode)
49368 emit_insn (gen_avx512bw_pshufbv64qi3 (target, op0, vperm));
49369 else if (vmode == V8SFmode)
49370 emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
49371 else if (vmode == V8SImode)
49372 emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
49373 else if (vmode == V16SFmode)
49374 emit_insn (gen_avx512f_permvarv16sf (target, op0, vperm));
49375 else if (vmode == V16SImode)
49376 emit_insn (gen_avx512f_permvarv16si (target, op0, vperm));
49378 gcc_unreachable ();
/* Two-operand case falls through to XOP vpperm.  */
49382 op1 = gen_lowpart (vmode, d->op1);
49383 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
49385 if (target != d->target)
49386 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
49391 /* For V*[QHS]Imode permutations, check if the same permutation
49392 can't be performed in a 2x, 4x or 8x wider inner mode. */
49395 canonicalize_vector_int_perm (const struct expand_vec_perm_d *d,
49396 struct expand_vec_perm_d *nd)
49399 enum machine_mode mode = VOIDmode;
/* Map each integer vector mode to the mode with elements twice as wide.  */
49403 case V16QImode: mode = V8HImode; break;
49404 case V32QImode: mode = V16HImode; break;
49405 case V64QImode: mode = V32HImode; break;
49406 case V8HImode: mode = V4SImode; break;
49407 case V16HImode: mode = V8SImode; break;
49408 case V32HImode: mode = V16SImode; break;
49409 case V4SImode: mode = V2DImode; break;
49410 case V8SImode: mode = V4DImode; break;
49411 case V16SImode: mode = V8DImode; break;
49412 default: return false;
/* Pairs must be even-aligned and consecutive to merge into one wide
   element.  */
49414 for (i = 0; i < d->nelt; i += 2)
49415 if ((d->perm[i] & 1) || d->perm[i + 1] != d->perm[i] + 1)
49418 nd->nelt = d->nelt / 2;
49419 for (i = 0; i < nd->nelt; i++)
49420 nd->perm[i] = d->perm[2 * i] / 2;
/* Keep widening recursively until DImode elements are reached.  */
49421 if (GET_MODE_INNER (mode) != DImode)
49422 canonicalize_vector_int_perm (nd, nd);
49425 nd->one_operand_p = d->one_operand_p;
49426 nd->testing_p = d->testing_p;
49427 if (d->op0 == d->op1)
49428 nd->op0 = nd->op1 = gen_lowpart (nd->vmode, d->op0);
49431 nd->op0 = gen_lowpart (nd->vmode, d->op0);
49432 nd->op1 = gen_lowpart (nd->vmode, d->op1);
/* In testing mode use a raw virtual reg to avoid allocating pseudos.  */
49435 nd->target = gen_raw_REG (nd->vmode, LAST_VIRTUAL_REGISTER + 1);
49437 nd->target = gen_reg_rtx (nd->vmode);
49442 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
49443 in a single instruction.  Returns true on success; when d->testing_p
only feasibility is checked and no insns are emitted (see the
expand_vselect* calls which receive d->testing_p). */
49446 expand_vec_perm_1 (struct expand_vec_perm_d *d)
49448 unsigned i, nelt = d->nelt;
49449 struct expand_vec_perm_d nd;
49451 /* Check plain VEC_SELECT first, because AVX has instructions that could
49452 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
49453 input where SEL+CONCAT may not. */
49454 if (d->one_operand_p)
49456 int mask = nelt - 1;
49457 bool identity_perm = true;
49458 bool broadcast_perm = true;
/* Classify the masked permutation: identity (a plain move) or a
   broadcast of element 0.  */
49460 for (i = 0; i < nelt; i++)
49462 nd.perm[i] = d->perm[i] & mask;
49463 if (nd.perm[i] != i)
49464 identity_perm = false;
49466 broadcast_perm = false;
/* Identity: just copy op0 into the target.  */
49472 emit_move_insn (d->target, d->op0);
49475 else if (broadcast_perm && TARGET_AVX2)
49477 /* Use vpbroadcast{b,w,d}. */
49478 rtx (*gen) (rtx, rtx) = NULL;
/* Per-mode broadcast generator selection; the switch/case labels
   fall in lines not visible here.  AVX512 variants are preferred
   when available for the wider modes.  */
49482 if (TARGET_AVX512BW)
49483 gen = gen_avx512bw_vec_dupv64qi_1;
49486 gen = gen_avx2_pbroadcastv32qi_1;
49489 if (TARGET_AVX512BW)
49490 gen = gen_avx512bw_vec_dupv32hi_1;
49493 gen = gen_avx2_pbroadcastv16hi_1;
49496 if (TARGET_AVX512F)
49497 gen = gen_avx512f_vec_dupv16si_1;
49500 gen = gen_avx2_pbroadcastv8si_1;
49503 gen = gen_avx2_pbroadcastv16qi;
49506 gen = gen_avx2_pbroadcastv8hi;
49509 if (TARGET_AVX512F)
49510 gen = gen_avx512f_vec_dupv16sf_1;
49513 gen = gen_avx2_vec_dupv8sf_1;
49516 if (TARGET_AVX512F)
49517 gen = gen_avx512f_vec_dupv8df_1;
49520 if (TARGET_AVX512F)
49521 gen = gen_avx512f_vec_dupv8di_1;
49523 /* For other modes prefer other shuffles this function creates. */
49529 emit_insn (gen (d->target, d->op0));
/* General one-operand case: a plain VEC_SELECT.  */
49534 if (expand_vselect (d->target, d->op0, nd.perm, nelt, d->testing_p))
49537 /* There are plenty of patterns in sse.md that are written for
49538 SEL+CONCAT and are not replicated for a single op. Perhaps
49539 that should be changed, to avoid the nastiness here. */
49541 /* Recognize interleave style patterns, which means incrementing
49542 every other permutation operand. */
49543 for (i = 0; i < nelt; i += 2)
49545 nd.perm[i] = d->perm[i] & mask;
49546 nd.perm[i + 1] = (d->perm[i + 1] & mask) + nelt;
49548 if (expand_vselect_vconcat (d->target, d->op0, d->op0, nd.perm, nelt,
49552 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
49555 for (i = 0; i < nelt; i += 4)
49557 nd.perm[i + 0] = d->perm[i + 0] & mask;
49558 nd.perm[i + 1] = d->perm[i + 1] & mask;
49559 nd.perm[i + 2] = (d->perm[i + 2] & mask) + nelt;
49560 nd.perm[i + 3] = (d->perm[i + 3] & mask) + nelt;
49563 if (expand_vselect_vconcat (d->target, d->op0, d->op0, nd.perm, nelt,
49569 /* Finally, try the fully general two operand permute. */
49570 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
49574 /* Recognize interleave style patterns with reversed operands. */
49575 if (!d->one_operand_p)
49577 for (i = 0; i < nelt; ++i)
49579 unsigned e = d->perm[i];
49587 if (expand_vselect_vconcat (d->target, d->op1, d->op0, nd.perm, nelt,
49592 /* Try the SSE4.1 blend variable merge instructions. */
49593 if (expand_vec_perm_blend (d))
49596 /* Try one of the AVX vpermil variable permutations. */
49597 if (expand_vec_perm_vpermil (d))
49600 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
49601 vpshufb, vpermd, vpermps or vpermq variable permutation. */
49602 if (expand_vec_perm_pshufb (d))
49605 /* Try the AVX2 vpalignr instruction. */
49606 if (expand_vec_perm_palignr (d, true))
49609 /* Try the AVX512F vpermi2 instructions. */
49610 if (ix86_expand_vec_perm_vpermi2 (NULL_RTX, NULL_RTX, NULL_RTX, NULL_RTX, d))
49613 /* See if we can get the same permutation in different vector integer
mode — recurse on the canonicalized wider-element form and copy the
result back through a lowpart.  */
49615 if (canonicalize_vector_int_perm (d, &nd) && expand_vec_perm_1 (&nd))
49618 emit_move_insn (d->target, gen_lowpart (d->vmode, nd.target));
49624 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
49625 in terms of a pair of pshuflw + pshufhw instructions.  Only handles
one-operand V8HImode permutations that do not cross the 64-bit
halves. */
49628 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
49630 unsigned char perm2[MAX_VECT_LEN];
49634 if (d->vmode != V8HImode || !d->one_operand_p)
49637 /* The two permutations only operate in 64-bit lanes. */
49638 for (i = 0; i < 4; ++i)
49639 if (d->perm[i] >= 4)
49641 for (i = 4; i < 8; ++i)
49642 if (d->perm[i] < 4)
49648 /* Emit the pshuflw. */
/* Low half from d->perm; the high half is presumably filled with
   identity indices in the lines not visible here — confirm.  */
49649 memcpy (perm2, d->perm, 4);
49650 for (i = 4; i < 8; ++i)
49652 ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p);
49655 /* Emit the pshufhw. */
/* Second shuffle reads d->target (result of the first), completing
   the permutation of the high half.  */
49656 memcpy (perm2 + 4, d->perm + 4, 4);
49657 for (i = 0; i < 4; ++i)
49659 ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p);
49665 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
49666 the permutation using the SSSE3 palignr instruction. This succeeds
49667 when all of the elements in PERM fit within one vector and we merely
49668 need to shift them down so that a single vector permutation has a
49669 chance to succeed. If SINGLE_INSN_ONLY_P, succeed if only
49670 the vpalignr instruction itself can perform the requested permutation. */
49673 expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool single_insn_only_p)
49675 unsigned i, nelt = d->nelt;
49676 unsigned min, max, minswap, maxswap;
49677 bool in_order, ok, swap = false;
49679 struct expand_vec_perm_d dcopy;
49681 /* Even with AVX, palignr only operates on 128-bit vectors,
49682 in AVX2 palignr operates on both 128-bit lanes. */
49683 if ((!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
49684 && (!TARGET_AVX2 || GET_MODE_SIZE (d->vmode) != 32))
/* Track element index ranges both as-is and with operands swapped
   (eswap flips the operand-select bit), to see whether either order
   fits within one vector's span.  */
49689 minswap = 2 * nelt;
49691 for (i = 0; i < nelt; ++i)
49693 unsigned e = d->perm[i];
49694 unsigned eswap = d->perm[i] ^ nelt;
/* For 32-byte vectors the span test is per 128-bit lane, so fold
   the index into lane-local form.  */
49695 if (GET_MODE_SIZE (d->vmode) == 32)
49697 e = (e & ((nelt / 2) - 1)) | ((e & nelt) >> 1);
49698 eswap = e ^ (nelt / 2);
49704 if (eswap < minswap)
49706 if (eswap > maxswap)
49710 || max - min >= (GET_MODE_SIZE (d->vmode) == 32 ? nelt / 2 : nelt))
49712 if (d->one_operand_p
49714 || maxswap - minswap >= (GET_MODE_SIZE (d->vmode) == 32
49715 ? nelt / 2 : nelt))
49722 /* Given that we have SSSE3, we know we'll be able to implement the
49723 single operand permutation after the palignr with pshufb for
49724 128-bit vectors. If SINGLE_INSN_ONLY_P, in_order has to be computed
49726 if (d->testing_p && GET_MODE_SIZE (d->vmode) == 16 && !single_insn_only_p)
/* Swap variant chosen: operate on (op1, op0) and flip the operand
   bit in every index.  */
49732 dcopy.op0 = d->op1;
49733 dcopy.op1 = d->op0;
49734 for (i = 0; i < nelt; ++i)
49735 dcopy.perm[i] ^= nelt;
/* Rebase all indices by MIN; also determine whether the shifted
   result is already in order (palignr alone suffices).  */
49739 for (i = 0; i < nelt; ++i)
49741 unsigned e = dcopy.perm[i];
49742 if (GET_MODE_SIZE (d->vmode) == 32
49744 && (e & (nelt / 2 - 1)) < min)
49745 e = e - min - (nelt / 2);
49752 dcopy.one_operand_p = true;
49754 if (single_insn_only_p && !in_order)
49757 /* For AVX2, test whether we can permute the result in one instruction. */
49762 dcopy.op1 = dcopy.op0;
49763 return expand_vec_perm_1 (&dcopy);
/* Emit the actual palignr: shift amount is MIN elements in bits.  */
49766 shift = GEN_INT (min * GET_MODE_UNIT_BITSIZE (d->vmode));
49767 if (GET_MODE_SIZE (d->vmode) == 16)
49769 target = gen_reg_rtx (TImode);
49770 emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, dcopy.op1),
49771 gen_lowpart (TImode, dcopy.op0), shift));
49775 target = gen_reg_rtx (V2TImode);
49776 emit_insn (gen_avx2_palignrv2ti (target,
49777 gen_lowpart (V2TImode, dcopy.op1),
49778 gen_lowpart (V2TImode, dcopy.op0),
49782 dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target);
49784 /* Test for the degenerate case where the alignment by itself
49785 produces the desired permutation. */
49788 emit_move_insn (d->target, dcopy.op0);
/* Otherwise finish with a single one-operand permutation; for
   16-byte vectors this must succeed (pshufb exists), hence the
   assert allows failure only for 32-byte modes.  */
49792 ok = expand_vec_perm_1 (&dcopy);
49793 gcc_assert (ok || GET_MODE_SIZE (d->vmode) == 32);
49798 /* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
49799 the permutation using the SSE4_1 pblendv instruction. Potentially
49800 reduces permutation from 2 pshufb and or to 1 pshufb and pblendv. */
49803 expand_vec_perm_pblendv (struct expand_vec_perm_d *d)
49805 unsigned i, which, nelt = d->nelt;
49806 struct expand_vec_perm_d dcopy, dcopy1;
49807 machine_mode vmode = d->vmode;
49810 /* Use the same checks as in expand_vec_perm_blend. */
49811 if (d->one_operand_p)
49813 if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
49815 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
49817 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
49822 /* Figure out where permutation elements stay not in their
49823 respective lanes. */
/* WHICH accumulates bit 1 if any out-of-place element comes from
   op0 (e < nelt), bit 2 if any comes from op1.  */
49824 for (i = 0, which = 0; i < nelt; ++i)
49826 unsigned e = d->perm[i];
49828 which |= (e < nelt ? 1 : 2);
49830 /* We can pblend the part where elements stay not in their
49831 respective lanes only when these elements are all in one
49832 half of a permutation.
49833 {0 1 8 3 4 5 9 7} is ok as 8, 9 are at not at their respective
49834 lanes, but both 8 and 9 >= 8
49835 {0 1 8 3 4 5 2 7} is not ok as 2 and 8 are not at their
49836 respective lanes and 8 >= 8, but 2 not. */
49837 if (which != 1 && which != 2)
49839 if (d->testing_p && GET_MODE_SIZE (vmode) == 16)
49842 /* First we apply one operand permutation to the part where
49843 elements stay not in their respective lanes. */
49846 dcopy.op0 = dcopy.op1 = d->op1;
49848 dcopy.op0 = dcopy.op1 = d->op0;
49850 dcopy.target = gen_reg_rtx (vmode);
49851 dcopy.one_operand_p = true;
/* Mask off the operand-select bit; dcopy is a pure one-operand
   shuffle of the chosen source.  */
49853 for (i = 0; i < nelt; ++i)
49854 dcopy.perm[i] = d->perm[i] & (nelt - 1);
49856 ok = expand_vec_perm_1 (&dcopy);
49857 if (GET_MODE_SIZE (vmode) != 16 && !ok)
49864 /* Next we put permuted elements into their positions. */
49867 dcopy1.op1 = dcopy.target;
49869 dcopy1.op0 = dcopy.target;
/* Build the blend mask: element i comes from the permuted vector
   iff the original index selected op1 (>= nelt).  */
49871 for (i = 0; i < nelt; ++i)
49872 dcopy1.perm[i] = ((d->perm[i] >= nelt) ? (nelt + i) : i);
49874 ok = expand_vec_perm_blend (&dcopy1);
49880 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
49882 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
49883 a two vector permutation into a single vector permutation by using
49884 an interleave operation to merge the vectors.  The merge (dremap)
is emitted first, then a final one-operand shuffle (dfinal) moves
the elements into place via the REMAP table. */
49887 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
49889 struct expand_vec_perm_d dremap, dfinal;
49890 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
49891 unsigned HOST_WIDE_INT contents;
49892 unsigned char remap[2 * MAX_VECT_LEN];
49894 bool ok, same_halves = false;
49896 if (GET_MODE_SIZE (d->vmode) == 16)
49898 if (d->one_operand_p)
49901 else if (GET_MODE_SIZE (d->vmode) == 32)
49905 /* For 32-byte modes allow even d->one_operand_p.
49906 The lack of cross-lane shuffling in some instructions
49907 might prevent a single insn shuffle. */
49909 dfinal.testing_p = true;
49910 /* If expand_vec_perm_interleave3 can expand this into
49911 a 3 insn sequence, give up and let it be expanded as
49912 3 insn sequence. While that is one insn longer,
49913 it doesn't need a memory operand and in the common
49914 case that both interleave low and high permutations
49915 with the same operands are adjacent needs 4 insns
49916 for both after CSE. */
49917 if (expand_vec_perm_interleave3 (&dfinal))
49923 /* Examine from whence the elements come.  CONTENTS is a bitmask
over the 2*nelt possible source indices. */
49925 for (i = 0; i < nelt; ++i)
49926 contents |= HOST_WIDE_INT_1U << d->perm[i];
49928 memset (remap, 0xff, sizeof (remap));
49931 if (GET_MODE_SIZE (d->vmode) == 16)
49933 unsigned HOST_WIDE_INT h1, h2, h3, h4;
49935 /* Split the two input vectors into 4 halves. */
49936 h1 = (HOST_WIDE_INT_1U << nelt2) - 1;
49941 /* If the elements from the low halves use interleave low, and similarly
49942 for interleave high. If the elements are from mis-matched halves, we
49943 can use shufps for V4SF/V4SI or do a DImode shuffle. */
49944 if ((contents & (h1 | h3)) == contents)
/* Both low halves: interleave-low pattern.  */
49947 for (i = 0; i < nelt2; ++i)
49950 remap[i + nelt] = i * 2 + 1;
49951 dremap.perm[i * 2] = i;
49952 dremap.perm[i * 2 + 1] = i + nelt;
49954 if (!TARGET_SSE2 && d->vmode == V4SImode)
49955 dremap.vmode = V4SFmode;
49957 else if ((contents & (h2 | h4)) == contents)
/* Both high halves: interleave-high pattern.  */
49960 for (i = 0; i < nelt2; ++i)
49962 remap[i + nelt2] = i * 2;
49963 remap[i + nelt + nelt2] = i * 2 + 1;
49964 dremap.perm[i * 2] = i + nelt2;
49965 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
49967 if (!TARGET_SSE2 && d->vmode == V4SImode)
49968 dremap.vmode = V4SFmode;
49970 else if ((contents & (h1 | h4)) == contents)
/* Low half of op0 + high half of op1: punpck{l,h}qdq-style DImode
   shuffle (or shufps fallback; the guarding lines are not all
   visible here).  */
49973 for (i = 0; i < nelt2; ++i)
49976 remap[i + nelt + nelt2] = i + nelt2;
49977 dremap.perm[i] = i;
49978 dremap.perm[i + nelt2] = i + nelt + nelt2;
49983 dremap.vmode = V2DImode;
49985 dremap.perm[0] = 0;
49986 dremap.perm[1] = 3;
49989 else if ((contents & (h2 | h3)) == contents)
/* High half of op0 + low half of op1.  */
49992 for (i = 0; i < nelt2; ++i)
49994 remap[i + nelt2] = i;
49995 remap[i + nelt] = i + nelt2;
49996 dremap.perm[i] = i + nelt2;
49997 dremap.perm[i + nelt2] = i + nelt;
50002 dremap.vmode = V2DImode;
50004 dremap.perm[0] = 1;
50005 dremap.perm[1] = 2;
/* 32-byte vectors: work on quarters (128-bit half-lanes).  */
50013 unsigned int nelt4 = nelt / 4, nzcnt = 0;
50014 unsigned HOST_WIDE_INT q[8];
50015 unsigned int nonzero_halves[4];
50017 /* Split the two input vectors into 8 quarters. */
50018 q[0] = (HOST_WIDE_INT_1U << nelt4) - 1;
50019 for (i = 1; i < 8; ++i)
50020 q[i] = q[0] << (nelt4 * i);
50021 for (i = 0; i < 4; ++i)
50022 if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
50024 nonzero_halves[nzcnt] = i;
50030 gcc_assert (d->one_operand_p);
50031 nonzero_halves[1] = nonzero_halves[0];
50032 same_halves = true;
50034 else if (d->one_operand_p)
50036 gcc_assert (nonzero_halves[0] == 0);
50037 gcc_assert (nonzero_halves[1] == 1);
50042 if (d->perm[0] / nelt2 == nonzero_halves[1])
50044 /* Attempt to increase the likelihood that dfinal
50045 shuffle will be intra-lane. */
50046 std::swap (nonzero_halves[0], nonzero_halves[1]);
50049 /* vperm2f128 or vperm2i128. */
50050 for (i = 0; i < nelt2; ++i)
50052 remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
50053 remap[i + nonzero_halves[0] * nelt2] = i;
50054 dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
50055 dremap.perm[i] = i + nonzero_halves[0] * nelt2;
/* vperm2[fi]128 only exists for a few modes; do the lane merge in
   V8SImode and lowpart back for the others.  */
50058 if (d->vmode != V8SFmode
50059 && d->vmode != V4DFmode
50060 && d->vmode != V8SImode)
50062 dremap.vmode = V8SImode;
50064 for (i = 0; i < 4; ++i)
50066 dremap.perm[i] = i + nonzero_halves[0] * 4;
50067 dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
50071 else if (d->one_operand_p)
50073 else if (TARGET_AVX2
50074 && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
/* All elements from even quarters: vpunpckl* per lane.  */
50077 for (i = 0; i < nelt4; ++i)
50080 remap[i + nelt] = i * 2 + 1;
50081 remap[i + nelt2] = i * 2 + nelt2;
50082 remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
50083 dremap.perm[i * 2] = i;
50084 dremap.perm[i * 2 + 1] = i + nelt;
50085 dremap.perm[i * 2 + nelt2] = i + nelt2;
50086 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
50089 else if (TARGET_AVX2
50090 && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
/* All elements from odd quarters: vpunpckh* per lane.  */
50093 for (i = 0; i < nelt4; ++i)
50095 remap[i + nelt4] = i * 2;
50096 remap[i + nelt + nelt4] = i * 2 + 1;
50097 remap[i + nelt2 + nelt4] = i * 2 + nelt2;
50098 remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
50099 dremap.perm[i * 2] = i + nelt4;
50100 dremap.perm[i * 2 + 1] = i + nelt + nelt4;
50101 dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
50102 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
50109 /* Use the remapping array set up above to move the elements from their
50110 swizzled locations into their final destinations. */
50112 for (i = 0; i < nelt; ++i)
50114 unsigned e = remap[d->perm[i]];
50115 gcc_assert (e < nelt);
50116 /* If same_halves is true, both halves of the remapped vector are the
50117 same. Avoid cross-lane accesses if possible. */
50118 if (same_halves && i >= nelt2)
50120 gcc_assert (e < nelt2);
50121 dfinal.perm[i] = e + nelt2;
50124 dfinal.perm[i] = e;
50128 dremap.target = gen_reg_rtx (dremap.vmode);
50129 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
50131 dfinal.op1 = dfinal.op0;
50132 dfinal.one_operand_p = true;
50134 /* Test if the final remap can be done with a single insn. For V4SFmode or
50135 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
50137 ok = expand_vec_perm_1 (&dfinal);
50138 seq = get_insns ();
/* Emit the interleave itself, converting operands if dremap chose a
   different (e.g. V2DImode/V4SFmode) working mode.  */
50147 if (dremap.vmode != dfinal.vmode)
50149 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
50150 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
50153 ok = expand_vec_perm_1 (&dremap);
50160 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
50161 a single vector cross-lane permutation into vpermq followed
50162 by any of the single insn permutations.  Only applies to
one-operand V32QImode/V16HImode permutations where each half of
the result draws from at most two of the four 64-bit quarters. */
50165 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
50167 struct expand_vec_perm_d dremap, dfinal;
50168 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
50169 unsigned contents[2];
50173 && (d->vmode == V32QImode || d->vmode == V16HImode)
50174 && d->one_operand_p))
/* contents[k] is a 4-bit mask of which 64-bit quarters feed half k
   of the result.  */
50179 for (i = 0; i < nelt2; ++i)
50181 contents[0] |= 1u << (d->perm[i] / nelt4);
50182 contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
/* Punt if either half needs more than two source quarters.  */
50185 for (i = 0; i < 2; ++i)
50187 unsigned int cnt = 0;
50188 for (j = 0; j < 4; ++j)
50189 if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
/* dremap is the vpermq: gather the needed quarters into the two
   halves of a V4DImode temporary.  */
50197 dremap.vmode = V4DImode;
50199 dremap.target = gen_reg_rtx (V4DImode);
50200 dremap.op0 = gen_lowpart (V4DImode, d->op0);
50201 dremap.op1 = dremap.op0;
50202 dremap.one_operand_p = true;
50203 for (i = 0; i < 2; ++i)
50205 unsigned int cnt = 0;
50206 for (j = 0; j < 4; ++j)
50207 if ((contents[i] & (1u << j)) != 0)
50208 dremap.perm[2 * i + cnt++] = j;
50209 for (; cnt < 2; ++cnt)
50210 dremap.perm[2 * i + cnt] = 0;
50214 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
50215 dfinal.op1 = dfinal.op0;
50216 dfinal.one_operand_p = true;
/* Rewrite each original index relative to where vpermq placed its
   quarter; gcc_unreachable if a quarter was not gathered.  */
50217 for (i = 0, j = 0; i < nelt; ++i)
50221 dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
50222 if ((d->perm[i] / nelt4) == dremap.perm[j])
50224 else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
50225 dfinal.perm[i] |= nelt4;
50227 gcc_unreachable ();
50230 ok = expand_vec_perm_1 (&dremap);
50233 ok = expand_vec_perm_1 (&dfinal);
50239 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to expand
50240 a vector permutation using two instructions, vperm2f128 resp.
50241 vperm2i128 followed by any single in-lane permutation. */
50244 expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
50246 struct expand_vec_perm_d dfirst, dsecond;
50247 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm;
50251 || GET_MODE_SIZE (d->vmode) != 32
50252 || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2))
50256 dsecond.one_operand_p = false;
50257 dsecond.testing_p = true;
50259 /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
50260 immediate. For perm < 16 the second permutation uses
50261 d->op0 as first operand, for perm >= 16 it uses d->op1
50262 as first operand. The second operand is the result of
50264 for (perm = 0; perm < 32; perm++)
50266 /* Ignore permutations which do not move anything cross-lane. */
50269 /* The second shuffle for e.g. V4DFmode has
50270 0123 and ABCD operands.
50271 Ignore AB23, as 23 is already in the second lane
50272 of the first operand. */
50273 if ((perm & 0xc) == (1 << 2)) continue;
50274 /* And 01CD, as 01 is in the first lane of the first
50276 if ((perm & 3) == 0) continue;
50277 /* And 4567, as then the vperm2[fi]128 doesn't change
50278 anything on the original 4567 second operand. */
50279 if ((perm & 0xf) == ((3 << 2) | 2)) continue;
50283 /* The second shuffle for e.g. V4DFmode has
50284 4567 and ABCD operands.
50285 Ignore AB67, as 67 is already in the second lane
50286 of the first operand. */
50287 if ((perm & 0xc) == (3 << 2)) continue;
50288 /* And 45CD, as 45 is in the first lane of the first
50290 if ((perm & 3) == 2) continue;
50291 /* And 0123, as then the vperm2[fi]128 doesn't change
50292 anything on the original 0123 first operand. */
50293 if ((perm & 0xf) == (1 << 2)) continue;
/* Check each element is reachable either from the vperm2f128
   result or from the untouched operand; otherwise this PERM is
   rejected (the rejecting branch is in lines not visible here).  */
50296 for (i = 0; i < nelt; i++)
50298 j = d->perm[i] / nelt2;
50299 if (j == ((perm >> (2 * (i >= nelt2))) & 3))
50300 dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1));
50301 else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16))
50302 dsecond.perm[i] = d->perm[i] & (nelt - 1);
/* Dry-run the in-lane shuffle first (dsecond.testing_p).  */
50310 ok = expand_vec_perm_1 (&dsecond);
50321 /* Found a usable second shuffle. dfirst will be
50322 vperm2f128 on d->op0 and d->op1. */
50323 dsecond.testing_p = false;
50325 dfirst.target = gen_reg_rtx (d->vmode);
50326 for (i = 0; i < nelt; i++)
50327 dfirst.perm[i] = (i & (nelt2 - 1))
50328 + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;
50330 canonicalize_perm (&dfirst);
50331 ok = expand_vec_perm_1 (&dfirst);
50334 /* And dsecond is some single insn shuffle, taking
50335 d->op0 and result of vperm2f128 (if perm < 16) or
50336 d->op1 and result of vperm2f128 (otherwise). */
50338 dsecond.op0 = dsecond.op1;
50339 dsecond.op1 = dfirst.target;
50341 ok = expand_vec_perm_1 (&dsecond);
50347 /* For one operand, the only useful vperm2f128 permutation is 0x01
i.e. swapping the two 128-bit lanes (remaining handling is in
lines not visible here). */
50349 if (d->one_operand_p)
50356 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
50357 a two vector permutation using 2 intra-lane interleave insns
50358 and cross-lane shuffle for 32-byte vectors.  Matches permutations
of the shape { k, k+nelt, k+1, k+1+nelt, ... } with k == 0
(interleave low) or k == nelt/2 (interleave high). */
50361 expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
50364 rtx (*gen) (rtx, rtx, rtx);
50366 if (d->one_operand_p)
50368 if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
50370 else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
/* Element 0 selects between the low/high interleave forms.  */
50376 if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
50378 for (i = 0; i < nelt; i += 2)
50379 if (d->perm[i] != d->perm[0] + i / 2
50380 || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
/* Per-mode generator selection; switch labels are in lines not
   visible here.  */
50390 gen = gen_vec_interleave_highv32qi;
50392 gen = gen_vec_interleave_lowv32qi;
50396 gen = gen_vec_interleave_highv16hi;
50398 gen = gen_vec_interleave_lowv16hi;
50402 gen = gen_vec_interleave_highv8si;
50404 gen = gen_vec_interleave_lowv8si;
50408 gen = gen_vec_interleave_highv4di;
50410 gen = gen_vec_interleave_lowv4di;
50414 gen = gen_vec_interleave_highv8sf;
50416 gen = gen_vec_interleave_lowv8sf;
50420 gen = gen_vec_interleave_highv4df;
50422 gen = gen_vec_interleave_lowv4df;
50425 gcc_unreachable ();
50428 emit_insn (gen (d->target, d->op0, d->op1));
50432 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement
50433 a single vector permutation using a single intra-lane vector
50434 permutation, vperm2f128 swapping the lanes and vblend* insn blending
50435 the non-swapped and swapped vectors together. */
50438 expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
50440 struct expand_vec_perm_d dfirst, dsecond;
50441 unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2;
50444 rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;
50448 || (d->vmode != V8SFmode && d->vmode != V4DFmode)
50449 || !d->one_operand_p)
/* Build dfirst as the intra-lane shuffle: place each element in
   its own lane; 0xff marks still-unassigned slots.  A conflicting
   assignment means the permutation doesn't fit this pattern.
   MSK presumably accumulates the blend immediate — the updating
   line falls in the gap; confirm.  */
50453 for (i = 0; i < nelt; i++)
50454 dfirst.perm[i] = 0xff;
50455 for (i = 0, msk = 0; i < nelt; i++)
50457 j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2;
50458 if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i])
50460 dfirst.perm[j] = d->perm[i];
50464 for (i = 0; i < nelt; i++)
50465 if (dfirst.perm[i] == 0xff)
50466 dfirst.perm[i] = i;
50469 dfirst.target = gen_reg_rtx (dfirst.vmode);
50472 ok = expand_vec_perm_1 (&dfirst);
50473 seq = get_insns ();
/* dsecond swaps the two 128-bit lanes of dfirst's result.  */
50485 dsecond.op0 = dfirst.target;
50486 dsecond.op1 = dfirst.target;
50487 dsecond.one_operand_p = true;
50488 dsecond.target = gen_reg_rtx (dsecond.vmode);
50489 for (i = 0; i < nelt; i++)
50490 dsecond.perm[i] = i ^ nelt2;
50492 ok = expand_vec_perm_1 (&dsecond);
/* Blend the unswapped and swapped vectors with the mask computed
   above.  */
50495 blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256;
50496 emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk)));
50500 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement a V4DF
50501 permutation using two vperm2f128, followed by a vshufpd insn blending
50502 the two vectors together. */
50505 expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d)
50507 struct expand_vec_perm_d dfirst, dsecond, dthird;
50510 if (!TARGET_AVX || (d->vmode != V4DFmode))
/* dfirst gathers the even-aligned pairs containing d->perm[0] and
   d->perm[2]; dsecond those containing d->perm[1] and d->perm[3].
   Clearing bit 0 (& ~1) rounds each index down to its pair.  */
50520 dfirst.perm[0] = (d->perm[0] & ~1);
50521 dfirst.perm[1] = (d->perm[0] & ~1) + 1;
50522 dfirst.perm[2] = (d->perm[2] & ~1);
50523 dfirst.perm[3] = (d->perm[2] & ~1) + 1;
50524 dsecond.perm[0] = (d->perm[1] & ~1);
50525 dsecond.perm[1] = (d->perm[1] & ~1) + 1;
50526 dsecond.perm[2] = (d->perm[3] & ~1);
50527 dsecond.perm[3] = (d->perm[3] & ~1) + 1;
/* dthird is the vshufpd: pick the odd/even element of each pair
   from dfirst (indices 0..3) or dsecond (indices 4..7).  */
50528 dthird.perm[0] = (d->perm[0] % 2);
50529 dthird.perm[1] = (d->perm[1] % 2) + 4;
50530 dthird.perm[2] = (d->perm[2] % 2) + 2;
50531 dthird.perm[3] = (d->perm[3] % 2) + 6;
50533 dfirst.target = gen_reg_rtx (dfirst.vmode);
50534 dsecond.target = gen_reg_rtx (dsecond.vmode);
50535 dthird.op0 = dfirst.target;
50536 dthird.op1 = dsecond.target;
50537 dthird.one_operand_p = false;
50539 canonicalize_perm (&dfirst);
50540 canonicalize_perm (&dsecond);
50542 ok = expand_vec_perm_1 (&dfirst)
50543 && expand_vec_perm_1 (&dsecond)
50544 && expand_vec_perm_1 (&dthird);
50551 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
50552 permutation with two pshufb insns and an ior. We should have already
50553 failed all two instruction sequences. */
50556 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
50558 rtx rperm[2][16], vperm, l, h, op, m128;
50559 unsigned int i, nelt, eltsz;
50561 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
50563 gcc_assert (!d->one_operand_p);
50569 eltsz = GET_MODE_UNIT_SIZE (d->vmode);
50571 /* Generate two permutation masks. If the required element is within
50572 the given vector it is shuffled into the proper lane. If the required
50573 element is in the other vector, force a zero into the lane by setting
50574 bit 7 in the permutation mask. */
50575 m128 = GEN_INT (-128);
50576 for (i = 0; i < nelt; ++i)
50578 unsigned j, e = d->perm[i];
50579 unsigned which = (e >= nelt);
/* Expand the element index to its ELTSZ constituent bytes; the
   non-sourcing mask gets -128 (zero the lane).  */
50583 for (j = 0; j < eltsz; ++j)
50585 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
50586 rperm[1-which][i*eltsz + j] = m128;
/* pshufb of op0 with mask 0 -> L.  */
50590 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
50591 vperm = force_reg (V16QImode, vperm);
50593 l = gen_reg_rtx (V16QImode);
50594 op = gen_lowpart (V16QImode, d->op0);
50595 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
/* pshufb of op1 with mask 1 -> H.  */
50597 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
50598 vperm = force_reg (V16QImode, vperm);
50600 h = gen_reg_rtx (V16QImode);
50601 op = gen_lowpart (V16QImode, d->op1);
50602 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
/* OR the halves; go through a temp if the final mode differs.  */
50605 if (d->vmode != V16QImode)
50606 op = gen_reg_rtx (V16QImode);
50607 emit_insn (gen_iorv16qi3 (op, l, h));
50608 if (op != d->target)
50609 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
50614 /* Implement arbitrary permutation of one V32QImode and V16QImode operand
50615 with two vpshufb insns, vpermq and vpor. We should have already failed
50616 all two or three instruction sequences. */
50619 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
50621 rtx rperm[2][32], vperm, l, h, hp, op, m128;
50622 unsigned int i, nelt, eltsz;
50625 || !d->one_operand_p
50626 || (d->vmode != V32QImode && d->vmode != V16HImode))
50633 eltsz = GET_MODE_UNIT_SIZE (d->vmode);
50635 /* Generate two permutation masks. If the required element is within
50636 the same lane, it is shuffled in. If the required element from the
50637 other lane, force a zero by setting bit 7 in the permutation mask.
50638 In the other mask the mask has non-negative elements if element
50639 is requested from the other lane, but also moved to the other lane,
50640 so that the result of vpshufb can have the two V2TImode halves
50642 m128 = GEN_INT (-128);
50643 for (i = 0; i < nelt; ++i)
50645 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
/* WHICH is nonzero (a 16-byte offset in bytes) when element i is
   requested from the other 128-bit lane.  */
50646 unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
50648 for (j = 0; j < eltsz; ++j)
50650 rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
50651 rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
/* Cross-lane contributions: vpshufb with mask 1 -> H.  */
50655 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
50656 vperm = force_reg (V32QImode, vperm);
50658 h = gen_reg_rtx (V32QImode);
50659 op = gen_lowpart (V32QImode, d->op0);
50660 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
50662 /* Swap the 128-byte lanes of h into hp. */
50663 hp = gen_reg_rtx (V4DImode);
50664 op = gen_lowpart (V4DImode, h);
50665 emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
/* In-lane contributions: vpshufb with mask 0 -> L.  */
50668 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
50669 vperm = force_reg (V32QImode, vperm);
50671 l = gen_reg_rtx (V32QImode);
50672 op = gen_lowpart (V32QImode, d->op0);
50673 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
/* Combine with vpor, converting modes as needed.  */
50676 if (d->vmode != V32QImode)
50677 op = gen_reg_rtx (V32QImode);
50678 emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));
50679 if (op != d->target)
50680 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
50685 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
50686 and extract-odd permutations of two V32QImode and V16QImode operand
50687 with two vpshufb insns, vpor and vpermq. We should have already
50688 failed all two or three instruction sequences. */
50691 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
50693 rtx rperm[2][32], vperm, l, h, ior, op, m128;
50694 unsigned int i, nelt, eltsz;
50697 || d->one_operand_p
50698 || (d->vmode != V32QImode && d->vmode != V16HImode))
/* Only the pure extract-even/odd shapes are handled: index i must
   equal 2*i (mod the combined width) up to the even/odd bit.  */
50701 for (i = 0; i < d->nelt; ++i)
50702 if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
50709 eltsz = GET_MODE_UNIT_SIZE (d->vmode);
50711 /* Generate two permutation masks. In the first permutation mask
50712 the first quarter will contain indexes for the first half
50713 of the op0, the second quarter will contain bit 7 set, third quarter
50714 will contain indexes for the second half of the op0 and the
50715 last quarter bit 7 set. In the second permutation mask
50716 the first quarter will contain bit 7 set, the second quarter
50717 indexes for the first half of the op1, the third quarter bit 7 set
50718 and last quarter indexes for the second half of the op1.
50719 I.e. the first mask e.g. for V32QImode extract even will be:
50720 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
50721 (all values masked with 0xf except for -128) and second mask
50722 for extract even will be
50723 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
50724 m128 = GEN_INT (-128);
50725 for (i = 0; i < nelt; ++i)
50727 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
50728 unsigned which = d->perm[i] >= nelt;
/* XORV flips positions in the middle two quarters so each mask's
   live bytes land in the quarter layout described above.  */
50729 unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;
50731 for (j = 0; j < eltsz; ++j)
50733 rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
50734 rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
/* vpshufb op0 with mask 0 -> L.  */
50738 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
50739 vperm = force_reg (V32QImode, vperm);
50741 l = gen_reg_rtx (V32QImode);
50742 op = gen_lowpart (V32QImode, d->op0);
50743 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
/* vpshufb op1 with mask 1 -> H.  */
50745 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
50746 vperm = force_reg (V32QImode, vperm);
50748 h = gen_reg_rtx (V32QImode);
50749 op = gen_lowpart (V32QImode, d->op1);
50750 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
50752 ior = gen_reg_rtx (V32QImode);
50753 emit_insn (gen_iorv32qi3 (ior, l, h));
50755 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */
50756 op = gen_reg_rtx (V4DImode);
50757 ior = gen_lowpart (V4DImode, ior);
50758 emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
50759 const1_rtx, GEN_INT (3)));
50760 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
50765 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
50766 and extract-odd permutations of two V16QI, V8HI, V16HI or V32QI operands
50767 with two "and" and "pack" or two "shift" and "pack" insns. We should
50768 have already failed all two instruction sequences. */
50771 expand_vec_perm_even_odd_pack (struct expand_vec_perm_d *d)
50773 rtx op, dop0, dop1, t, rperm[16];
50774 unsigned i, odd, c, s, nelt = d->nelt;
50775 bool end_perm = false;
50776 machine_mode half_mode;
/* Expander callbacks, chosen below according to d->vmode.  */
50777 rtx (*gen_and) (rtx, rtx, rtx);
50778 rtx (*gen_pack) (rtx, rtx, rtx);
50779 rtx (*gen_shift) (rtx, rtx, rtx);
/* Two-operand permutations only.  */
50781 if (d->one_operand_p)
50787 /* Required for "pack". */
50788 if (!TARGET_SSE4_1)
/* V8HI: operate on V4SI halves, pack down with packusdw.  */
50792 half_mode = V4SImode;
50793 gen_and = gen_andv4si3;
50794 gen_pack = gen_sse4_1_packusdw;
50795 gen_shift = gen_lshrv4si3;
50798 /* No check as all instructions are SSE2. */
/* V16QI: operate on V8HI halves, pack down with packuswb.  */
50801 half_mode = V8HImode;
50802 gen_and = gen_andv8hi3;
50803 gen_pack = gen_sse2_packuswb;
50804 gen_shift = gen_lshrv8hi3;
/* V16HI (AVX2): V8SI halves, vpackusdw.  */
50811 half_mode = V8SImode;
50812 gen_and = gen_andv8si3;
50813 gen_pack = gen_avx2_packusdw;
50814 gen_shift = gen_lshrv8si3;
/* V32QI (AVX2): V16HI halves, vpackuswb.  */
50822 half_mode = V16HImode;
50823 gen_and = gen_andv16hi3;
50824 gen_pack = gen_avx2_packuswb;
50825 gen_shift = gen_lshrv16hi3;
50829 /* Only V8HI, V16QI, V16HI and V32QI modes are more profitable than
50830 general shuffles. */
50834 /* Check that permutation is even or odd. */
50839 for (i = 1; i < nelt; ++i)
50840 if (d->perm[i] != 2 * i + odd)
50846 dop0 = gen_reg_rtx (half_mode);
50847 dop1 = gen_reg_rtx (half_mode);
/* Even extraction: mask the wide elements with a splat of constant C
   (its value is set in elided code) and then pack.  */
50850 for (i = 0; i < nelt / 2; i++)
50851 rperm[i] = GEN_INT (c);
50852 t = gen_rtx_CONST_VECTOR (half_mode, gen_rtvec_v (nelt / 2, rperm));
50853 t = force_reg (half_mode, t);
50854 emit_insn (gen_and (dop0, t, gen_lowpart (half_mode, d->op0)));
50855 emit_insn (gen_and (dop1, t, gen_lowpart (half_mode, d->op1)));
/* Odd extraction: logical shift right instead of masking (shift amount
   S is assigned in code elided from this view).  */
50859 emit_insn (gen_shift (dop0,
50860 gen_lowpart (half_mode, d->op0),
50862 emit_insn (gen_shift (dop1,
50863 gen_lowpart (half_mode, d->op1),
50866 /* In AVX2 for 256 bit case we need to permute pack result. */
50867 if (TARGET_AVX2 && end_perm)
50869 op = gen_reg_rtx (d->vmode);
50870 t = gen_reg_rtx (V4DImode);
50871 emit_insn (gen_pack (op, dop0, dop1));
50872 emit_insn (gen_avx2_permv4di_1 (t,
50873 gen_lowpart (V4DImode, op),
50878 emit_move_insn (d->target, gen_lowpart (d->vmode, t));
50881 emit_insn (gen_pack (d->target, dop0, dop1));
50886 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
50887 and extract-odd permutations of two V64QI operands
50888 with two "shifts", two "truncs" and one "concat" insns for "odd"
50889 and two "truncs" and one concat insn for "even."
50890 Have already failed all two instruction sequences. */
50893 expand_vec_perm_even_odd_trunc (struct expand_vec_perm_d *d)
50895 rtx t1, t2, t3, t4;
50896 unsigned i, odd, nelt = d->nelt;
/* Only two-operand V64QI permutations with AVX512BW are handled.  */
50898 if (!TARGET_AVX512BW
50899 || d->one_operand_p
50900 || d->vmode != V64QImode)
50903 /* Check that permutation is even or odd. */
50908 for (i = 1; i < nelt; ++i)
50909 if (d->perm[i] != 2 * i + odd)
/* Odd extraction: shift each V32HI word right (amount elided in this
   view; presumably 8) so the odd byte lands in the low position.  */
50918 t1 = gen_reg_rtx (V32HImode);
50919 t2 = gen_reg_rtx (V32HImode);
50920 emit_insn (gen_lshrv32hi3 (t1,
50921 gen_lowpart (V32HImode, d->op0),
50923 emit_insn (gen_lshrv32hi3 (t2,
50924 gen_lowpart (V32HImode, d->op1),
/* Even extraction: the low byte of each word is already in place.  */
50929 t1 = gen_lowpart (V32HImode, d->op0);
50930 t2 = gen_lowpart (V32HImode, d->op1);
/* Truncate each V32HI half to V32QI, then concat into the V64QI target.  */
50933 t3 = gen_reg_rtx (V32QImode);
50934 t4 = gen_reg_rtx (V32QImode);
50935 emit_insn (gen_avx512bw_truncatev32hiv32qi2 (t3, t1));
50936 emit_insn (gen_avx512bw_truncatev32hiv32qi2 (t4, t2));
50937 emit_insn (gen_avx_vec_concatv64qi (d->target, t3, t4));
50942 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
50943 and extract-odd permutations. */
50946 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
50948 rtx t1, t2, t3, t4, t5;
/* V4DF: one vperm2f128 pair plus one unpck[lh]pd.  */
50955 t1 = gen_reg_rtx (V4DFmode);
50956 t2 = gen_reg_rtx (V4DFmode);
50958 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
50959 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
50960 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
50962 /* Now an unpck[lh]pd will produce the result required. */
50964 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
50966 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
/* V8SF: shufps selector 0x88 gathers the evens, 0xdd the odds.  */
50972 int mask = odd ? 0xdd : 0x88;
50976 t1 = gen_reg_rtx (V8SFmode);
50977 t2 = gen_reg_rtx (V8SFmode);
50978 t3 = gen_reg_rtx (V8SFmode);
50980 /* Shuffle within the 128-bit lanes to produce:
50981 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
50982 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
50985 /* Shuffle the lanes around to produce:
50986 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
50987 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
50990 /* Shuffle within the 128-bit lanes to produce:
50991 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
50992 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
50994 /* Shuffle within the 128-bit lanes to produce:
50995 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
50996 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
50998 /* Shuffle the lanes around to produce:
50999 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
51000 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
51009 /* These are always directly implementable by expand_vec_perm_1. */
51010 gcc_unreachable ();
51014 return expand_vec_perm_even_odd_pack (d);
51015 else if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB)
51016 return expand_vec_perm_pshufb2 (d);
51021 /* We need 2*log2(N)-1 operations to achieve odd/even
51022 with interleave. */
51023 t1 = gen_reg_rtx (V8HImode);
51024 t2 = gen_reg_rtx (V8HImode);
51025 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
51026 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
51027 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
51028 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
51030 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
51032 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
51038 return expand_vec_perm_even_odd_pack (d);
51042 return expand_vec_perm_even_odd_pack (d);
51045 return expand_vec_perm_even_odd_trunc (d);
/* V4DI: recurse through the V4DF path on a bit-cast copy of D.  */
51050 struct expand_vec_perm_d d_copy = *d;
51051 d_copy.vmode = V4DFmode;
/* Scratch raw REG here, real pseudo below — presumably selected by
   d->testing_p (the condition is elided in this view).  */
51053 d_copy.target = gen_raw_REG (V4DFmode, LAST_VIRTUAL_REGISTER + 1);
51055 d_copy.target = gen_reg_rtx (V4DFmode);
51056 d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
51057 d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
51058 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
51061 emit_move_insn (d->target,
51062 gen_lowpart (V4DImode, d_copy.target));
/* V4DI direct AVX2 path: vperm2ti pair plus vpunpck[lh]qdq.  */
51071 t1 = gen_reg_rtx (V4DImode);
51072 t2 = gen_reg_rtx (V4DImode);
51074 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
51075 emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
51076 emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));
51078 /* Now an vpunpck[lh]qdq will produce the result required. */
51080 t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
51082 t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
/* V8SI: recurse through the V8SF path on a bit-cast copy of D.  */
51089 struct expand_vec_perm_d d_copy = *d;
51090 d_copy.vmode = V8SFmode;
51092 d_copy.target = gen_raw_REG (V8SFmode, LAST_VIRTUAL_REGISTER + 1);
51094 d_copy.target = gen_reg_rtx (V8SFmode);
51095 d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
51096 d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
51097 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
51100 emit_move_insn (d->target,
51101 gen_lowpart (V8SImode, d_copy.target));
/* V8SI direct AVX2 path: vperm2ti + vpshufd + vpunpck[lh]qdq.  */
51110 t1 = gen_reg_rtx (V8SImode);
51111 t2 = gen_reg_rtx (V8SImode);
51112 t3 = gen_reg_rtx (V4DImode);
51113 t4 = gen_reg_rtx (V4DImode);
51114 t5 = gen_reg_rtx (V4DImode);
51116 /* Shuffle the lanes around into
51117 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
51118 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, d->op0),
51119 gen_lowpart (V4DImode, d->op1),
51121 emit_insn (gen_avx2_permv2ti (t4, gen_lowpart (V4DImode, d->op0),
51122 gen_lowpart (V4DImode, d->op1),
51125 /* Swap the 2nd and 3rd position in each lane into
51126 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
51127 emit_insn (gen_avx2_pshufdv3 (t1, gen_lowpart (V8SImode, t3),
51128 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
51129 emit_insn (gen_avx2_pshufdv3 (t2, gen_lowpart (V8SImode, t4),
51130 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
51132 /* Now an vpunpck[lh]qdq will produce
51133 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */
51135 t3 = gen_avx2_interleave_highv4di (t5, gen_lowpart (V4DImode, t1),
51136 gen_lowpart (V4DImode, t2));
51138 t3 = gen_avx2_interleave_lowv4di (t5, gen_lowpart (V4DImode, t1),
51139 gen_lowpart (V4DImode, t2));
51141 emit_move_insn (d->target, gen_lowpart (V8SImode, t5));
51145 gcc_unreachable ();
51151 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
51152 extract-even and extract-odd permutations. */
51155 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
51157 unsigned i, odd, nelt = d->nelt;
/* ODD is derived from d->perm[0] (assignment elided in this view);
   only 0 or 1 can start an even/odd extraction.  */
51160 if (odd != 0 && odd != 1)
/* Every element must be 2*i + odd for the pattern to match.  */
51163 for (i = 1; i < nelt; ++i)
51164 if (d->perm[i] != 2 * i + odd)
51167 return expand_vec_perm_even_odd_1 (d, odd);
51170 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
51171 permutations. We assume that expand_vec_perm_1 has already failed. */
51174 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
51176 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
51177 machine_mode vmode = d->vmode;
51178 unsigned char perm2[4];
51179 rtx op0 = d->op0, dest;
51186 /* These are special-cased in sse.md so that we can optionally
51187 use the vbroadcast instruction. They expand to two insns
51188 if the input happens to be in a register. */
51189 gcc_unreachable ();
51195 /* These are always implementable using standard shuffle patterns. */
51196 gcc_unreachable ();
/* V16QI/V8HI: widen the element by repeated self-interleave — low or
   high half depending on where ELT sits — then finish with pshufd.  */
51200 /* These can be implemented via interleave. We save one insn by
51201 stopping once we have promoted to V4SImode and then use pshufd. */
51207 rtx (*gen) (rtx, rtx, rtx)
51208 = vmode == V16QImode ? gen_vec_interleave_lowv16qi
51209 : gen_vec_interleave_lowv8hi;
51213 gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
51214 : gen_vec_interleave_highv8hi;
/* Each self-interleave doubles the element width; stop at V4SI.  */
51219 dest = gen_reg_rtx (vmode);
51220 emit_insn (gen (dest, op0, op0));
51221 vmode = get_mode_wider_vector (vmode);
51222 op0 = gen_lowpart (vmode, dest);
51224 while (vmode != V4SImode);
/* Splat the surviving V4SI element with a 4-way select.  */
51226 memset (perm2, elt, 4);
51227 dest = gen_reg_rtx (V4SImode);
51228 ok = expand_vselect (dest, op0, perm2, 4, d->testing_p);
51231 emit_move_insn (d->target, gen_lowpart (d->vmode, dest));
51239 /* For AVX2 broadcasts of the first element vpbroadcast* or
51240 vpermq should be used by expand_vec_perm_1. */
51241 gcc_assert (!TARGET_AVX2 || d->perm[0]);
51245 gcc_unreachable ();
51249 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
51250 broadcast permutations. */
51253 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
51255 unsigned i, elt, nelt = d->nelt;
/* Broadcasts only make sense with a single input operand.  */
51257 if (!d->one_operand_p)
/* All indices must equal ELT (taken from d->perm[0]; the assignment
   is elided in this view).  */
51261 for (i = 1; i < nelt; ++i)
51262 if (d->perm[i] != elt)
51265 return expand_vec_perm_broadcast_1 (d);
51268 /* Implement arbitrary permutations of two V64QImode operands
51269 will 2 vpermi2w, 2 vpshufb and one vpor instruction. */
51271 expand_vec_perm_vpermi2_vpshub2 (struct expand_vec_perm_d *d)
51273 if (!TARGET_AVX512BW || !(d->vmode == V64QImode))
51279 struct expand_vec_perm_d ds[2];
51280 rtx rperm[128], vperm, target0, target1;
51281 unsigned int i, nelt;
51282 machine_mode vmode;
/* Build two word-level (V32HI) sub-permutations from the byte perm.  */
51287 for (i = 0; i < 2; i++)
51290 ds[i].vmode = V32HImode;
51292 ds[i].target = gen_reg_rtx (V32HImode);
51293 ds[i].op0 = gen_lowpart (V32HImode, d->op0);
51294 ds[i].op1 = gen_lowpart (V32HImode, d->op1);
51297 /* Prepare permutations such that the first one takes care of
51298 putting the even bytes into the right positions or one higher
51299 positions (ds[0]) and the second one takes care of
51300 putting the odd bytes into the right positions or one below
51303 for (i = 0; i < nelt; i++)
51305 ds[i & 1].perm[i / 2] = d->perm[i] / 2;
/* rperm[0..63] becomes the vpshufb mask applied to ds[0]'s result,
   rperm[64..127] the mask for ds[1]'s; -1 marks bytes that the other
   half supplies (vpshufb zeroes them, the final vpor merges).  */
51308 rperm[i] = constm1_rtx;
51309 rperm[i + 64] = GEN_INT ((i & 14) + (d->perm[i] & 1));
51313 rperm[i] = GEN_INT ((i & 14) + (d->perm[i] & 1));
51314 rperm[i + 64] = constm1_rtx;
51318 bool ok = expand_vec_perm_1 (&ds[0]);
51320 ds[0].target = gen_lowpart (V64QImode, ds[0].target);
51322 ok = expand_vec_perm_1 (&ds[1]);
51324 ds[1].target = gen_lowpart (V64QImode, ds[1].target);
/* vpshufb each word-permuted half into byte position, then vpor.  */
51326 vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm));
51327 vperm = force_reg (vmode, vperm);
51328 target0 = gen_reg_rtx (V64QImode);
51329 emit_insn (gen_avx512bw_pshufbv64qi3 (target0, ds[0].target, vperm));
51331 vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm + 64));
51332 vperm = force_reg (vmode, vperm);
51333 target1 = gen_reg_rtx (V64QImode);
51334 emit_insn (gen_avx512bw_pshufbv64qi3 (target1, ds[1].target, vperm));
51336 emit_insn (gen_iorv64qi3 (d->target, target0, target1));
51340 /* Implement arbitrary permutation of two V32QImode and V16QImode operands
51341 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
51342 all the shorter instruction sequences. */
51345 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
51347 rtx rperm[4][32], vperm, l[2], h[2], op, m128;
51348 unsigned int i, nelt, eltsz;
/* Two-operand V32QI/V16HI permutations only (guard partially elided).  */
51352 || d->one_operand_p
51353 || (d->vmode != V32QImode && d->vmode != V16HImode))
51360 eltsz = GET_MODE_UNIT_SIZE (d->vmode);
51362 /* Generate 4 permutation masks. If the required element is within
51363 the same lane, it is shuffled in. If the required element from the
51364 other lane, force a zero by setting bit 7 in the permutation mask.
51365 In the other mask the mask has non-negative elements if element
51366 is requested from the other lane, but also moved to the other lane,
51367 so that the result of vpshufb can have the two V2TImode halves
51369 m128 = GEN_INT (-128);
/* Start with every mask byte set to -128 ("zero this byte").  */
51370 for (i = 0; i < 32; ++i)
51372 rperm[0][i] = m128;
51373 rperm[1][i] = m128;
51374 rperm[2][i] = m128;
51375 rperm[3][i] = m128;
/* WHICH encodes source operand (bit 1) and same/cross lane (bit 0).  */
51381 for (i = 0; i < nelt; ++i)
51383 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
51384 unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
51385 unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);
51387 for (j = 0; j < eltsz; ++j)
51388 rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
51389 used[which] = true;
/* Cross-lane masks (odd WHICH): vpshufb each operand if needed.  */
51392 for (i = 0; i < 2; ++i)
51394 if (!used[2 * i + 1])
51399 vperm = gen_rtx_CONST_VECTOR (V32QImode,
51400 gen_rtvec_v (32, rperm[2 * i + 1]));
51401 vperm = force_reg (V32QImode, vperm);
51402 h[i] = gen_reg_rtx (V32QImode);
51403 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
51404 emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
51407 /* Swap the 128-byte lanes of h[X]. */
51408 for (i = 0; i < 2; ++i)
51410 if (h[i] == NULL_RTX)
51412 op = gen_reg_rtx (V4DImode);
51413 emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
51414 const2_rtx, GEN_INT (3), const0_rtx,
51416 h[i] = gen_lowpart (V32QImode, op);
/* Same-lane masks (even WHICH): vpshufb each operand if needed.  */
51419 for (i = 0; i < 2; ++i)
51426 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
51427 vperm = force_reg (V32QImode, vperm);
51428 l[i] = gen_reg_rtx (V32QImode);
51429 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
51430 emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
/* OR the same-lane and cross-lane halves per operand...  */
51433 for (i = 0; i < 2; ++i)
51437 op = gen_reg_rtx (V32QImode);
51438 emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
51445 gcc_assert (l[0] && l[1]);
/* ...then OR the two operands' contributions into the target.  */
51447 if (d->vmode != V32QImode)
51448 op = gen_reg_rtx (V32QImode);
51449 emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
51450 if (op != d->target)
51451 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
51455 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
51456 With all of the interface bits taken care of, perform the expansion
51457 in D and return true on success. */
51460 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
/* Strategies are tried in order of increasing instruction count;
   the first helper that matches emits the permutation and we return.  */
51462 /* Try a single instruction expansion. */
51463 if (expand_vec_perm_1 (d))
51466 /* Try sequences of two instructions. */
51468 if (expand_vec_perm_pshuflw_pshufhw (d))
51471 if (expand_vec_perm_palignr (d, false))
51474 if (expand_vec_perm_interleave2 (d))
51477 if (expand_vec_perm_broadcast (d))
51480 if (expand_vec_perm_vpermq_perm_1 (d))
51483 if (expand_vec_perm_vperm2f128 (d))
51486 if (expand_vec_perm_pblendv (d))
51489 /* Try sequences of three instructions. */
51491 if (expand_vec_perm_even_odd_pack (d))
51494 if (expand_vec_perm_2vperm2f128_vshuf (d))
51497 if (expand_vec_perm_pshufb2 (d))
51500 if (expand_vec_perm_interleave3 (d))
51503 if (expand_vec_perm_vperm2f128_vblend (d))
51506 /* Try sequences of four instructions. */
51508 if (expand_vec_perm_even_odd_trunc (d))
51510 if (expand_vec_perm_vpshufb2_vpermq (d))
51513 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
51516 if (expand_vec_perm_vpermi2_vpshub2 (d))
51519 /* ??? Look for narrow permutations whose element orderings would
51520 allow the promotion to a wider mode. */
51522 /* ??? Look for sequences of interleave or a wider permute that place
51523 the data into the correct lanes for a half-vector shuffle like
51524 pshuf[lh]w or vpermilps. */
51526 /* ??? Look for sequences of interleave that produce the desired results.
51527 The combinatorics of punpck[lh] get pretty ugly... */
51529 if (expand_vec_perm_even_odd (d))
51532 /* Even longer sequences. */
51533 if (expand_vec_perm_vpshufb4_vpermq2 (d))
51536 /* See if we can get the same permutation in different vector integer
/* Last resort: canonicalize to another integer vector mode and retry
   the single-insn expander, bit-casting the result back.  */
51538 struct expand_vec_perm_d nd;
51539 if (canonicalize_vector_int_perm (d, &nd) && expand_vec_perm_1 (&nd))
51542 emit_move_insn (d->target, gen_lowpart (d->vmode, nd.target));
51549 /* If a permutation only uses one operand, make it clear. Returns true
51550 if the permutation references both operands. */
51553 canonicalize_perm (struct expand_vec_perm_d *d)
51555 int i, which, nelt = d->nelt;
/* WHICH bit 0: some index selects op0 (< nelt); bit 1: some index
   selects op1 (>= nelt).  */
51557 for (i = which = 0; i < nelt; ++i)
51558 which |= (d->perm[i] < nelt ? 1 : 2);
51560 d->one_operand_p = true;
51567 if (!rtx_equal_p (d->op0, d->op1))
51569 d->one_operand_p = false;
51572 /* The elements of PERM do not suggest that only the first operand
51573 is used, but both operands are identical. Allow easier matching
51574 of the permutation by folding the permutation into the single
/* Fold all indices into the 0 .. nelt-1 range of the one operand.  */
51579 for (i = 0; i < nelt; ++i)
51580 d->perm[i] &= nelt - 1;
51589 return (which == 3);
/* Expand a constant vector permutation: operands[0] = target,
   operands[1]/[2] = inputs, operands[3] presumably the CONST_VECTOR
   selector SEL (its extraction is elided in this view).  Returns true
   if a sequence was emitted.  */
51593 ix86_expand_vec_perm_const (rtx operands[4])
51595 struct expand_vec_perm_d d;
51596 unsigned char perm[MAX_VECT_LEN];
51601 d.target = operands[0];
51602 d.op0 = operands[1];
51603 d.op1 = operands[2];
51606 d.vmode = GET_MODE (d.target);
51607 gcc_assert (VECTOR_MODE_P (d.vmode));
51608 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
51609 d.testing_p = false;
51611 gcc_assert (GET_CODE (sel) == CONST_VECTOR);
51612 gcc_assert (XVECLEN (sel, 0) == nelt);
51613 gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
/* Mask selector elements into 0 .. 2*nelt-1; PERM keeps a pristine
   copy for the retry below.  */
51615 for (i = 0; i < nelt; ++i)
51617 rtx e = XVECEXP (sel, 0, i);
51618 int ei = INTVAL (e) & (2 * nelt - 1);
51623 two_args = canonicalize_perm (&d);
51625 if (ix86_expand_vec_perm_const_1 (&d))
51628 /* If the selector says both arguments are needed, but the operands are the
51629 same, the above tried to expand with one_operand_p and flattened selector.
51630 If that didn't work, retry without one_operand_p; we succeeded with that
51632 if (two_args && d.one_operand_p)
51634 d.one_operand_p = false;
51635 memcpy (d.perm, perm, sizeof (perm));
51636 return ix86_expand_vec_perm_const_1 (&d);
51642 /* Implement targetm.vectorize.vec_perm_const_ok. */
51645 ix86_vectorize_vec_perm_const_ok (machine_mode vmode,
51646 const unsigned char *sel)
51648 struct expand_vec_perm_d d;
51649 unsigned int i, nelt, which;
51653 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
51654 d.testing_p = true;
51656 /* Given sufficient ISA support we can just return true here
51657 for selected vector modes. */
51664 if (TARGET_AVX512F)
51665 /* All implementable with a single vpermi2 insn. */
51669 if (TARGET_AVX512BW)
51670 /* All implementable with a single vpermi2 insn. */
51674 if (TARGET_AVX512BW)
51675 /* Implementable with 2 vpermi2, 2 vpshufb and 1 or insn. */
51682 if (TARGET_AVX512VL)
51683 /* All implementable with a single vpermi2 insn. */
51688 /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
51693 /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
51700 /* All implementable with a single vpperm insn. */
51703 /* All implementable with 2 pshufb + 1 ior. */
51709 /* All implementable with shufpd or unpck[lh]pd. */
51715 /* Extract the values from the vector CST into the permutation
51717 memcpy (d.perm, sel, nelt);
/* Classify the selector: bit 0 of WHICH = first operand referenced,
   bit 1 = second operand referenced.  */
51718 for (i = which = 0; i < nelt; ++i)
51720 unsigned char e = d.perm[i];
51721 gcc_assert (e < 2 * nelt);
51722 which |= (e < nelt ? 1 : 2);
51725 /* For all elements from second vector, fold the elements to first. */
51727 for (i = 0; i < nelt; ++i)
51730 /* Check whether the mask can be applied to the vector type. */
51731 d.one_operand_p = (which != 3);
51733 /* Implementable with shufps or pshufd. */
51734 if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
51737 /* Otherwise we have to go through the motions and see if we can
51738 figure out how to generate the requested permutation. */
/* Dry run: d.testing_p was set above, and raw scratch REGs stand in
   for real pseudos so nothing needs to be committed.  */
51739 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
51740 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
51741 if (!d.one_operand_p)
51742 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
51745 ret = ix86_expand_vec_perm_const_1 (&d);
/* Emit insns extracting the even (ODD == 0) or odd (ODD == 1) elements
   of OP0/OP1 into TARG.  */
51752 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
51754 struct expand_vec_perm_d d;
51760 d.vmode = GET_MODE (targ);
51761 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
51762 d.one_operand_p = false;
51763 d.testing_p = false;
/* Build the even/odd extraction selector { odd, odd+2, odd+4, ... }.  */
51765 for (i = 0; i < nelt; ++i)
51766 d.perm[i] = i * 2 + odd;
51768 /* We'll either be able to implement the permutation directly... */
51769 if (expand_vec_perm_1 (&d))
51772 /* ... or we use the special-case patterns. */
51773 expand_vec_perm_even_odd_1 (&d, odd);
/* Emit insns interleaving the low (HIGH_P false) or high (HIGH_P true)
   halves of OP0 and OP1 into TARG.  */
51777 ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p)
51779 struct expand_vec_perm_d d;
51780 unsigned i, nelt, base;
51786 d.vmode = GET_MODE (targ);
51787 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
51788 d.one_operand_p = false;
51789 d.testing_p = false;
/* Selector alternates op0/op1 elements starting at BASE.  */
51791 base = high_p ? nelt / 2 : 0;
51792 for (i = 0; i < nelt / 2; ++i)
51794 d.perm[i * 2] = i + base;
51795 d.perm[i * 2 + 1] = i + base + nelt;
51798 /* Note that for AVX this isn't one instruction. */
51799 ok = ix86_expand_vec_perm_const_1 (&d);
51804 /* Expand a vector operation CODE for a V*QImode in terms of the
51805 same operation on V*HImode. */
51808 ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
51810 machine_mode qimode = GET_MODE (dest);
51811 machine_mode himode;
51812 rtx (*gen_il) (rtx, rtx, rtx);
51813 rtx (*gen_ih) (rtx, rtx, rtx);
51814 rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h;
51815 struct expand_vec_perm_d d;
51816 bool ok, full_interleave;
51817 bool uns_p = false;
/* Select the low/high interleave expanders by QI vector width
   (switch labels elided in this view).  */
51824 gen_il = gen_vec_interleave_lowv16qi;
51825 gen_ih = gen_vec_interleave_highv16qi;
51828 himode = V16HImode;
51829 gen_il = gen_avx2_interleave_lowv32qi;
51830 gen_ih = gen_avx2_interleave_highv32qi;
51833 himode = V32HImode;
51834 gen_il = gen_avx512bw_interleave_lowv64qi;
51835 gen_ih = gen_avx512bw_interleave_highv64qi;
51838 gcc_unreachable ();
51841 op2_l = op2_h = op2;
51845 /* Unpack data such that we've got a source byte in each low byte of
51846 each word. We don't care what goes into the high byte of each word.
51847 Rather than trying to get zero in there, most convenient is to let
51848 it be a copy of the low byte. */
51849 op2_l = gen_reg_rtx (qimode);
51850 op2_h = gen_reg_rtx (qimode);
51851 emit_insn (gen_il (op2_l, op2, op2));
51852 emit_insn (gen_ih (op2_h, op2, op2));
51855 op1_l = gen_reg_rtx (qimode);
51856 op1_h = gen_reg_rtx (qimode);
51857 emit_insn (gen_il (op1_l, op1, op1));
51858 emit_insn (gen_ih (op1_h, op1, op1));
51859 full_interleave = qimode == V16QImode;
/* Alternate path: sign/zero-extend the operands into HI vectors.  */
51867 op1_l = gen_reg_rtx (himode);
51868 op1_h = gen_reg_rtx (himode);
51869 ix86_expand_sse_unpack (op1_l, op1, uns_p, false);
51870 ix86_expand_sse_unpack (op1_h, op1, uns_p, true);
51871 full_interleave = true;
51874 gcc_unreachable ();
51877 /* Perform the operation. */
51878 res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX,
51880 res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX,
51882 gcc_assert (res_l && res_h);
51884 /* Merge the data back into the right place. */
51886 d.op0 = gen_lowpart (qimode, res_l);
51887 d.op1 = gen_lowpart (qimode, res_h);
51889 d.nelt = GET_MODE_NUNITS (qimode);
51890 d.one_operand_p = false;
51891 d.testing_p = false;
51893 if (full_interleave)
51895 /* For SSE2, we used an full interleave, so the desired
51896 results are in the even elements. */
51897 for (i = 0; i < 64; ++i)
51902 /* For AVX, the interleave used above was not cross-lane. So the
51903 extraction is evens but with the second and third quarter swapped.
51904 Happily, that is even one insn shorter than even extraction. */
51905 for (i = 0; i < 64; ++i)
51906 d.perm[i] = i * 2 + ((i & 24) == 8 ? 16 : (i & 24) == 16 ? -16 : 0);
51909 ok = ix86_expand_vec_perm_const_1 (&d);
/* Record the whole-vector operation as a REG_EQUAL note for later
   optimization passes.  */
51912 set_unique_reg_note (get_last_insn (), REG_EQUAL,
51913 gen_rtx_fmt_ee (code, qimode, op1, op2));
51916 /* Helper function of ix86_expand_mul_widen_evenodd. Return true
51917 if op is CONST_VECTOR with all odd elements equal to their
51918 preceding element. */
51921 const_vector_equal_evenodd_p (rtx op)
51923 machine_mode mode = GET_MODE (op);
51924 int i, nunits = GET_MODE_NUNITS (mode);
51925 if (GET_CODE (op) != CONST_VECTOR
51926 || nunits != CONST_VECTOR_NUNITS (op))
/* NOTE(review): element pairs are compared by rtx pointer identity —
   presumably relying on constant rtx sharing; verify for the modes
   this is used with.  */
51928 for (i = 0; i < nunits; i += 2)
51929 if (CONST_VECTOR_ELT (op, i) != CONST_VECTOR_ELT (op, i + 1))
/* Expand a widening multiply of the even (ODD_P false) or odd (ODD_P
   true) SImode elements of OP1/OP2 into the wide-mode DEST; UNS_P
   selects unsigned multiplication.  */
51935 ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
51936 bool uns_p, bool odd_p)
51938 machine_mode mode = GET_MODE (op1);
51939 machine_mode wmode = GET_MODE (dest);
51941 rtx orig_op1 = op1, orig_op2 = op2;
51943 if (!nonimmediate_operand (op1, mode))
51944 op1 = force_reg (mode, op1);
51945 if (!nonimmediate_operand (op2, mode))
51946 op2 = force_reg (mode, op2);
51948 /* We only play even/odd games with vectors of SImode. */
51949 gcc_assert (mode == V4SImode || mode == V8SImode || mode == V16SImode);
51951 /* If we're looking for the odd results, shift those members down to
51952 the even slots. For some cpus this is faster than a PSHUFD. */
51955 /* For XOP use vpmacsdqh, but only for smult, as it is only
51957 if (TARGET_XOP && mode == V4SImode && !uns_p)
51959 x = force_reg (wmode, CONST0_RTX (wmode));
51960 emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x));
/* Shift each wide element right by one narrow element's bits; skip
   CONST_VECTOR operands whose even/odd pairs are already equal.  */
51964 x = GEN_INT (GET_MODE_UNIT_BITSIZE (mode));
51965 if (!const_vector_equal_evenodd_p (orig_op1))
51966 op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1),
51967 x, NULL, 1, OPTAB_DIRECT);
51968 if (!const_vector_equal_evenodd_p (orig_op2))
51969 op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2),
51970 x, NULL, 1, OPTAB_DIRECT);
51971 op1 = gen_lowpart (mode, op1);
51972 op2 = gen_lowpart (mode, op2);
/* Pick the widest even-multiply pattern the mode and ISA allow.  */
51975 if (mode == V16SImode)
51978 x = gen_vec_widen_umult_even_v16si (dest, op1, op2);
51980 x = gen_vec_widen_smult_even_v16si (dest, op1, op2);
51982 else if (mode == V8SImode)
51985 x = gen_vec_widen_umult_even_v8si (dest, op1, op2);
51987 x = gen_vec_widen_smult_even_v8si (dest, op1, op2);
51990 x = gen_vec_widen_umult_even_v4si (dest, op1, op2);
51991 else if (TARGET_SSE4_1)
51992 x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2);
51995 rtx s1, s2, t0, t1, t2;
51997 /* The easiest way to implement this without PMULDQ is to go through
51998 the motions as if we are performing a full 64-bit multiply. With
51999 the exception that we need to do less shuffling of the elements. */
52001 /* Compute the sign-extension, aka highparts, of the two operands. */
52002 s1 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
52003 op1, pc_rtx, pc_rtx);
52004 s2 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
52005 op2, pc_rtx, pc_rtx);
52007 /* Multiply LO(A) * HI(B), and vice-versa. */
52008 t1 = gen_reg_rtx (wmode);
52009 t2 = gen_reg_rtx (wmode);
52010 emit_insn (gen_vec_widen_umult_even_v4si (t1, s1, op2));
52011 emit_insn (gen_vec_widen_umult_even_v4si (t2, s2, op1));
52013 /* Multiply LO(A) * LO(B). */
52014 t0 = gen_reg_rtx (wmode);
52015 emit_insn (gen_vec_widen_umult_even_v4si (t0, op1, op2));
52017 /* Combine and shift the highparts into place. */
52018 t1 = expand_binop (wmode, add_optab, t1, t2, t1, 1, OPTAB_DIRECT);
52019 t1 = expand_binop (wmode, ashl_optab, t1, GEN_INT (32), t1,
52022 /* Combine high and low parts. */
52023 force_expand_binop (wmode, add_optab, t0, t1, dest, 1, OPTAB_DIRECT);
/* Expand a widening multiply of the low (HIGH_P false) or high (HIGH_P
   true) half of OP1/OP2 into the wide-mode DEST; UNS_P selects
   unsigned multiplication.  */
52030 ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2,
52031 bool uns_p, bool high_p)
52033 machine_mode wmode = GET_MODE (dest);
52034 machine_mode mode = GET_MODE (op1);
52035 rtx t1, t2, t3, t4, mask;
52040 t1 = gen_reg_rtx (mode);
52041 t2 = gen_reg_rtx (mode);
52042 if (TARGET_XOP && !uns_p)
52044 /* With XOP, we have pmacsdqh, aka mul_widen_odd. In this case,
52045 shuffle the elements once so that all elements are in the right
52046 place for immediate use: { A C B D }. */
52047 emit_insn (gen_sse2_pshufd_1 (t1, op1, const0_rtx, const2_rtx,
52048 const1_rtx, GEN_INT (3)));
52049 emit_insn (gen_sse2_pshufd_1 (t2, op2, const0_rtx, const2_rtx,
52050 const1_rtx, GEN_INT (3)));
52054 /* Put the elements into place for the multiply. */
52055 ix86_expand_vec_interleave (t1, op1, op1, high_p);
52056 ix86_expand_vec_interleave (t2, op2, op2, high_p);
52059 ix86_expand_mul_widen_evenodd (dest, t1, t2, uns_p, high_p);
52063 /* Shuffle the elements between the lanes. After this we
52064 have { A B E F | C D G H } for each operand. */
52065 t1 = gen_reg_rtx (V4DImode);
52066 t2 = gen_reg_rtx (V4DImode);
52067 emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, op1),
52068 const0_rtx, const2_rtx,
52069 const1_rtx, GEN_INT (3)));
52070 emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, op2),
52071 const0_rtx, const2_rtx,
52072 const1_rtx, GEN_INT (3)));
52074 /* Shuffle the elements within the lanes. After this we
52075 have { A A B B | C C D D } or { E E F F | G G H H }. */
52076 t3 = gen_reg_rtx (V8SImode);
52077 t4 = gen_reg_rtx (V8SImode);
52078 mask = GEN_INT (high_p
52079 ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
52080 : 0 + (0 << 2) + (1 << 4) + (1 << 6));
52081 emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), mask));
52082 emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), mask));
52084 ix86_expand_mul_widen_evenodd (dest, t3, t4, uns_p, false);
/* Path using mul + mul_highpart: compute low and high product halves
   separately, then interleave them into DEST.  */
52089 t1 = expand_binop (mode, smul_optab, op1, op2, NULL_RTX,
52090 uns_p, OPTAB_DIRECT);
52091 t2 = expand_binop (mode,
52092 uns_p ? umul_highpart_optab : smul_highpart_optab,
52093 op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT);
52094 gcc_assert (t1 && t2);
52096 t3 = gen_reg_rtx (mode);
52097 ix86_expand_vec_interleave (t3, t1, t2, high_p);
52098 emit_move_insn (dest, gen_lowpart (wmode, t3));
/* Fallback: unpack both operands to the wide mode and emit a plain
   widened MULT.  */
52106 t1 = gen_reg_rtx (wmode);
52107 t2 = gen_reg_rtx (wmode);
52108 ix86_expand_sse_unpack (t1, op1, uns_p, high_p);
52109 ix86_expand_sse_unpack (t2, op2, uns_p, high_p);
52111 emit_insn (gen_rtx_SET (dest, gen_rtx_MULT (wmode, t1, t2)));
52115 gcc_unreachable ();
/* Expand OP0 = OP1 * OP2 in V4SImode for targets without a native
   packed 32-bit multiply, using two unsigned even/odd widening
   multiplies whose low halves are merged back with an interleave.  */
52120 ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)
52122 rtx res_1, res_2, res_3, res_4;
52124 res_1 = gen_reg_rtx (V4SImode);
52125 res_2 = gen_reg_rtx (V4SImode);
52126 res_3 = gen_reg_rtx (V2DImode);
52127 res_4 = gen_reg_rtx (V2DImode);
/* res_3 receives products of the even elements, res_4 of the odd.  */
52128 ix86_expand_mul_widen_evenodd (res_3, op1, op2, true, false);
52129 ix86_expand_mul_widen_evenodd (res_4, op1, op2, true, true);
52131 /* Move the results in element 2 down to element 1; we don't care
52132 what goes in elements 2 and 3. Then we can merge the parts
52133 back together with an interleave.
52135 Note that two other sequences were tried:
52136 (1) Use interleaves at the start instead of psrldq, which allows
52137 us to use a single shufps to merge things back at the end.
52138 (2) Use shufps here to combine the two vectors, then pshufd to
52139 put the elements in the correct order.
52140 In both cases the cost of the reformatting stall was too high
52141 and the overall sequence slower. */
52143 emit_insn (gen_sse2_pshufd_1 (res_1, gen_lowpart (V4SImode, res_3),
52144 const0_rtx, const2_rtx,
52145 const0_rtx, const0_rtx));
52146 emit_insn (gen_sse2_pshufd_1 (res_2, gen_lowpart (V4SImode, res_4),
52147 const0_rtx, const2_rtx,
52148 const0_rtx, const0_rtx));
52149 res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2));
/* Record that the whole sequence computes a plain V4SImode multiply,
   so the RTL optimizers can simplify around it.  */
52151 set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2));
/* Expand OP0 = OP1 * OP2 for 64-bit element vectors (V2DI/V4DI/V8DI).
   Uses a native AVX512DQ multiply when available, an XOP sequence for
   V2DImode, and otherwise synthesizes the product from 32x32->64
   widening multiplies of the low and high halves.  */
52155 ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
52157 machine_mode mode = GET_MODE (op0);
52158 rtx t1, t2, t3, t4, t5, t6;
52160 if (TARGET_AVX512DQ && mode == V8DImode)
52161 emit_insn (gen_avx512dq_mulv8di3 (op0, op1, op2));
52162 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V4DImode)
52163 emit_insn (gen_avx512dq_mulv4di3 (op0, op1, op2));
52164 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V2DImode)
52165 emit_insn (gen_avx512dq_mulv2di3 (op0, op1, op2));
52166 else if (TARGET_XOP && mode == V2DImode)
52168 /* op1: A,B,C,D, op2: E,F,G,H */
52169 op1 = gen_lowpart (V4SImode, op1);
52170 op2 = gen_lowpart (V4SImode, op2);
52172 t1 = gen_reg_rtx (V4SImode);
52173 t2 = gen_reg_rtx (V4SImode);
52174 t3 = gen_reg_rtx (V2DImode);
52175 t4 = gen_reg_rtx (V2DImode);
/* t1: swap adjacent 32-bit halves of op1 so the cross products line
   up for the horizontal add below.  */
52178 emit_insn (gen_sse2_pshufd_1 (t1, op1,
52184 /* t2: (B*E),(A*F),(D*G),(C*H) */
52185 emit_insn (gen_mulv4si3 (t2, t1, op2));
52187 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
52188 emit_insn (gen_xop_phadddq (t3, t2));
52190 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
52191 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
52193 /* Multiply lower parts and add all */
52194 t5 = gen_reg_rtx (V2DImode);
52195 emit_insn (gen_vec_widen_umult_even_v4si (t5,
52196 gen_lowpart (V4SImode, op1),
52197 gen_lowpart (V4SImode, op2)));
52198 op0 = expand_binop (mode, add_optab, t5, t4, op0, 1, OPTAB_DIRECT);
/* Generic fallback: pick the even widening multiply and the matching
   narrower element mode for this vector width.  */
52203 machine_mode nmode;
52204 rtx (*umul) (rtx, rtx, rtx);
52206 if (mode == V2DImode)
52208 umul = gen_vec_widen_umult_even_v4si;
52211 else if (mode == V4DImode)
52213 umul = gen_vec_widen_umult_even_v8si;
52216 else if (mode == V8DImode)
52218 umul = gen_vec_widen_umult_even_v16si;
52222 gcc_unreachable ();
52225 /* Multiply low parts. */
52226 t1 = gen_reg_rtx (mode);
52227 emit_insn (umul (t1, gen_lowpart (nmode, op1), gen_lowpart (nmode, op2)));
52229 /* Shift input vectors right 32 bits so we can multiply high parts. */
52231 t2 = expand_binop (mode, lshr_optab, op1, t6, NULL, 1, OPTAB_DIRECT);
52232 t3 = expand_binop (mode, lshr_optab, op2, t6, NULL, 1, OPTAB_DIRECT);
52234 /* Multiply high parts by low parts. */
52235 t4 = gen_reg_rtx (mode);
52236 t5 = gen_reg_rtx (mode);
52237 emit_insn (umul (t4, gen_lowpart (nmode, t2), gen_lowpart (nmode, op2)));
52238 emit_insn (umul (t5, gen_lowpart (nmode, t3), gen_lowpart (nmode, op1)));
52240 /* Combine and shift the highparts back. */
52241 t4 = expand_binop (mode, add_optab, t4, t5, t4, 1, OPTAB_DIRECT);
52242 t4 = expand_binop (mode, ashl_optab, t4, t6, t4, 1, OPTAB_DIRECT);
52244 /* Combine high and low parts. */
52245 force_expand_binop (mode, add_optab, t1, t4, op0, 1, OPTAB_DIRECT);
/* Note the overall effect for the RTL optimizers.  */
52248 set_unique_reg_note (get_last_insn (), REG_EQUAL,
52249 gen_rtx_MULT (mode, op1, op2));
52252 /* Return 1 if control transfer instruction INSN
52253 should be encoded with bnd prefix.
52254 If insn is NULL then return 1 when control
52255 transfer instructions should be prefixed with
52256 bnd by default for current function. */
52259 ix86_bnd_prefixed_insn_p (rtx insn)
52261 /* For call insns check special flag. */
52262 if (insn && CALL_P (insn))
52264 rtx call = get_call_rtx_from (insn);
52266 return CALL_EXPR_WITH_BOUNDS_P (call);
52269 /* All other insns are prefixed only if function is instrumented. */
52270 return chkp_function_instrumented_p (current_function_decl);
52273 /* Calculate integer abs() using only SSE2 instructions. */
/* TARGET receives |INPUT|; the strategy is chosen per element width:
   shift/xor/subtract for 32-bit, PMAXSW for 16-bit, PMINUB for 8-bit.  */
52276 ix86_expand_sse2_abs (rtx target, rtx input)
52278 machine_mode mode = GET_MODE (target);
52283 /* For 32-bit signed integer X, the best way to calculate the absolute
52284 value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)). */
52286 tmp0 = expand_simple_binop (mode, ASHIFTRT, input,
52287 GEN_INT (GET_MODE_UNIT_BITSIZE (mode) - 1),
52288 NULL, 0, OPTAB_DIRECT);
52289 tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
52290 NULL, 0, OPTAB_DIRECT);
52291 x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
52292 target, 0, OPTAB_DIRECT);
52295 /* For 16-bit signed integer X, the best way to calculate the absolute
52296 value of X is max (X, -X), as SSE2 provides the PMAXSW insn. */
52298 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
52300 x = expand_simple_binop (mode, SMAX, tmp0, input,
52301 target, 0, OPTAB_DIRECT);
52304 /* For 8-bit signed integer X, the best way to calculate the absolute
52305 value of X is min ((unsigned char) X, (unsigned char) (-X)),
52306 as SSE2 provides the PMINUB insn. */
52308 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
52310 x = expand_simple_binop (V16QImode, UMIN, tmp0, input,
52311 target, 0, OPTAB_DIRECT);
52315 gcc_unreachable ();
/* The binops may have produced their result elsewhere; copy into
   TARGET if so.  */
52319 emit_move_insn (target, x);
52322 /* Expand an extract from a vector register through pextr insn.
52323 Return true if successful. */
52326 ix86_expand_pextr (rtx *operands)
52328 rtx dst = operands[0];
52329 rtx src = operands[1];
/* operands[2]/[3] are the bit size and bit position of the field.  */
52331 unsigned int size = INTVAL (operands[2]);
52332 unsigned int pos = INTVAL (operands[3]);
52334 if (SUBREG_P (dst))
52336 /* Reject non-lowpart subregs. */
52337 if (SUBREG_BYTE (dst) > 0)
52339 dst = SUBREG_REG (dst);
/* A subreg source just offsets the extraction position.  */
52342 if (SUBREG_P (src))
52344 pos += SUBREG_BYTE (src) * BITS_PER_UNIT;
52345 src = SUBREG_REG (src);
52348 switch (GET_MODE (src))
52357 machine_mode srcmode, dstmode;
52360 dstmode = mode_for_size (size, MODE_INT, 0);
/* Pick the vector mode matching the element size; pextrb/pextrd/
   pextrq require SSE4.1, pextrw is available without it.  */
52365 if (!TARGET_SSE4_1)
52367 srcmode = V16QImode;
52373 srcmode = V8HImode;
52377 if (!TARGET_SSE4_1)
52379 srcmode = V4SImode;
52383 gcc_assert (TARGET_64BIT);
52384 if (!TARGET_SSE4_1)
52386 srcmode = V2DImode;
52393 /* Reject extractions from misaligned positions. */
52394 if (pos & (size-1))
52397 if (GET_MODE (dst) == dstmode)
52400 d = gen_reg_rtx (dstmode);
52402 /* Construct insn pattern. */
52403 pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (pos / size)));
52404 pat = gen_rtx_VEC_SELECT (dstmode, gen_lowpart (srcmode, src), pat);
52406 /* Let the rtl optimizers know about the zero extension performed. */
52407 if (dstmode == QImode || dstmode == HImode)
52409 pat = gen_rtx_ZERO_EXTEND (SImode, pat);
52410 d = gen_lowpart (SImode, d);
52413 emit_insn (gen_rtx_SET (d, pat));
52416 emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
52425 /* Expand an insert into a vector register through pinsr insn.
52426 Return true if successful. */
52429 ix86_expand_pinsr (rtx *operands)
52431 rtx dst = operands[0];
52432 rtx src = operands[3];
/* operands[1]/[2] are the bit size and bit position of the field.  */
52434 unsigned int size = INTVAL (operands[1]);
52435 unsigned int pos = INTVAL (operands[2]);
/* A subreg destination just offsets the insertion position.  */
52437 if (SUBREG_P (dst))
52439 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
52440 dst = SUBREG_REG (dst);
52443 switch (GET_MODE (dst))
52452 machine_mode srcmode, dstmode;
52453 rtx (*pinsr)(rtx, rtx, rtx, rtx);
52456 srcmode = mode_for_size (size, MODE_INT, 0);
/* Pick the insert insn for the element size; pinsrb/pinsrd/pinsrq
   require SSE4.1, pinsrw is plain SSE2.  */
52461 if (!TARGET_SSE4_1)
52463 dstmode = V16QImode;
52464 pinsr = gen_sse4_1_pinsrb;
52470 dstmode = V8HImode;
52471 pinsr = gen_sse2_pinsrw;
52475 if (!TARGET_SSE4_1)
52477 dstmode = V4SImode;
52478 pinsr = gen_sse4_1_pinsrd;
52482 gcc_assert (TARGET_64BIT);
52483 if (!TARGET_SSE4_1)
52485 dstmode = V2DImode;
52486 pinsr = gen_sse4_1_pinsrq;
52493 /* Reject insertions to misaligned positions. */
52494 if (pos & (size-1))
/* A subreg source may first need an explicit pextr to materialize
   the field being inserted.  */
52497 if (SUBREG_P (src))
52499 unsigned int srcpos = SUBREG_BYTE (src);
52505 extr_ops[0] = gen_reg_rtx (srcmode);
52506 extr_ops[1] = gen_lowpart (srcmode, SUBREG_REG (src));
52507 extr_ops[2] = GEN_INT (size);
52508 extr_ops[3] = GEN_INT (srcpos * BITS_PER_UNIT);
52510 if (!ix86_expand_pextr (extr_ops))
52516 src = gen_lowpart (srcmode, SUBREG_REG (src));
52519 if (GET_MODE (dst) == dstmode)
52522 d = gen_reg_rtx (dstmode);
/* The last operand is a one-hot lane-selection mask.  */
52524 emit_insn (pinsr (d, gen_lowpart (dstmode, dst),
52525 gen_lowpart (srcmode, src),
52526 GEN_INT (1 << (pos / size))));
52528 emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
52537 /* This function returns the calling abi specific va_list type node.
52538 It returns the FNDECL specific va_list type. */
52541 ix86_fn_abi_va_list (tree fndecl)
/* NOTE(review): the early return below is presumably guarded by a
   !TARGET_64BIT check — confirm against the full source.  */
52544 return va_list_type_node;
52545 gcc_assert (fndecl != NULL_TREE);
52547 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
52548 return ms_va_list_type_node;
52550 return sysv_va_list_type_node;
52553 /* Returns the canonical va_list type specified by TYPE. If there
52554 is no valid TYPE provided, it returns NULL_TREE. */
52557 ix86_canonical_va_list_type (tree type)
52561 /* Resolve references and pointers to va_list type. */
52562 if (TREE_CODE (type) == MEM_REF)
52563 type = TREE_TYPE (type);
52564 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
52565 type = TREE_TYPE (type);
52566 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
52567 type = TREE_TYPE (type);
/* On 64-bit, compare TYPE against each known va_list flavour in turn:
   the default, the SysV, and the MS variant.  */
52569 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
52571 wtype = va_list_type_node;
52572 gcc_assert (wtype != NULL_TREE);
52574 if (TREE_CODE (wtype) == ARRAY_TYPE)
52576 /* If va_list is an array type, the argument may have decayed
52577 to a pointer type, e.g. by being passed to another function.
52578 In that case, unwrap both types so that we can compare the
52579 underlying records. */
52580 if (TREE_CODE (htype) == ARRAY_TYPE
52581 || POINTER_TYPE_P (htype))
52583 wtype = TREE_TYPE (wtype);
52584 htype = TREE_TYPE (htype);
52587 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
52588 return va_list_type_node;
52589 wtype = sysv_va_list_type_node;
52590 gcc_assert (wtype != NULL_TREE);
52592 if (TREE_CODE (wtype) == ARRAY_TYPE)
52594 /* If va_list is an array type, the argument may have decayed
52595 to a pointer type, e.g. by being passed to another function.
52596 In that case, unwrap both types so that we can compare the
52597 underlying records. */
52598 if (TREE_CODE (htype) == ARRAY_TYPE
52599 || POINTER_TYPE_P (htype))
52601 wtype = TREE_TYPE (wtype);
52602 htype = TREE_TYPE (htype);
52605 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
52606 return sysv_va_list_type_node;
52607 wtype = ms_va_list_type_node;
52608 gcc_assert (wtype != NULL_TREE);
52610 if (TREE_CODE (wtype) == ARRAY_TYPE)
52612 /* If va_list is an array type, the argument may have decayed
52613 to a pointer type, e.g. by being passed to another function.
52614 In that case, unwrap both types so that we can compare the
52615 underlying records. */
52616 if (TREE_CODE (htype) == ARRAY_TYPE
52617 || POINTER_TYPE_P (htype))
52619 wtype = TREE_TYPE (wtype);
52620 htype = TREE_TYPE (htype);
52623 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
52624 return ms_va_list_type_node;
/* Not a target-specific va_list; fall back to the generic handling.  */
52627 return std_canonical_va_list_type (type);
52630 /* Iterate through the target-specific builtin types for va_list.
52631 IDX denotes the iterator, *PTREE is set to the result type of
52632 the va_list builtin, and *PNAME to its internal type.
52633 Returns zero if there is no element for this index, otherwise
52634 IDX should be increased upon the next call.
52635 Note, do not iterate a base builtin's name like __builtin_va_list.
52636 Used from c_common_nodes_and_builtins. */
52639 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
/* Index 0 is the MS variant, index 1 the SysV variant.  */
52649 *ptree = ms_va_list_type_node;
52650 *pname = "__builtin_ms_va_list";
52654 *ptree = sysv_va_list_type_node;
52655 *pname = "__builtin_sysv_va_list";
/* Target hooks wiring the AMD dispatch-window scheduler (below) and
   related scheduling callbacks into the generic scheduler.  */
52663 #undef TARGET_SCHED_DISPATCH
52664 #define TARGET_SCHED_DISPATCH has_dispatch
52665 #undef TARGET_SCHED_DISPATCH_DO
52666 #define TARGET_SCHED_DISPATCH_DO do_dispatch
52667 #undef TARGET_SCHED_REASSOCIATION_WIDTH
52668 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
52669 #undef TARGET_SCHED_REORDER
52670 #define TARGET_SCHED_REORDER ix86_sched_reorder
52671 #undef TARGET_SCHED_ADJUST_PRIORITY
52672 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
52673 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
52674 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
52675 ix86_dependencies_evaluation_hook
52677 /* The size of the dispatch window is the total number of bytes of
52678 object code allowed in a window. */
52679 #define DISPATCH_WINDOW_SIZE 16
52681 /* Number of dispatch windows considered for scheduling. */
52682 #define MAX_DISPATCH_WINDOWS 3
52684 /* Maximum number of instructions in a window. */
52687 /* Maximum number of immediate operands in a window. */
52690 /* Maximum number of immediate bits allowed in a window. */
52691 #define MAX_IMM_SIZE 128
52693 /* Maximum number of 32 bit immediates allowed in a window. */
52694 #define MAX_IMM_32 4
52696 /* Maximum number of 64 bit immediates allowed in a window. */
52697 #define MAX_IMM_64 2
52699 /* Maximum total of loads or prefetches allowed in a window. */
52702 /* Maximum total of stores allowed in a window. */
52703 #define MAX_STORE 1
52709 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
52710 enum dispatch_group {
52725 /* Number of allowable groups in a dispatch window. It is an array
52726 indexed by dispatch_group enum. 100 is used as a big number,
52727 because the number of these kind of operations does not have any
52728 effect in dispatch window, but we need them for other reasons in
52730 static unsigned int num_allowable_groups[disp_last] = {
52731 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
/* Printable names for each dispatch_group, used by the debug dumps.  */
52734 char group_name[disp_last + 1][16] = {
52735 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
52736 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
52737 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
52740 /* Instruction path. */
52743 path_single, /* Single micro op. */
52744 path_double, /* Double micro op. */
52745 path_multi, /* Instructions with more than 2 micro op.. */
52749 /* sched_insn_info defines a window to the instructions scheduled in
52750 the basic block. It contains a pointer to the insn_info table and
52751 the instruction scheduled.
52753 Windows are allocated for each basic block and are linked
52755 typedef struct sched_insn_info_s {
52757 enum dispatch_group group;
52758 enum insn_path path;
52763 /* Linked list of dispatch windows. This is a two way list of
52764 dispatch windows of a basic block. It contains information about
52765 the number of uops in the window and the total number of
52766 instructions and of bytes in the object code for this dispatch
52768 typedef struct dispatch_windows_s {
52769 int num_insn; /* Number of insn in the window. */
52770 int num_uops; /* Number of uops in the window. */
52771 int window_size; /* Number of bytes in the window. */
52772 int window_num; /* Window number between 0 or 1. */
52773 int num_imm; /* Number of immediates in an insn. */
52774 int num_imm_32; /* Number of 32 bit immediates in an insn. */
52775 int num_imm_64; /* Number of 64 bit immediates in an insn. */
52776 int imm_size; /* Total immediates in the window. */
52777 int num_loads; /* Total memory loads in the window. */
52778 int num_stores; /* Total memory stores in the window. */
52779 int violation; /* Violation exists in window. */
52780 sched_insn_info *window; /* Pointer to the window. */
52781 struct dispatch_windows_s *next;
52782 struct dispatch_windows_s *prev;
52783 } dispatch_windows;
52785 /* Immediate values used in an insn. */
52786 typedef struct imm_info_s
/* Heads of the two dispatch-window lists maintained by this pass.  */
52793 static dispatch_windows *dispatch_window_list;
52794 static dispatch_windows *dispatch_window_list1;
52796 /* Get dispatch group of insn. */
52798 static enum dispatch_group
52799 get_mem_group (rtx_insn *insn)
52801 enum attr_memory memory;
/* Unrecognized insns carry no attributes; treat as no group.  */
52803 if (INSN_CODE (insn) < 0)
52804 return disp_no_group;
52805 memory = get_attr_memory (insn);
52806 if (memory == MEMORY_STORE)
52809 if (memory == MEMORY_LOAD)
52812 if (memory == MEMORY_BOTH)
52813 return disp_load_store;
52815 return disp_no_group;
52818 /* Return true if insn is a compare instruction. */
52821 is_cmp (rtx_insn *insn)
52823 enum attr_type type;
52825 type = get_attr_type (insn);
52826 return (type == TYPE_TEST
52827 || type == TYPE_ICMP
52828 || type == TYPE_FCMP
52829 || GET_CODE (PATTERN (insn)) == COMPARE);
52832 /* Return true if a dispatch violation encountered. */
/* Checks the most recent window (the list tail) when two exist.  */
52835 dispatch_violation (void)
52837 if (dispatch_window_list->next)
52838 return dispatch_window_list->next->violation;
52839 return dispatch_window_list->violation;
52842 /* Return true if insn is a branch instruction. */
52845 is_branch (rtx_insn *insn)
52847 return (CALL_P (insn) || JUMP_P (insn));
52850 /* Return true if insn is a prefetch instruction. */
52853 is_prefetch (rtx_insn *insn)
52855 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
52858 /* This function initializes a dispatch window and the list container holding a
52859 pointer to the window. WINDOW_NUM selects which of the two
52860 preallocated windows (0 or 1) to reset. */
52862 init_window (int window_num)
52865 dispatch_windows *new_list;
52867 if (window_num == 0)
52868 new_list = dispatch_window_list;
52870 new_list = dispatch_window_list1;
/* Reset all counters and unlink from any previous chain.  */
52872 new_list->num_insn = 0;
52873 new_list->num_uops = 0;
52874 new_list->window_size = 0;
52875 new_list->next = NULL;
52876 new_list->prev = NULL;
52877 new_list->window_num = window_num;
52878 new_list->num_imm = 0;
52879 new_list->num_imm_32 = 0;
52880 new_list->num_imm_64 = 0;
52881 new_list->imm_size = 0;
52882 new_list->num_loads = 0;
52883 new_list->num_stores = 0;
52884 new_list->violation = false;
/* Clear every per-insn slot in the window.  */
52886 for (i = 0; i < MAX_INSN; i++)
52888 new_list->window[i].insn = NULL;
52889 new_list->window[i].group = disp_no_group;
52890 new_list->window[i].path = no_path;
52891 new_list->window[i].byte_len = 0;
52892 new_list->window[i].imm_bytes = 0;
52897 /* This function allocates and initializes a dispatch window and the
52898 list container holding a pointer to the window. */
52900 static dispatch_windows *
52901 allocate_window (void)
52903 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
/* One extra slot beyond MAX_INSN is allocated for the insn array.  */
52904 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
52909 /* This routine initializes the dispatch scheduling information. It
52910 initiates building dispatch scheduler tables and constructs the
52911 first dispatch window. */
52914 init_dispatch_sched (void)
52916 /* Allocate a dispatch list and a window. */
52917 dispatch_window_list = allocate_window ();
52918 dispatch_window_list1 = allocate_window ();
52923 /* This function returns true if a branch is detected. End of a basic block
52924 does not have to be a branch, but here we assume only branches end a
52925 window. */
52928 is_end_basic_block (enum dispatch_group group)
52930 return group == disp_branch;
52933 /* This function is called when the end of a window processing is reached. */
52936 process_end_window (void)
52938 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
52939 if (dispatch_window_list->next)
52941 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
/* Combined size of both windows may never exceed 48 bytes.  */
52942 gcc_assert (dispatch_window_list->window_size
52943 + dispatch_window_list1->window_size <= 48);
52949 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
52950 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
52951 for 48 bytes of instructions. Note that these windows are not dispatch
52952 windows that their sizes are DISPATCH_WINDOW_SIZE. */
52954 static dispatch_windows *
52955 allocate_next_window (int window_num)
52957 if (window_num == 0)
52959 if (dispatch_window_list->next)
52962 return dispatch_window_list;
/* Chain window 1 behind window 0 and hand it back.  */
52965 dispatch_window_list->next = dispatch_window_list1;
52966 dispatch_window_list1->prev = dispatch_window_list;
52968 return dispatch_window_list1;
52971 /* Compute number of immediate operands of an instruction,
52972 accumulating counts into *IMM_VALUES. */
52974 find_constant (rtx in_rtx, imm_info *imm_values)
52976 if (INSN_P (in_rtx))
52977 in_rtx = PATTERN (in_rtx);
/* Walk every sub-rtx and classify each constant found.  */
52978 subrtx_iterator::array_type array;
52979 FOR_EACH_SUBRTX (iter, array, in_rtx, ALL)
52980 if (const_rtx x = *iter)
52981 switch (GET_CODE (x))
52986 (imm_values->imm)++;
/* Count as 32-bit if it fits in a sign-extended SImode immediate,
   otherwise as 64-bit.  */
52987 if (x86_64_immediate_operand (CONST_CAST_RTX (x), SImode))
52988 (imm_values->imm32)++;
52990 (imm_values->imm64)++;
52994 case CONST_WIDE_INT:
52995 (imm_values->imm)++;
52996 (imm_values->imm64)++;
53000 if (LABEL_KIND (x) == LABEL_NORMAL)
53002 (imm_values->imm)++;
53003 (imm_values->imm32)++;
53012 /* Return total size of immediate operands of an instruction along with number
53013 of corresponding immediate-operands. It initializes its parameters to zero
53014 before calling FIND_CONSTANT.
53015 INSN is the input instruction. IMM is the total of immediates.
53016 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
53017 bit immediates. */
53020 get_num_immediates (rtx_insn *insn, int *imm, int *imm32, int *imm64)
53022 imm_info imm_values = {0, 0, 0};
53024 find_constant (insn, &imm_values);
53025 *imm = imm_values.imm;
53026 *imm32 = imm_values.imm32;
53027 *imm64 = imm_values.imm64;
/* Returned size is in bytes: 4 per 32-bit and 8 per 64-bit immediate.  */
53028 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
53031 /* This function indicates if an operand of an instruction is an
53032 immediate. */
53035 has_immediate (rtx_insn *insn)
53037 int num_imm_operand;
53038 int num_imm32_operand;
53039 int num_imm64_operand;
/* Non-zero total immediate size means at least one immediate.  */
53042 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
53043 &num_imm64_operand);
53047 /* Return single or double path for instructions. */
53049 static enum insn_path
53050 get_insn_path (rtx_insn *insn)
/* The amdfam10_decode attribute encodes the decoder path.  */
53052 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
53054 if ((int)path == 0)
53055 return path_single;
53057 if ((int)path == 1)
53058 return path_double;
53063 /* Return insn dispatch group. */
53065 static enum dispatch_group
53066 get_insn_group (rtx_insn *insn)
/* Memory classification takes precedence; otherwise classify by
   branch, immediate, or prefetch.  */
53068 enum dispatch_group group = get_mem_group (insn);
53072 if (is_branch (insn))
53073 return disp_branch;
53078 if (has_immediate (insn))
53081 if (is_prefetch (insn))
53082 return disp_prefetch;
53084 return disp_no_group;
53087 /* Count number of GROUP restricted instructions in a dispatch
53088 window WINDOW_LIST. */
53091 count_num_restricted (rtx_insn *insn, dispatch_windows *window_list)
53093 enum dispatch_group group = get_insn_group (insn);
53095 int num_imm_operand;
53096 int num_imm32_operand;
53097 int num_imm64_operand;
53099 if (group == disp_no_group)
/* Immediate-carrying insns: check every per-window immediate budget
   (count, 32/64-bit mix, and total immediate bytes).  */
53102 if (group == disp_imm)
53104 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
53105 &num_imm64_operand);
53106 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
53107 || num_imm_operand + window_list->num_imm > MAX_IMM
53108 || (num_imm32_operand > 0
53109 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
53110 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
53111 || (num_imm64_operand > 0
53112 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
53113 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
53114 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
53115 && num_imm64_operand > 0
53116 && ((window_list->num_imm_64 > 0
53117 && window_list->num_insn >= 2)
53118 || window_list->num_insn >= 3)))
/* Memory-referencing insns: check the per-window load/store budgets.  */
53124 if ((group == disp_load_store
53125 && (window_list->num_loads >= MAX_LOAD
53126 || window_list->num_stores >= MAX_STORE))
53127 || ((group == disp_load
53128 || group == disp_prefetch)
53129 && window_list->num_loads >= MAX_LOAD)
53130 || (group == disp_store
53131 && window_list->num_stores >= MAX_STORE))
53137 /* This function returns true if insn satisfies dispatch rules on the
53138 last window scheduled. */
53141 fits_dispatch_window (rtx_insn *insn)
53143 dispatch_windows *window_list = dispatch_window_list;
53144 dispatch_windows *window_list_next = dispatch_window_list->next;
53145 unsigned int num_restrict;
53146 enum dispatch_group group = get_insn_group (insn);
53147 enum insn_path path = get_insn_path (insn);
53150 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
53151 instructions should be given the lowest priority in the
53152 scheduling process in Haifa scheduler to make sure they will be
53153 scheduled in the same dispatch window as the reference to them. */
53154 if (group == disp_jcc || group == disp_cmp)
53157 /* Check nonrestricted. */
53158 if (group == disp_no_group || group == disp_branch)
53161 /* Get last dispatch window. */
53162 if (window_list_next)
53163 window_list = window_list_next;
53165 if (window_list->window_num == 1)
/* Window 1 shares the 48-byte budget with window 0.  */
53167 sum = window_list->prev->window_size + window_list->window_size;
53170 || (min_insn_size (insn) + sum) >= 48)
53171 /* Window 1 is full. Go for next window. */
53175 num_restrict = count_num_restricted (insn, window_list);
53177 if (num_restrict > num_allowable_groups[group])
53180 /* See if it fits in the first window. */
53181 if (window_list->window_num == 0)
53183 /* The first window should have only single and double path
53184 uops. */
53185 if (path == path_double
53186 && (window_list->num_uops + 2) > MAX_INSN)
53188 else if (path != path_single)
53194 /* Add an instruction INSN with NUM_UOPS micro-operations to the
53195 dispatch window WINDOW_LIST. */
53198 add_insn_window (rtx_insn *insn, dispatch_windows *window_list, int num_uops)
53200 int byte_len = min_insn_size (insn);
53201 int num_insn = window_list->num_insn;
53203 sched_insn_info *window = window_list->window;
53204 enum dispatch_group group = get_insn_group (insn);
53205 enum insn_path path = get_insn_path (insn);
53206 int num_imm_operand;
53207 int num_imm32_operand;
53208 int num_imm64_operand;
/* Record a dispatch violation if the insn does not fit; disp_cmp is
   exempt since it is deliberately scheduled late.  */
53210 if (!window_list->violation && group != disp_cmp
53211 && !fits_dispatch_window (insn))
53212 window_list->violation = true;
53214 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
53215 &num_imm64_operand);
53217 /* Initialize window with new instruction. */
53218 window[num_insn].insn = insn;
53219 window[num_insn].byte_len = byte_len;
53220 window[num_insn].group = group;
53221 window[num_insn].path = path;
53222 window[num_insn].imm_bytes = imm_size;
/* Update the window-wide accounting.  */
53224 window_list->window_size += byte_len;
53225 window_list->num_insn = num_insn + 1;
53226 window_list->num_uops = window_list->num_uops + num_uops;
53227 window_list->imm_size += imm_size;
53228 window_list->num_imm += num_imm_operand;
53229 window_list->num_imm_32 += num_imm32_operand;
53230 window_list->num_imm_64 += num_imm64_operand;
53232 if (group == disp_store)
53233 window_list->num_stores += 1;
53234 else if (group == disp_load
53235 || group == disp_prefetch)
53236 window_list->num_loads += 1;
53237 else if (group == disp_load_store)
53239 window_list->num_stores += 1;
53240 window_list->num_loads += 1;
53244 /* Adds a scheduled instruction, INSN, to the current dispatch window.
53245 If the total bytes of instructions or the number of instructions in
53246 the window exceed allowable, it allocates a new window. */
53249 add_to_dispatch_window (rtx_insn *insn)
53252 dispatch_windows *window_list;
53253 dispatch_windows *next_list;
53254 dispatch_windows *window0_list;
53255 enum insn_path path;
53256 enum dispatch_group insn_group;
/* Unrecognized insns cannot be accounted; skip them.  */
53264 if (INSN_CODE (insn) < 0)
53267 byte_len = min_insn_size (insn);
53268 window_list = dispatch_window_list;
53269 next_list = window_list->next;
53270 path = get_insn_path (insn);
53271 insn_group = get_insn_group (insn);
53273 /* Get the last dispatch window. */
53275 window_list = dispatch_window_list->next;
/* Derive the uop count from the decode path.  */
53277 if (path == path_single)
53279 else if (path == path_double)
53282 insn_num_uops = (int) path;
53284 /* If current window is full, get a new window.
53285 Window number zero is full, if MAX_INSN uops are scheduled in it.
53286 Window number one is full, if window zero's bytes plus window
53287 one's bytes is 32, or if the bytes of the new instruction added
53288 to the total makes it greater than 48, or it has already MAX_INSN
53289 instructions in it. */
53290 num_insn = window_list->num_insn;
53291 num_uops = window_list->num_uops;
53292 window_num = window_list->window_num;
53293 insn_fits = fits_dispatch_window (insn);
53295 if (num_insn >= MAX_INSN
53296 || num_uops + insn_num_uops > MAX_INSN
/* Flip between window 0 and window 1.  */
53299 window_num = ~window_num & 1;
53300 window_list = allocate_next_window (window_num);
53303 if (window_num == 0)
53305 add_insn_window (insn, window_list, insn_num_uops);
53306 if (window_list->num_insn >= MAX_INSN
53307 && insn_group == disp_branch)
53309 process_end_window ();
53313 else if (window_num == 1)
53315 window0_list = window_list->prev;
53316 sum = window0_list->window_size + window_list->window_size;
53318 || (byte_len + sum) >= 48)
53320 process_end_window ();
53321 window_list = dispatch_window_list;
53324 add_insn_window (insn, window_list, insn_num_uops);
53327 gcc_unreachable ();
53329 if (is_end_basic_block (insn_group))
53331 /* End of basic block is reached do end-basic-block process. */
53332 process_end_window ();
53337 /* Print the dispatch window, WINDOW_NUM, to FILE. */
53339 DEBUG_FUNCTION static void
53340 debug_dispatch_window_file (FILE *file, int window_num)
53342 dispatch_windows *list;
53345 if (window_num == 0)
53346 list = dispatch_window_list;
53348 list = dispatch_window_list1;
/* Dump the window-wide counters first, then each occupied slot.  */
53350 fprintf (file, "Window #%d:\n", list->window_num);
53351 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
53352 list->num_insn, list->num_uops, list->window_size);
53353 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
53354 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
53356 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
53358 fprintf (file, " insn info:\n");
53360 for (i = 0; i < MAX_INSN; i++)
53362 if (!list->window[i].insn)
53364 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
53365 i, group_name[list->window[i].group],
53366 i, (void *)list->window[i].insn,
53367 i, list->window[i].path,
53368 i, list->window[i].byte_len,
53369 i, list->window[i].imm_bytes);
53373 /* Print to stdout a dispatch window. */
53375 DEBUG_FUNCTION void
53376 debug_dispatch_window (int window_num)
53378 debug_dispatch_window_file (stdout, window_num);
53381 /* Print INSN dispatch information to FILE. */
53383 DEBUG_FUNCTION static void
53384 debug_insn_dispatch_info_file (FILE *file, rtx_insn *insn)
53387 enum insn_path path;
53388 enum dispatch_group group;
53390 int num_imm_operand;
53391 int num_imm32_operand;
53392 int num_imm64_operand;
53394 if (INSN_CODE (insn) < 0)
53397 byte_len = min_insn_size (insn);
53398 path = get_insn_path (insn);
53399 group = get_insn_group (insn);
53400 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
53401 &num_imm64_operand);
53403 fprintf (file, " insn info:\n");
53404 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
53405 group_name[group], path, byte_len);
53406 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
53407 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
53410 /* Print to STDERR the status of the ready list with respect to
53411 dispatch windows.  Iterates over the scheduler's ready list and
   dumps dispatch info for each element to stdout.  */
53413 DEBUG_FUNCTION void
53414 debug_ready_dispatch (void)
53417 int no_ready = number_in_ready ();
53419 fprintf (stdout, "Number of ready: %d\n", no_ready);
53421 for (i = 0; i < no_ready; i++)
53422 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
53425 /* This routine is the driver of the dispatch scheduler.  MODE is
   either DISPATCH_INIT (reset scheduler state) or
   ADD_TO_DISPATCH_WINDOW (account INSN in the current window).  */
53428 do_dispatch (rtx_insn *insn, int mode)
53430 if (mode == DISPATCH_INIT)
53431 init_dispatch_sched ();
53432 else if (mode == ADD_TO_DISPATCH_WINDOW)
53433 add_to_dispatch_window (insn);
53436 /* Return TRUE if Dispatch Scheduling is supported.  Only enabled on
   AMD bdver1-4 and znver1 when -mdispatch-scheduler is in effect;
   ACTION selects which query is answered.  */
53439 has_dispatch (rtx_insn *insn, int action)
53441 if ((TARGET_BDVER1 || TARGET_BDVER2 || TARGET_BDVER3
53442 || TARGET_BDVER4 || TARGET_ZNVER1) && flag_dispatch_scheduler)
53448 case IS_DISPATCH_ON:
53453 return is_cmp (insn);
53455 case DISPATCH_VIOLATION:
53456 return dispatch_violation ();
53458 case FITS_DISPATCH_WINDOW:
53459 return fits_dispatch_window (insn);
53465 /* Implementation of reassociation_width target hook used by
53466 reassoc phase to identify parallelism level in reassociated
53467 tree. Statements tree_code is passed in OPC. Arguments type
53470 Currently parallel reassociation is enabled for Atom
53471 processors only and we set reassociation width to be 2
53472 because Atom may issue up to 2 instructions per cycle.
53474 Return value should be fixed if parallel reassociation is
53475 enabled for other processors. */
53478 ix86_reassociation_width (unsigned int, machine_mode mode)
53481 if (VECTOR_MODE_P (mode))
53483 if (TARGET_VECTOR_PARALLEL_EXECUTION)
/* Scalar case: width 2 for parallel-reassoc tunings; 64-bit Haswell
   FP gets 4.  */
53490 if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
53492 else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
53493 return ((TARGET_64BIT && ix86_tune == PROCESSOR_HASWELL)? 4 : 2);
53498 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
53499 place emms and femms instructions. */
/* Return the preferred SIMD vector mode for scalar MODE: widest
   available of 512/256/128-bit depending on AVX512F/AVX512BW, AVX
   (unless -mprefer-avx128) or SSE.  */
53501 static machine_mode
53502 ix86_preferred_simd_mode (machine_mode mode)
53510 return TARGET_AVX512BW ? V64QImode :
53511 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
53513 return TARGET_AVX512BW ? V32HImode :
53514 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
53516 return TARGET_AVX512F ? V16SImode :
53517 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
53519 return TARGET_AVX512F ? V8DImode :
53520 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;
53523 if (TARGET_AVX512F)
53525 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
/* Double is only vectorized when -mvectorize-double allows it.  */
53531 if (!TARGET_VECTORIZE_DOUBLE)
53533 else if (TARGET_AVX512F)
53535 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
53537 else if (TARGET_SSE2)
53546 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
53547 vectors. If AVX512F is enabled then try vectorizing with 512bit,
53548 256bit and 128bit vectors.  Returns a bitmask of byte sizes
   (64 | 32 | 16); 0 means only the default size is tried.  */
53550 static unsigned int
53551 ix86_autovectorize_vector_sizes (void)
53553 return TARGET_AVX512F ? 64 | 32 | 16 :
53554 (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
53557 /* Implementation of targetm.vectorize.get_mask_mode.  Return the
   mode to be used for a vector comparison mask with NUNITS elements
   over vectors of VECTOR_SIZE bytes: a scalar AVX-512 mask mode when
   available, otherwise an integer vector of matching layout.  */
53559 static machine_mode
53560 ix86_get_mask_mode (unsigned nunits, unsigned vector_size)
53562 unsigned elem_size = vector_size / nunits;
53564 /* Scalar mask case.  */
53565 if ((TARGET_AVX512F && vector_size == 64)
53566 || (TARGET_AVX512VL && (vector_size == 32 || vector_size == 16)))
/* AVX512BW adds byte/word mask ops; otherwise only 4/8-byte elems.  */
53568 if (elem_size == 4 || elem_size == 8 || TARGET_AVX512BW)
53569 return smallest_mode_for_size (nunits, MODE_INT);
/* Fallback: a same-layout integer vector serves as the mask.  */
53572 machine_mode elem_mode
53573 = smallest_mode_for_size (elem_size * BITS_PER_UNIT, MODE_INT);
53575 gcc_assert (elem_size * nunits == vector_size);
53577 return mode_for_vector (elem_mode, nunits);
53582 /* Return class of registers which could be used for pseudo of MODE
53583 and of class RCLASS for spilling instead of memory. Return NO_REGS
53584 if it is not possible or non-profitable.  Only SImode/DImode
   integer pseudos may be spilled to SSE registers, and only when
   MMX is off (to avoid mode-switch penalties).  */
53586 ix86_spill_class (reg_class_t rclass, machine_mode mode)
53588 if (TARGET_SSE && TARGET_GENERAL_REGS_SSE_SPILL && ! TARGET_MMX
53589 && (mode == SImode || (TARGET_64BIT && mode == DImode))
53590 && rclass != NO_REGS && INTEGER_CLASS_P (rclass))
53591 return ALL_SSE_REGS;
53595 /* Implement targetm.vectorize.init_cost.  Allocate and zero the
   three accumulators (prologue, body, epilogue) used by the
   vectorizer cost model; caller owns the returned memory.  */
53598 ix86_init_cost (struct loop *)
53600 unsigned *cost = XNEWVEC (unsigned, 3);
53601 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
53605 /* Implement targetm.vectorize.add_stmt_cost.  Add COUNT occurrences
   of a statement of kind KIND to the accumulator selected by WHERE
   in DATA (the array from ix86_init_cost), returning the cost that
   was added.  */
53608 ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
53609 struct _stmt_vec_info *stmt_info, int misalign,
53610 enum vect_cost_model_location where)
53612 unsigned *cost = (unsigned *) data;
53613 unsigned retval = 0;
53615 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
53616 int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
53618 /* Statements in an inner loop relative to the loop being
53619 vectorized are weighted more heavily. The value here is
53620 arbitrary and could potentially be improved with analysis. */
53621 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
53622 count *= 50; /* FIXME. */
53624 retval = (unsigned) (count * stmt_cost);
53626 /* We need to multiply all vector stmt cost by 1.7 (estimated cost)
53627 for Silvermont as it has out of order integer pipeline and can execute
53628 2 scalar instruction per tick, but has in order SIMD pipeline. */
53629 if (TARGET_SILVERMONT || TARGET_INTEL)
53630 if (stmt_info && stmt_info->stmt)
53632 tree lhs_op = gimple_get_lhs (stmt_info->stmt);
53633 if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
53634 retval = (retval * 17) / 10;
53637 cost[where] += retval;
53642 /* Implement targetm.vectorize.finish_cost.  Copy the accumulated
   prologue/body/epilogue costs out of DATA into the caller's
   output parameters.  */
53645 ix86_finish_cost (void *data, unsigned *prologue_cost,
53646 unsigned *body_cost, unsigned *epilogue_cost)
53648 unsigned *cost = (unsigned *) data;
53649 *prologue_cost = cost[vect_prologue];
53650 *body_cost = cost[vect_body];
53651 *epilogue_cost = cost[vect_epilogue];
53654 /* Implement targetm.vectorize.destroy_cost_data.  Release the cost
   array allocated by ix86_init_cost.  */
53657 ix86_destroy_cost_data (void *data)
53662 /* Validate target specific memory model bits in VAL.  Checks the
   IX86_HLE_ACQUIRE/IX86_HLE_RELEASE bits against the base C11
   memory model; on any inconsistency warns and returns a corrected
   model (SEQ_CST, possibly keeping the valid HLE bit).  */
53664 static unsigned HOST_WIDE_INT
53665 ix86_memmodel_check (unsigned HOST_WIDE_INT val)
53667 enum memmodel model = memmodel_from_int (val);
/* Reject unknown bits, and HLE_ACQUIRE combined with HLE_RELEASE.  */
53670 if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
53672 || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
53674 warning (OPT_Winvalid_memory_model,
53675 "Unknown architecture specific memory model");
53676 return MEMMODEL_SEQ_CST;
/* HLE_ACQUIRE requires acquire-or-stronger; HLE_RELEASE requires
   release-or-stronger.  */
53678 strong = (is_mm_acq_rel (model) || is_mm_seq_cst (model));
53679 if (val & IX86_HLE_ACQUIRE && !(is_mm_acquire (model) || strong))
53681 warning (OPT_Winvalid_memory_model,
53682 "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
53683 return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
53685 if (val & IX86_HLE_RELEASE && !(is_mm_release (model) || strong))
53687 warning (OPT_Winvalid_memory_model,
53688 "HLE_RELEASE not used with RELEASE or stronger memory model");
53689 return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
53694 /* Set CLONEI->vecsize_mangle, CLONEI->vecsize_int,
53695 CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
53696 CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
53697 or number of vecsize_mangle variants that should be emitted. */
53700 ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
53701 struct cgraph_simd_clone *clonei,
53702 tree base_type, int num)
/* simdlen, when given, must be a power of two in [2, 16].  */
53706 if (clonei->simdlen
53707 && (clonei->simdlen < 2
53708 || clonei->simdlen > 16
53709 || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
53711 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
53712 "unsupported simdlen %d", clonei->simdlen);
/* Reject return and argument types whose modes we cannot clone.  */
53716 tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
53717 if (TREE_CODE (ret_type) != VOID_TYPE)
53718 switch (TYPE_MODE (ret_type))
53730 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
53731 "unsupported return type %qT for simd\n", ret_type);
53738 for (t = DECL_ARGUMENTS (node->decl), i = 0; t; t = DECL_CHAIN (t), i++)
53739 /* FIXME: Shouldn't we allow such arguments if they are uniform? */
53740 switch (TYPE_MODE (TREE_TYPE (t)))
53752 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
53753 "unsupported argument type %qT for simd\n", TREE_TYPE (t));
/* Pick the ISA mangling letter: 'b' (SSE), 'c' (AVX), 'd' (AVX2).  */
53757 if (clonei->cilk_elemental)
53759 /* Parse here processor clause. If not present, default to 'b'. */
53760 clonei->vecsize_mangle = 'b';
53762 else if (!TREE_PUBLIC (node->decl))
53764 /* If the function isn't exported, we can pick up just one ISA
53767 clonei->vecsize_mangle = 'd';
53768 else if (TARGET_AVX)
53769 clonei->vecsize_mangle = 'c';
53771 clonei->vecsize_mangle = 'b';
53776 clonei->vecsize_mangle = "bcd"[num];
/* Vector register widths implied by each mangling variant.  */
53779 switch (clonei->vecsize_mangle)
53782 clonei->vecsize_int = 128;
53783 clonei->vecsize_float = 128;
53786 clonei->vecsize_int = 128;
53787 clonei->vecsize_float = 256;
53790 clonei->vecsize_int = 256;
53791 clonei->vecsize_float = 256;
/* Derive simdlen from the vector width and BASE_TYPE, capped at 16.  */
53794 if (clonei->simdlen == 0)
53796 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
53797 clonei->simdlen = clonei->vecsize_int;
53799 clonei->simdlen = clonei->vecsize_float;
53800 clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
53801 if (clonei->simdlen > 16)
53802 clonei->simdlen = 16;
53807 /* Add target attribute to SIMD clone NODE if needed.  Maps the
   clone's vecsize_mangle letter to a target attribute string, then
   re-validates and re-activates the function's target options.  */
53810 ix86_simd_clone_adjust (struct cgraph_node *node)
53812 const char *str = NULL;
53813 gcc_assert (node->decl == cfun->decl);
53814 switch (node->simdclone->vecsize_mangle)
53829 gcc_unreachable ();
53834 tree args = build_tree_list (NULL_TREE, build_string (strlen (str), str));
53835 bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0);
/* Force re-evaluation of the per-function target state.  */
53838 ix86_reset_previous_fndecl ();
53839 ix86_set_current_function (node->decl);
53842 /* If SIMD clone NODE can't be used in a vectorized loop
53843 in current function, return -1, otherwise return a badness of using it
53844 (0 if it is most desirable from vecsize_mangle point of view, 1
53845 slightly less desirable, etc.). */
53848 ix86_simd_clone_usable (struct cgraph_node *node)
53850 switch (node->simdclone->vecsize_mangle)
/* Lower badness when the clone's ISA matches the current target more
   closely (visible cases penalize mismatched AVX2 availability).  */
53857 return TARGET_AVX2 ? 2 : 1;
53861 return TARGET_AVX2 ? 1 : 0;
53868 gcc_unreachable ();
53872 /* This function adjusts the unroll factor based on
53873 the hardware capabilities. For ex, bdver3 has
53874 a loop buffer which makes unrolling of smaller
53875 loops less important. This function decides the
53876 unroll factor using number of memory references
53877 (value 32 is used) as a heuristic. */
53880 ix86_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
53885 unsigned mem_count = 0;
/* Only tunings with X86_TUNE_ADJUST_UNROLL use this heuristic.  */
53887 if (!TARGET_ADJUST_UNROLL)
53890 /* Count the number of memory references within the loop body.
53891 This value determines the unrolling factor for bdver3 and bdver4
53893 subrtx_iterator::array_type array;
53894 bbs = get_loop_body (loop);
53895 for (i = 0; i < loop->num_nodes; i++)
53896 FOR_BB_INSNS (bbs[i], insn)
53897 if (NONDEBUG_INSN_P (insn))
53898 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
53899 if (const_rtx x = *iter)
53902 machine_mode mode = GET_MODE (x);
53903 unsigned int n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
/* Heuristic: unroll so that roughly 32 memory refs fit.  */
53911 if (mem_count && mem_count <=32)
53912 return 32/mem_count;
53918 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
53921 ix86_float_exceptions_rounding_supported_p (void)
53923 /* For x87 floating point with standard excess precision handling,
53924 there is no adddf3 pattern (since x87 floating point only has
53925 XFmode operations) so the default hook implementation gets this
53927 return TARGET_80387 || TARGET_SSE_MATH;
53930 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV.  Build trees in
   *HOLD, *CLEAR and *UPDATE that save/restore the FP environment
   around an atomic compound assignment: x87 state via
   fnstenv/fnclex/fnstsw/fldenv and SSE state via stmxcsr/ldmxcsr,
   finally raising accumulated exceptions with
   __atomic_feraiseexcept.  */
53933 ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
/* Nothing to do when neither FP unit is in use.  */
53935 if (!TARGET_80387 && !TARGET_SSE_MATH)
53937 tree exceptions_var = create_tmp_var_raw (integer_type_node);
/* --- x87 part: save env (fnstenv), clear exceptions (fnclex),
   read status word (fnstsw), restore env (fldenv).  */
53940 tree fenv_index_type = build_index_type (size_int (6));
53941 tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
53942 tree fenv_var = create_tmp_var_raw (fenv_type);
53943 TREE_ADDRESSABLE (fenv_var) = 1;
53944 tree fenv_ptr = build_pointer_type (fenv_type);
53945 tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
53946 fenv_addr = fold_convert (ptr_type_node, fenv_addr);
53947 tree fnstenv = ix86_builtins[IX86_BUILTIN_FNSTENV];
53948 tree fldenv = ix86_builtins[IX86_BUILTIN_FLDENV];
53949 tree fnstsw = ix86_builtins[IX86_BUILTIN_FNSTSW];
53950 tree fnclex = ix86_builtins[IX86_BUILTIN_FNCLEX];
53951 tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
53952 tree hold_fnclex = build_call_expr (fnclex, 0);
53953 fenv_var = build4 (TARGET_EXPR, fenv_type, fenv_var, hold_fnstenv,
53954 NULL_TREE, NULL_TREE);
53955 *hold = build2 (COMPOUND_EXPR, void_type_node, fenv_var,
53957 *clear = build_call_expr (fnclex, 0);
53958 tree sw_var = create_tmp_var_raw (short_unsigned_type_node);
53959 tree fnstsw_call = build_call_expr (fnstsw, 0);
53960 tree sw_mod = build2 (MODIFY_EXPR, short_unsigned_type_node,
53961 sw_var, fnstsw_call);
53962 tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
53963 tree update_mod = build2 (MODIFY_EXPR, integer_type_node,
53964 exceptions_var, exceptions_x87);
53965 *update = build2 (COMPOUND_EXPR, integer_type_node,
53966 sw_mod, update_mod);
53967 tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
53968 *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
/* --- SSE part: save MXCSR, mask exceptions (set 0x1f80), clear
   sticky flags (mask 0xffffffc0), restore on update.  */
53970 if (TARGET_SSE_MATH)
53972 tree mxcsr_orig_var = create_tmp_var_raw (unsigned_type_node);
53973 tree mxcsr_mod_var = create_tmp_var_raw (unsigned_type_node);
53974 tree stmxcsr = ix86_builtins[IX86_BUILTIN_STMXCSR];
53975 tree ldmxcsr = ix86_builtins[IX86_BUILTIN_LDMXCSR];
53976 tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
53977 tree hold_assign_orig = build2 (MODIFY_EXPR, unsigned_type_node,
53978 mxcsr_orig_var, stmxcsr_hold_call);
53979 tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
53981 build_int_cst (unsigned_type_node, 0x1f80));
53982 hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
53983 build_int_cst (unsigned_type_node, 0xffffffc0));
53984 tree hold_assign_mod = build2 (MODIFY_EXPR, unsigned_type_node,
53985 mxcsr_mod_var, hold_mod_val);
53986 tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
53987 tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
53988 hold_assign_orig, hold_assign_mod);
53989 hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
53990 ldmxcsr_hold_call);
53992 *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
53995 tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
53997 *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
53998 ldmxcsr_clear_call);
54000 *clear = ldmxcsr_clear_call;
54001 tree stxmcsr_update_call = build_call_expr (stmxcsr, 0);
54002 tree exceptions_sse = fold_convert (integer_type_node,
54003 stxmcsr_update_call);
/* Merge SSE exception bits with the x87 ones already gathered.  */
54006 tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
54007 exceptions_var, exceptions_sse);
54008 tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
54009 exceptions_var, exceptions_mod);
54010 *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
54011 exceptions_assign);
54014 *update = build2 (MODIFY_EXPR, integer_type_node,
54015 exceptions_var, exceptions_sse);
54016 tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
54017 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
54018 ldmxcsr_update_call);
/* Finally re-raise all recorded exceptions atomically.  */
54020 tree atomic_feraiseexcept
54021 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
54022 tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
54023 1, exceptions_var);
54024 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
54025 atomic_feraiseexcept_call);
54028 /* Return mode to be used for bounds or VOIDmode
54029 if bounds are not supported.  Warns when -fcheck-pointer-bounds is
   requested but MPX is unavailable.  */
54031 static enum machine_mode
54032 ix86_mpx_bound_mode ()
54034 /* Do not support pointer checker if MPX
54038 if (flag_check_pointer_bounds)
54039 warning (0, "Pointer Checker requires MPX support on this target."
54040 " Use -mmpx options to enable MPX.");
54047 /* Return constant used to statically initialize constant bounds.
54049 This function is used to create special bound values. For now
54050 only INIT bounds and NONE bounds are expected. More special
54051 values may be added later. */
54054 ix86_make_bounds_constant (HOST_WIDE_INT lb, HOST_WIDE_INT ub)
/* Bounds are encoded as a complex of (low, ~high): INIT is (0,-1),
   NONE is (-1,0) — the assert below enforces exactly these pairs.  */
54056 tree low = lb ? build_minus_one_cst (pointer_sized_int_node)
54057 : build_zero_cst (pointer_sized_int_node);
54058 tree high = ub ? build_zero_cst (pointer_sized_int_node)
54059 : build_minus_one_cst (pointer_sized_int_node);
54061 /* This function is supposed to be used to create INIT and
54062 NONE bounds only. */
54063 gcc_assert ((lb == 0 && ub == -1)
54064 || (lb == -1 && ub == 0));
54066 return build_complex (NULL, low, high);
54069 /* Generate a list of statements STMTS to initialize pointer bounds
54070 variable VAR with bounds LB and UB. Return the number of generated
   statements.  Stores LB at VAR and ~UB at VAR + pointer size (MPX
   keeps the upper bound one's-complemented in memory).  */
54074 ix86_initialize_bounds (tree var, tree lb, tree ub, tree *stmts)
54076 tree bnd_ptr = build_pointer_type (pointer_sized_int_node);
54077 tree lhs, modify, var_p;
54079 ub = build1 (BIT_NOT_EXPR, pointer_sized_int_node, ub);
54080 var_p = fold_convert (bnd_ptr, build_fold_addr_expr (var));
/* First word: lower bound.  */
54082 lhs = build1 (INDIRECT_REF, pointer_sized_int_node, var_p);
54083 modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, lb);
54084 append_to_statement_list (modify, stmts);
/* Second word: complemented upper bound.  */
54086 lhs = build1 (INDIRECT_REF, pointer_sized_int_node,
54087 build2 (POINTER_PLUS_EXPR, bnd_ptr, var_p,
54088 TYPE_SIZE_UNIT (pointer_sized_int_node)));
54089 modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, ub);
54090 append_to_statement_list (modify, stmts);
54095 #if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES
54096 /* For i386, common symbol is local only for non-PIE binaries. For
54097 x86-64, common symbol is local only for non-PIE binaries or linker
54098 supports copy reloc in PIE binaries. */
54101 ix86_binds_local_p (const_tree exp)
54103 return default_binds_local_p_3 (exp, flag_shlib != 0, true, true,
54106 && HAVE_LD_PIE_COPYRELOC != 0)));
54110 /* If MEM is in the form of [base+offset], extract the two parts
54111 of address and set to BASE and OFFSET, otherwise return false.
   A plain register or SYMBOL_REF counts as base with offset 0.  */
54114 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
54118 gcc_assert (MEM_P (mem));
54120 addr = XEXP (mem, 0);
/* Strip a CONST wrapper, if any.  */
54122 if (GET_CODE (addr) == CONST)
54123 addr = XEXP (addr, 0);
54125 if (REG_P (addr) || GET_CODE (addr) == SYMBOL_REF)
54128 *offset = const0_rtx;
/* (plus (reg|symbol) (const_int)) form.  */
54132 if (GET_CODE (addr) == PLUS
54133 && (REG_P (XEXP (addr, 0))
54134 || GET_CODE (XEXP (addr, 0)) == SYMBOL_REF)
54135 && CONST_INT_P (XEXP (addr, 1)))
54137 *base = XEXP (addr, 0);
54138 *offset = XEXP (addr, 1);
54145 /* Given OPERANDS of consecutive load/store, check if we can merge
54146 them into move multiple. LOAD is true if they are load instructions.
54147 MODE is the mode of memory operands.  Requires same register, same
   base, and the first memory location immediately below the second.  */
54150 ix86_operands_ok_for_move_multiple (rtx *operands, bool load,
54151 enum machine_mode mode)
54153 HOST_WIDE_INT offval_1, offval_2, msize;
54154 rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, offset_1, offset_2;
/* For loads, mems are operands 1/3 and regs 0/2; for stores the
   roles are swapped.  */
54158 mem_1 = operands[1];
54159 mem_2 = operands[3];
54160 reg_1 = operands[0];
54161 reg_2 = operands[2];
54165 mem_1 = operands[0];
54166 mem_2 = operands[2];
54167 reg_1 = operands[1];
54168 reg_2 = operands[3];
54171 gcc_assert (REG_P (reg_1) && REG_P (reg_2));
54173 if (REGNO (reg_1) != REGNO (reg_2))
54176 /* Check if the addresses are in the form of [base+offset]. */
54177 if (!extract_base_offset_in_addr (mem_1, &base_1, &offset_1))
54179 if (!extract_base_offset_in_addr (mem_2, &base_2, &offset_2))
54182 /* Check if the bases are the same. */
54183 if (!rtx_equal_p (base_1, base_2))
54186 offval_1 = INTVAL (offset_1);
54187 offval_2 = INTVAL (offset_2);
54188 msize = GET_MODE_SIZE (mode);
54189 /* Check if mem_1 is adjacent to mem_2 and mem_1 has lower address. */
54190 if (offval_1 + msize != offval_2)
54196 /* Implement the TARGET_OPTAB_SUPPORTED_P hook.  Decide whether the
   named optab OP is worth using for MODE1 under OPT_TYPE; the visible
   cases allow it only when optimizing for speed (and, for the SSE
   cases, when trapping math is off).  */
54199 ix86_optab_supported_p (int op, machine_mode mode1, machine_mode,
54200 optimization_type opt_type)
54214 return opt_type == OPTIMIZE_FOR_SPEED;
54217 if (SSE_FLOAT_MODE_P (mode1)
54219 && !flag_trapping_math
54221 return opt_type == OPTIMIZE_FOR_SPEED;
54227 if (SSE_FLOAT_MODE_P (mode1)
54229 && !flag_trapping_math
54232 return opt_type == OPTIMIZE_FOR_SPEED;
/* rsqrt is additionally gated on use_rsqrt_p.  */
54235 return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p ();
54242 /* Address space support.
54244 This is not "far pointers" in the 16-bit sense, but an easy way
54245 to use %fs and %gs segment prefixes. Therefore:
54247 (a) All address spaces have the same modes,
54248 (b) All address spaces have the same address forms,
54249 (c) While %fs and %gs are technically subsets of the generic
54250 address space, they are probably not subsets of each other.
54251 (d) Since we have no access to the segment base register values
54252 without resorting to a system call, we cannot convert a
54253 non-default address space to a default address space.
54254 Therefore we do not claim %fs or %gs are subsets of generic.
54255 (e) However, __seg_tls uses UNSPEC_TP as the base (which itself is
54256 stored at __seg_tls:0) so we can map between tls and generic. */
54259 ix86_addr_space_subset_p (addr_space_t subset, addr_space_t superset)
/* Only __seg_tls is a subset of generic (besides reflexivity).  */
54261 return (subset == superset
54262 || (superset == ADDR_SPACE_GENERIC
54263 && subset == ADDR_SPACE_SEG_TLS));
54265 #undef TARGET_ADDR_SPACE_SUBSET_P
54266 #define TARGET_ADDR_SPACE_SUBSET_P ix86_addr_space_subset_p
/* Convert pointer OP between __seg_tls and the generic address space
   by adding/subtracting the thread pointer (TARGET_ADDR_SPACE_CONVERT
   hook).  */
54269 ix86_addr_space_convert (rtx op, tree from_type, tree to_type)
54271 addr_space_t from_as = TYPE_ADDR_SPACE (TREE_TYPE (from_type));
54272 addr_space_t to_as = TYPE_ADDR_SPACE (TREE_TYPE (to_type));
54274 /* Conversion between SEG_TLS and GENERIC is handled by adding or
54275 subtracting the thread pointer. */
54276 if ((from_as == ADDR_SPACE_GENERIC && to_as == ADDR_SPACE_SEG_TLS)
54277 || (from_as == ADDR_SPACE_SEG_TLS && to_as == ADDR_SPACE_GENERIC))
54279 machine_mode mode = GET_MODE (op);
54280 if (mode == VOIDmode)
54282 rtx tp = get_thread_pointer (mode, optimize || mode != ptr_mode);
54283 return expand_binop (mode, (to_as == ADDR_SPACE_GENERIC
54284 ? add_optab : sub_optab),
54285 op, tp, NULL, 1, OPTAB_WIDEN);
54290 #undef TARGET_ADDR_SPACE_CONVERT
54291 #define TARGET_ADDR_SPACE_CONVERT ix86_addr_space_convert
/* TARGET_ADDR_SPACE_DEBUG hook: map __seg_tls to the concrete
   %fs/%gs segment for debug info.  */
54294 ix86_addr_space_debug (addr_space_t as)
54296 /* Fold __seg_tls to __seg_fs or __seg_gs for debugging. */
54297 if (as == ADDR_SPACE_SEG_TLS)
54298 as = DEFAULT_TLS_SEG_REG;
54301 #undef TARGET_ADDR_SPACE_DEBUG
54302 #define TARGET_ADDR_SPACE_DEBUG ix86_addr_space_debug
54304 /* All use of segmentation is assumed to make address 0 valid.
   (TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID hook: true for any
   non-generic space.)  */
54307 ix86_addr_space_zero_address_valid (addr_space_t as)
54309 return as != ADDR_SPACE_GENERIC;
54311 #undef TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID
54312 #define TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID ix86_addr_space_zero_address_valid
54314 /* Initialize the GCC target structure. */
54315 #undef TARGET_RETURN_IN_MEMORY
54316 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
54318 #undef TARGET_LEGITIMIZE_ADDRESS
54319 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
54321 #undef TARGET_ATTRIBUTE_TABLE
54322 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
54323 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
54324 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
54325 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
54326 # undef TARGET_MERGE_DECL_ATTRIBUTES
54327 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
54330 #undef TARGET_COMP_TYPE_ATTRIBUTES
54331 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
54333 #undef TARGET_INIT_BUILTINS
54334 #define TARGET_INIT_BUILTINS ix86_init_builtins
54335 #undef TARGET_BUILTIN_DECL
54336 #define TARGET_BUILTIN_DECL ix86_builtin_decl
54337 #undef TARGET_EXPAND_BUILTIN
54338 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
54340 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
54341 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
54342 ix86_builtin_vectorized_function
54344 #undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
54345 #define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load
54347 #undef TARGET_VECTORIZE_BUILTIN_TM_STORE
54348 #define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store
54350 #undef TARGET_VECTORIZE_BUILTIN_GATHER
54351 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
54353 #undef TARGET_VECTORIZE_BUILTIN_SCATTER
54354 #define TARGET_VECTORIZE_BUILTIN_SCATTER ix86_vectorize_builtin_scatter
54356 #undef TARGET_BUILTIN_RECIPROCAL
54357 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
54359 #undef TARGET_ASM_FUNCTION_EPILOGUE
54360 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
54362 #undef TARGET_ENCODE_SECTION_INFO
54363 #ifndef SUBTARGET_ENCODE_SECTION_INFO
54364 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
54366 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
54369 #undef TARGET_ASM_OPEN_PAREN
54370 #define TARGET_ASM_OPEN_PAREN ""
54371 #undef TARGET_ASM_CLOSE_PAREN
54372 #define TARGET_ASM_CLOSE_PAREN ""
54374 #undef TARGET_ASM_BYTE_OP
54375 #define TARGET_ASM_BYTE_OP ASM_BYTE
54377 #undef TARGET_ASM_ALIGNED_HI_OP
54378 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
54379 #undef TARGET_ASM_ALIGNED_SI_OP
54380 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
54382 #undef TARGET_ASM_ALIGNED_DI_OP
54383 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
54386 #undef TARGET_PROFILE_BEFORE_PROLOGUE
54387 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
54389 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
54390 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
54392 #undef TARGET_ASM_UNALIGNED_HI_OP
54393 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
54394 #undef TARGET_ASM_UNALIGNED_SI_OP
54395 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
54396 #undef TARGET_ASM_UNALIGNED_DI_OP
54397 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
54399 #undef TARGET_PRINT_OPERAND
54400 #define TARGET_PRINT_OPERAND ix86_print_operand
54401 #undef TARGET_PRINT_OPERAND_ADDRESS
54402 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
54403 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
54404 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
54405 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
54406 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
54408 #undef TARGET_SCHED_INIT_GLOBAL
54409 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
54410 #undef TARGET_SCHED_ADJUST_COST
54411 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
54412 #undef TARGET_SCHED_ISSUE_RATE
54413 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
54414 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
54415 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
54416 ia32_multipass_dfa_lookahead
54417 #undef TARGET_SCHED_MACRO_FUSION_P
54418 #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
54419 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
54420 #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
54422 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
54423 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
54425 #undef TARGET_MEMMODEL_CHECK
54426 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
54428 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
54429 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
54432 #undef TARGET_HAVE_TLS
54433 #define TARGET_HAVE_TLS true
54435 #undef TARGET_CANNOT_FORCE_CONST_MEM
54436 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
54437 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
54438 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
54440 #undef TARGET_DELEGITIMIZE_ADDRESS
54441 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
54443 #undef TARGET_MS_BITFIELD_LAYOUT_P
54444 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
54447 #undef TARGET_BINDS_LOCAL_P
54448 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
54450 #undef TARGET_BINDS_LOCAL_P
54451 #define TARGET_BINDS_LOCAL_P ix86_binds_local_p
54453 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
54454 #undef TARGET_BINDS_LOCAL_P
54455 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
54458 #undef TARGET_ASM_OUTPUT_MI_THUNK
54459 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
54460 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
54461 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
54463 #undef TARGET_ASM_FILE_START
54464 #define TARGET_ASM_FILE_START x86_file_start
54466 #undef TARGET_OPTION_OVERRIDE
54467 #define TARGET_OPTION_OVERRIDE ix86_option_override
54469 #undef TARGET_REGISTER_MOVE_COST
54470 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
54471 #undef TARGET_MEMORY_MOVE_COST
54472 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
54473 #undef TARGET_RTX_COSTS
54474 #define TARGET_RTX_COSTS ix86_rtx_costs
54475 #undef TARGET_ADDRESS_COST
54476 #define TARGET_ADDRESS_COST ix86_address_cost
54478 #undef TARGET_FIXED_CONDITION_CODE_REGS
54479 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
54480 #undef TARGET_CC_MODES_COMPATIBLE
54481 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
54483 #undef TARGET_MACHINE_DEPENDENT_REORG
54484 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
54486 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
54487 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
54489 #undef TARGET_BUILD_BUILTIN_VA_LIST
54490 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
/* Folding of target-specific builtins into tree/GIMPLE form.  */
#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN ix86_fold_builtin

/* Function multiversioning ("target" attribute dispatching) hooks.  */
#undef TARGET_COMPARE_VERSION_PRIORITY
#define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority

#undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
#define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
  ix86_generate_version_dispatcher_body

#undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
#define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
  ix86_get_function_versions_dispatcher

/* Varargs support: va_list type selection and va_start expansion.  */
#undef TARGET_ENUM_VA_LIST_P
#define TARGET_ENUM_VA_LIST_P ix86_enum_va_list

#undef TARGET_FN_ABI_VA_LIST
#define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list

#undef TARGET_CANONICAL_VA_LIST_TYPE
#define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start

/* Adjustment of inline asm outputs/clobbers.  */
#undef TARGET_MD_ASM_ADJUST
#define TARGET_MD_ASM_ADJUST ix86_md_asm_adjust

/* Argument passing and calling-convention hooks.  */
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG ix86_function_arg
#undef TARGET_INIT_PIC_REG
#define TARGET_INIT_PIC_REG ix86_init_pic_reg
#undef TARGET_USE_PSEUDO_PIC_REG
#define TARGET_USE_PSEUDO_PIC_REG ix86_use_pseudo_pic_reg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
#undef TARGET_UPDATE_STACK_BOUNDARY
#define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
#undef TARGET_GET_DRAP_RTX
#define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_STATIC_CHAIN
#define TARGET_STATIC_CHAIN ix86_static_chain
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
#undef TARGET_RETURN_POPS_ARGS
#define TARGET_RETURN_POPS_ARGS ix86_return_pops_args

/* Validation of insns produced by the combine pass.  */
#undef TARGET_LEGITIMATE_COMBINED_INSN
#define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn

/* AddressSanitizer shadow-memory offset for this target.  */
#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

/* Which scalar/vector machine modes this target supports.  */
#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p

#undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
#define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
  ix86_libgcc_floating_mode_supported_p

/* Machine mode for C constant suffixes.  */
#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix

/* DWARF output of DTPREL (thread-local) relocations.  */
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
/* Allow the OS/subtarget headers to add their own attributes to
   declarations.  The #ifdef must be closed here: as it stood, the
   conditional was left unbalanced, which is a preprocessing error.  */
#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif
/* C++ name mangling for target-specific types.  */
#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ix86_mangle_type

#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail

/* Placement and promotion of function return values.  */
#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ix86_override_options_after_change

#undef TARGET_MEMBER_TYPE_FORCES_BLK
#define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk

#undef TARGET_INSTANTIATE_DECLS
#define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls

/* Register allocation / reload class hooks.  */
#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD ix86_secondary_reload

#undef TARGET_CLASS_MAX_NREGS
#define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
#undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
#define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p

/* Vectorizer cost model and capability hooks.  */
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  ix86_builtin_vectorization_cost
#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  ix86_vectorize_vec_perm_const_ok
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  ix86_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  ix86_autovectorize_vector_sizes
#undef TARGET_VECTORIZE_GET_MASK_MODE
#define TARGET_VECTORIZE_GET_MASK_MODE ix86_get_mask_mode
#undef TARGET_VECTORIZE_INIT_COST
#define TARGET_VECTORIZE_INIT_COST ix86_init_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
#undef TARGET_VECTORIZE_FINISH_COST
#define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
#undef TARGET_VECTORIZE_DESTROY_COST_DATA
#define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data

/* Per-function target option ("target" attribute / pragma) support.  */
#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE ix86_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE ix86_function_specific_restore

#undef TARGET_OPTION_POST_STREAM_IN
#define TARGET_OPTION_POST_STREAM_IN ix86_function_specific_post_stream_in

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT ix86_function_specific_print

#undef TARGET_OPTION_FUNCTION_VERSIONS
#define TARGET_OPTION_FUNCTION_VERSIONS ix86_function_versions

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P ix86_can_inline_p

/* Address and constant legitimacy.  */
#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p

/* Always use LRA (the hook unconditionally returns true).  */
#undef TARGET_LRA_P
#define TARGET_LRA_P hook_bool_void_true

#undef TARGET_REGISTER_PRIORITY
#define TARGET_REGISTER_PRIORITY ix86_register_priority

#undef TARGET_REGISTER_USAGE_LEVELING_P
#define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p

/* Frame layout and epilogue/prologue related hooks.  */
#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ix86_can_eliminate

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry

#undef TARGET_ASM_CODE_END
#define TARGET_ASM_CODE_END ix86_code_end

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
/* On Darwin some libgcc routines need renaming; guard the override so
   darwin_rename_builtins is only referenced (via TARGET_INITIALIZER)
   when building for Mach-O targets — it is not declared elsewhere.  */
#if TARGET_MACHO
#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS darwin_rename_builtins
#endif
#undef TARGET_LOOP_UNROLL_ADJUST
#define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust

#undef TARGET_SPILL_CLASS
#define TARGET_SPILL_CLASS ix86_spill_class

/* SIMD function clone ("omp declare simd") hooks.  */
#undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
#define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
  ix86_simd_clone_compute_vecsize_and_simdlen

#undef TARGET_SIMD_CLONE_ADJUST
#define TARGET_SIMD_CLONE_ADJUST \
  ix86_simd_clone_adjust

#undef TARGET_SIMD_CLONE_USABLE
#define TARGET_SIMD_CLONE_USABLE \
  ix86_simd_clone_usable

#undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
#define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
  ix86_float_exceptions_rounding_supported_p

/* Mode-switching optimization hooks (optimize_mode_switching).  */
#undef TARGET_MODE_EMIT
#define TARGET_MODE_EMIT ix86_emit_mode_set

#undef TARGET_MODE_NEEDED
#define TARGET_MODE_NEEDED ix86_mode_needed

#undef TARGET_MODE_AFTER
#define TARGET_MODE_AFTER ix86_mode_after

#undef TARGET_MODE_ENTRY
#define TARGET_MODE_ENTRY ix86_mode_entry

#undef TARGET_MODE_EXIT
#define TARGET_MODE_EXIT ix86_mode_exit

#undef TARGET_MODE_PRIORITY
#define TARGET_MODE_PRIORITY ix86_mode_priority

#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true

/* Pointer Bounds Checker (MPX / chkp) hooks.  */
#undef TARGET_LOAD_BOUNDS_FOR_ARG
#define TARGET_LOAD_BOUNDS_FOR_ARG ix86_load_bounds

#undef TARGET_STORE_BOUNDS_FOR_ARG
#define TARGET_STORE_BOUNDS_FOR_ARG ix86_store_bounds

#undef TARGET_LOAD_RETURNED_BOUNDS
#define TARGET_LOAD_RETURNED_BOUNDS ix86_load_returned_bounds

#undef TARGET_STORE_RETURNED_BOUNDS
#define TARGET_STORE_RETURNED_BOUNDS ix86_store_returned_bounds

#undef TARGET_CHKP_BOUND_MODE
#define TARGET_CHKP_BOUND_MODE ix86_mpx_bound_mode

#undef TARGET_BUILTIN_CHKP_FUNCTION
#define TARGET_BUILTIN_CHKP_FUNCTION ix86_builtin_mpx_function

#undef TARGET_CHKP_FUNCTION_VALUE_BOUNDS
#define TARGET_CHKP_FUNCTION_VALUE_BOUNDS ix86_function_value_bounds

#undef TARGET_CHKP_MAKE_BOUNDS_CONSTANT
#define TARGET_CHKP_MAKE_BOUNDS_CONSTANT ix86_make_bounds_constant

#undef TARGET_CHKP_INITIALIZE_BOUNDS
#define TARGET_CHKP_INITIALIZE_BOUNDS ix86_initialize_bounds

#undef TARGET_SETUP_INCOMING_VARARG_BOUNDS
#define TARGET_SETUP_INCOMING_VARARG_BOUNDS ix86_setup_incoming_vararg_bounds

/* Option string passed to offload compilers.  */
#undef TARGET_OFFLOAD_OPTIONS
#define TARGET_OFFLOAD_OPTIONS \
  ix86_offload_options

/* Largest alignment (in bits) ever required on this target.  */
#undef TARGET_ABSOLUTE_BIGGEST_ALIGNMENT
#define TARGET_ABSOLUTE_BIGGEST_ALIGNMENT 512

#undef TARGET_OPTAB_SUPPORTED_P
#define TARGET_OPTAB_SUPPORTED_P ix86_optab_supported_p
/* Instantiate the target hook vector from all the TARGET_* macros
   defined above, with defaults supplied by target-def.h.  */
struct gcc_target targetm = TARGET_INITIALIZER;
54792 #include "gt-i386.h"