1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988-2016 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
22 #include "coretypes.h"
31 #include "stringpool.h"
38 #include "diagnostic.h"
41 #include "fold-const.h"
44 #include "stor-layout.h"
47 #include "insn-attr.h"
53 #include "common/common-target.h"
54 #include "langhooks.h"
58 #include "tm-constrs.h"
61 #include "sched-int.h"
63 #include "tree-pass.h"
65 #include "pass_manager.h"
66 #include "target-globals.h"
67 #include "gimple-iterator.h"
68 #include "tree-vectorizer.h"
69 #include "shrink-wrap.h"
72 #include "tree-iterator.h"
73 #include "tree-chkp.h"
76 #include "case-cfn-macros.h"
77 #include "regrename.h"
79 /* This file should be included last. */
80 #include "target-def.h"
/* Forward declarations of static helpers defined later in this file.  */
82 static rtx legitimize_dllimport_symbol (rtx, bool);
83 static rtx legitimize_pe_coff_extern_decl (rtx, bool);
84 static rtx legitimize_pe_coff_symbol (rtx, bool);
85 static void ix86_print_operand_address_as (FILE *, rtx, addr_space_t, bool);
#ifndef CHECK_STACK_LIMIT
/* Default stack-checking limit; -1 presumably means "no limit"
   (target headers may predefine this macro to override).  */
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.
   QImode/HImode/SImode/DImode map to 0..3; any other mode falls into
   the trailing "other" bucket, index 4.  The original text was cut off
   after the DImode arm, leaving the conditional expression (and its
   opening paren) unterminated; the final ": 4)" alternative completes
   it.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
99 /* Processor costs (relative to an add) */
100 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
101 #define COSTS_N_BYTES(N) ((N) * 2)
/* Strategy-table filler: always fall back to the library call, for any
   block size (-1 = unbounded).  Apparently used below for table entries
   a given tuning never exercises — TODO confirm against the consumers
   of these tables.  */
103 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}}
/* memcpy strategy when optimizing for size: byte-wise "rep" prefix for
   every block size (-1 = no upper bound) in both table entries
   (presumably [0] = 32-bit, [1] = 64-bit code — confirm).  */
105 static stringop_algs ix86_size_memcpy[2] = {
106 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
107 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
/* memset strategy when optimizing for size: identical to memcpy above.  */
108 static stringop_algs ix86_size_memset[2] = {
109 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
110 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
113 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
114 COSTS_N_BYTES (2), /* cost of an add instruction */
115 COSTS_N_BYTES (3), /* cost of a lea instruction */
116 COSTS_N_BYTES (2), /* variable shift costs */
117 COSTS_N_BYTES (3), /* constant shift costs */
118 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
119 COSTS_N_BYTES (3), /* HI */
120 COSTS_N_BYTES (3), /* SI */
121 COSTS_N_BYTES (3), /* DI */
122 COSTS_N_BYTES (5)}, /* other */
123 0, /* cost of multiply per each bit set */
124 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
125 COSTS_N_BYTES (3), /* HI */
126 COSTS_N_BYTES (3), /* SI */
127 COSTS_N_BYTES (3), /* DI */
128 COSTS_N_BYTES (5)}, /* other */
129 COSTS_N_BYTES (3), /* cost of movsx */
130 COSTS_N_BYTES (3), /* cost of movzx */
131 0, /* "large" insn */
133 2, /* cost for loading QImode using movzbl */
134 {2, 2, 2}, /* cost of loading integer registers
135 in QImode, HImode and SImode.
136 Relative to reg-reg move (2). */
137 {2, 2, 2}, /* cost of storing integer registers */
138 2, /* cost of reg,reg fld/fst */
139 {2, 2, 2}, /* cost of loading fp registers
140 in SFmode, DFmode and XFmode */
141 {2, 2, 2}, /* cost of storing fp registers
142 in SFmode, DFmode and XFmode */
143 3, /* cost of moving MMX register */
144 {3, 3}, /* cost of loading MMX registers
145 in SImode and DImode */
146 {3, 3}, /* cost of storing MMX registers
147 in SImode and DImode */
148 3, /* cost of moving SSE register */
149 {3, 3, 3}, /* cost of loading SSE registers
150 in SImode, DImode and TImode */
151 {3, 3, 3}, /* cost of storing SSE registers
152 in SImode, DImode and TImode */
153 3, /* MMX or SSE register to integer */
154 0, /* size of l1 cache */
155 0, /* size of l2 cache */
156 0, /* size of prefetch block */
157 0, /* number of parallel prefetches */
159 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
160 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
161 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
162 COSTS_N_BYTES (2), /* cost of FABS instruction. */
163 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
164 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
167 1, /* scalar_stmt_cost. */
168 1, /* scalar load_cost. */
169 1, /* scalar_store_cost. */
170 1, /* vec_stmt_cost. */
171 1, /* vec_to_scalar_cost. */
172 1, /* scalar_to_vec_cost. */
173 1, /* vec_align_load_cost. */
174 1, /* vec_unalign_load_cost. */
175 1, /* vec_store_cost. */
176 1, /* cond_taken_branch_cost. */
177 1, /* cond_not_taken_branch_cost. */
180 /* Processor costs (relative to an add) */
/* 386 memcpy: byte-wise "rep" for all sizes; the second entry is a
   dummy (presumably the 64-bit slot, unused for the 386).  */
181 static stringop_algs i386_memcpy[2] = {
182 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
183 DUMMY_STRINGOP_ALGS};
/* 386 memset: same strategy as memcpy.  */
184 static stringop_algs i386_memset[2] = {
185 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
186 DUMMY_STRINGOP_ALGS};
189 struct processor_costs i386_cost = { /* 386 specific costs */
190 COSTS_N_INSNS (1), /* cost of an add instruction */
191 COSTS_N_INSNS (1), /* cost of a lea instruction */
192 COSTS_N_INSNS (3), /* variable shift costs */
193 COSTS_N_INSNS (2), /* constant shift costs */
194 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
195 COSTS_N_INSNS (6), /* HI */
196 COSTS_N_INSNS (6), /* SI */
197 COSTS_N_INSNS (6), /* DI */
198 COSTS_N_INSNS (6)}, /* other */
199 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
200 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
201 COSTS_N_INSNS (23), /* HI */
202 COSTS_N_INSNS (23), /* SI */
203 COSTS_N_INSNS (23), /* DI */
204 COSTS_N_INSNS (23)}, /* other */
205 COSTS_N_INSNS (3), /* cost of movsx */
206 COSTS_N_INSNS (2), /* cost of movzx */
207 15, /* "large" insn */
209 4, /* cost for loading QImode using movzbl */
210 {2, 4, 2}, /* cost of loading integer registers
211 in QImode, HImode and SImode.
212 Relative to reg-reg move (2). */
213 {2, 4, 2}, /* cost of storing integer registers */
214 2, /* cost of reg,reg fld/fst */
215 {8, 8, 8}, /* cost of loading fp registers
216 in SFmode, DFmode and XFmode */
217 {8, 8, 8}, /* cost of storing fp registers
218 in SFmode, DFmode and XFmode */
219 2, /* cost of moving MMX register */
220 {4, 8}, /* cost of loading MMX registers
221 in SImode and DImode */
222 {4, 8}, /* cost of storing MMX registers
223 in SImode and DImode */
224 2, /* cost of moving SSE register */
225 {4, 8, 16}, /* cost of loading SSE registers
226 in SImode, DImode and TImode */
227 {4, 8, 16}, /* cost of storing SSE registers
228 in SImode, DImode and TImode */
229 3, /* MMX or SSE register to integer */
230 0, /* size of l1 cache */
231 0, /* size of l2 cache */
232 0, /* size of prefetch block */
233 0, /* number of parallel prefetches */
235 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
236 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
237 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
238 COSTS_N_INSNS (22), /* cost of FABS instruction. */
239 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
240 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
243 1, /* scalar_stmt_cost. */
244 1, /* scalar load_cost. */
245 1, /* scalar_store_cost. */
246 1, /* vec_stmt_cost. */
247 1, /* vec_to_scalar_cost. */
248 1, /* scalar_to_vec_cost. */
249 1, /* vec_align_load_cost. */
250 2, /* vec_unalign_load_cost. */
251 1, /* vec_store_cost. */
252 3, /* cond_taken_branch_cost. */
253 1, /* cond_not_taken_branch_cost. */
/* 486 memcpy: 4-byte (dword) "rep" for all sizes; second entry is a
   dummy (presumably the 64-bit slot, unused for the 486).  */
256 static stringop_algs i486_memcpy[2] = {
257 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
258 DUMMY_STRINGOP_ALGS};
/* 486 memset: same strategy as memcpy.  */
259 static stringop_algs i486_memset[2] = {
260 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
261 DUMMY_STRINGOP_ALGS};
264 struct processor_costs i486_cost = { /* 486 specific costs */
265 COSTS_N_INSNS (1), /* cost of an add instruction */
266 COSTS_N_INSNS (1), /* cost of a lea instruction */
267 COSTS_N_INSNS (3), /* variable shift costs */
268 COSTS_N_INSNS (2), /* constant shift costs */
269 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
270 COSTS_N_INSNS (12), /* HI */
271 COSTS_N_INSNS (12), /* SI */
272 COSTS_N_INSNS (12), /* DI */
273 COSTS_N_INSNS (12)}, /* other */
274 1, /* cost of multiply per each bit set */
275 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
276 COSTS_N_INSNS (40), /* HI */
277 COSTS_N_INSNS (40), /* SI */
278 COSTS_N_INSNS (40), /* DI */
279 COSTS_N_INSNS (40)}, /* other */
280 COSTS_N_INSNS (3), /* cost of movsx */
281 COSTS_N_INSNS (2), /* cost of movzx */
282 15, /* "large" insn */
284 4, /* cost for loading QImode using movzbl */
285 {2, 4, 2}, /* cost of loading integer registers
286 in QImode, HImode and SImode.
287 Relative to reg-reg move (2). */
288 {2, 4, 2}, /* cost of storing integer registers */
289 2, /* cost of reg,reg fld/fst */
290 {8, 8, 8}, /* cost of loading fp registers
291 in SFmode, DFmode and XFmode */
292 {8, 8, 8}, /* cost of storing fp registers
293 in SFmode, DFmode and XFmode */
294 2, /* cost of moving MMX register */
295 {4, 8}, /* cost of loading MMX registers
296 in SImode and DImode */
297 {4, 8}, /* cost of storing MMX registers
298 in SImode and DImode */
299 2, /* cost of moving SSE register */
300 {4, 8, 16}, /* cost of loading SSE registers
301 in SImode, DImode and TImode */
302 {4, 8, 16}, /* cost of storing SSE registers
303 in SImode, DImode and TImode */
304 3, /* MMX or SSE register to integer */
305 4, /* size of l1 cache. 486 has 8kB cache
306 shared for code and data, so 4kB is
307 not really precise. */
308 4, /* size of l2 cache */
309 0, /* size of prefetch block */
310 0, /* number of parallel prefetches */
312 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
313 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
314 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
315 COSTS_N_INSNS (3), /* cost of FABS instruction. */
316 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
317 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
320 1, /* scalar_stmt_cost. */
321 1, /* scalar load_cost. */
322 1, /* scalar_store_cost. */
323 1, /* vec_stmt_cost. */
324 1, /* vec_to_scalar_cost. */
325 1, /* scalar_to_vec_cost. */
326 1, /* vec_align_load_cost. */
327 2, /* vec_unalign_load_cost. */
328 1, /* vec_store_cost. */
329 3, /* cond_taken_branch_cost. */
330 1, /* cond_not_taken_branch_cost. */
/* Pentium memcpy: dword "rep" up to 256 bytes, library call beyond;
   second entry is a dummy (presumably the unused 64-bit slot).  */
333 static stringop_algs pentium_memcpy[2] = {
334 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
335 DUMMY_STRINGOP_ALGS};
/* Pentium memset: dword "rep" for every size.  */
336 static stringop_algs pentium_memset[2] = {
337 {libcall, {{-1, rep_prefix_4_byte, false}}},
338 DUMMY_STRINGOP_ALGS};
341 struct processor_costs pentium_cost = {
342 COSTS_N_INSNS (1), /* cost of an add instruction */
343 COSTS_N_INSNS (1), /* cost of a lea instruction */
344 COSTS_N_INSNS (4), /* variable shift costs */
345 COSTS_N_INSNS (1), /* constant shift costs */
346 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
347 COSTS_N_INSNS (11), /* HI */
348 COSTS_N_INSNS (11), /* SI */
349 COSTS_N_INSNS (11), /* DI */
350 COSTS_N_INSNS (11)}, /* other */
351 0, /* cost of multiply per each bit set */
352 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
353 COSTS_N_INSNS (25), /* HI */
354 COSTS_N_INSNS (25), /* SI */
355 COSTS_N_INSNS (25), /* DI */
356 COSTS_N_INSNS (25)}, /* other */
357 COSTS_N_INSNS (3), /* cost of movsx */
358 COSTS_N_INSNS (2), /* cost of movzx */
359 8, /* "large" insn */
361 6, /* cost for loading QImode using movzbl */
362 {2, 4, 2}, /* cost of loading integer registers
363 in QImode, HImode and SImode.
364 Relative to reg-reg move (2). */
365 {2, 4, 2}, /* cost of storing integer registers */
366 2, /* cost of reg,reg fld/fst */
367 {2, 2, 6}, /* cost of loading fp registers
368 in SFmode, DFmode and XFmode */
369 {4, 4, 6}, /* cost of storing fp registers
370 in SFmode, DFmode and XFmode */
371 8, /* cost of moving MMX register */
372 {8, 8}, /* cost of loading MMX registers
373 in SImode and DImode */
374 {8, 8}, /* cost of storing MMX registers
375 in SImode and DImode */
376 2, /* cost of moving SSE register */
377 {4, 8, 16}, /* cost of loading SSE registers
378 in SImode, DImode and TImode */
379 {4, 8, 16}, /* cost of storing SSE registers
380 in SImode, DImode and TImode */
381 3, /* MMX or SSE register to integer */
382 8, /* size of l1 cache. */
383 8, /* size of l2 cache */
384 0, /* size of prefetch block */
385 0, /* number of parallel prefetches */
387 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
388 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
389 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
390 COSTS_N_INSNS (1), /* cost of FABS instruction. */
391 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
392 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
395 1, /* scalar_stmt_cost. */
396 1, /* scalar load_cost. */
397 1, /* scalar_store_cost. */
398 1, /* vec_stmt_cost. */
399 1, /* vec_to_scalar_cost. */
400 1, /* scalar_to_vec_cost. */
401 1, /* vec_align_load_cost. */
402 2, /* vec_unalign_load_cost. */
403 1, /* vec_store_cost. */
404 3, /* cond_taken_branch_cost. */
405 1, /* cond_not_taken_branch_cost. */
409 struct processor_costs lakemont_cost = {
410 COSTS_N_INSNS (1), /* cost of an add instruction */
411 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
412 COSTS_N_INSNS (1), /* variable shift costs */
413 COSTS_N_INSNS (1), /* constant shift costs */
414 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
415 COSTS_N_INSNS (11), /* HI */
416 COSTS_N_INSNS (11), /* SI */
417 COSTS_N_INSNS (11), /* DI */
418 COSTS_N_INSNS (11)}, /* other */
419 0, /* cost of multiply per each bit set */
420 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
421 COSTS_N_INSNS (25), /* HI */
422 COSTS_N_INSNS (25), /* SI */
423 COSTS_N_INSNS (25), /* DI */
424 COSTS_N_INSNS (25)}, /* other */
425 COSTS_N_INSNS (3), /* cost of movsx */
426 COSTS_N_INSNS (2), /* cost of movzx */
427 8, /* "large" insn */
429 6, /* cost for loading QImode using movzbl */
430 {2, 4, 2}, /* cost of loading integer registers
431 in QImode, HImode and SImode.
432 Relative to reg-reg move (2). */
433 {2, 4, 2}, /* cost of storing integer registers */
434 2, /* cost of reg,reg fld/fst */
435 {2, 2, 6}, /* cost of loading fp registers
436 in SFmode, DFmode and XFmode */
437 {4, 4, 6}, /* cost of storing fp registers
438 in SFmode, DFmode and XFmode */
439 8, /* cost of moving MMX register */
440 {8, 8}, /* cost of loading MMX registers
441 in SImode and DImode */
442 {8, 8}, /* cost of storing MMX registers
443 in SImode and DImode */
444 2, /* cost of moving SSE register */
445 {4, 8, 16}, /* cost of loading SSE registers
446 in SImode, DImode and TImode */
447 {4, 8, 16}, /* cost of storing SSE registers
448 in SImode, DImode and TImode */
449 3, /* MMX or SSE register to integer */
450 8, /* size of l1 cache. */
451 8, /* size of l2 cache */
452 0, /* size of prefetch block */
453 0, /* number of parallel prefetches */
455 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
456 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
457 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
458 COSTS_N_INSNS (1), /* cost of FABS instruction. */
459 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
460 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
463 1, /* scalar_stmt_cost. */
464 1, /* scalar load_cost. */
465 1, /* scalar_store_cost. */
466 1, /* vec_stmt_cost. */
467 1, /* vec_to_scalar_cost. */
468 1, /* scalar_to_vec_cost. */
469 1, /* vec_align_load_cost. */
470 2, /* vec_unalign_load_cost. */
471 1, /* vec_store_cost. */
472 3, /* cond_taken_branch_cost. */
473 1, /* cond_not_taken_branch_cost. */
476 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
477 (we ensure the alignment). For small blocks inline loop is still a
478 noticeable win, for bigger blocks either rep movsl or rep movsb is
479 way to go. Rep movsb has apparently more expensive startup time in CPU,
480 but after 4K the difference is down in the noise. */
/* PentiumPro memcpy (rationale in the comment above): inline loop to
   128 bytes, unrolled loop to 1K, dword "rep" to 8K, byte "rep"
   beyond; second entry is a dummy (presumably the unused 64-bit
   slot).  */
481 static stringop_algs pentiumpro_memcpy[2] = {
482 {rep_prefix_4_byte, {{128, loop, false}, {1024, unrolled_loop, false},
483 {8192, rep_prefix_4_byte, false},
484 {-1, rep_prefix_1_byte, false}}},
485 DUMMY_STRINGOP_ALGS};
/* PentiumPro memset: like memcpy but no small-block loop tier, and a
   library call for blocks past 8K.  */
486 static stringop_algs pentiumpro_memset[2] = {
487 {rep_prefix_4_byte, {{1024, unrolled_loop, false},
488 {8192, rep_prefix_4_byte, false},
489 {-1, libcall, false}}},
490 DUMMY_STRINGOP_ALGS};
492 struct processor_costs pentiumpro_cost = {
493 COSTS_N_INSNS (1), /* cost of an add instruction */
494 COSTS_N_INSNS (1), /* cost of a lea instruction */
495 COSTS_N_INSNS (1), /* variable shift costs */
496 COSTS_N_INSNS (1), /* constant shift costs */
497 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
498 COSTS_N_INSNS (4), /* HI */
499 COSTS_N_INSNS (4), /* SI */
500 COSTS_N_INSNS (4), /* DI */
501 COSTS_N_INSNS (4)}, /* other */
502 0, /* cost of multiply per each bit set */
503 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
504 COSTS_N_INSNS (17), /* HI */
505 COSTS_N_INSNS (17), /* SI */
506 COSTS_N_INSNS (17), /* DI */
507 COSTS_N_INSNS (17)}, /* other */
508 COSTS_N_INSNS (1), /* cost of movsx */
509 COSTS_N_INSNS (1), /* cost of movzx */
510 8, /* "large" insn */
512 2, /* cost for loading QImode using movzbl */
513 {4, 4, 4}, /* cost of loading integer registers
514 in QImode, HImode and SImode.
515 Relative to reg-reg move (2). */
516 {2, 2, 2}, /* cost of storing integer registers */
517 2, /* cost of reg,reg fld/fst */
518 {2, 2, 6}, /* cost of loading fp registers
519 in SFmode, DFmode and XFmode */
520 {4, 4, 6}, /* cost of storing fp registers
521 in SFmode, DFmode and XFmode */
522 2, /* cost of moving MMX register */
523 {2, 2}, /* cost of loading MMX registers
524 in SImode and DImode */
525 {2, 2}, /* cost of storing MMX registers
526 in SImode and DImode */
527 2, /* cost of moving SSE register */
528 {2, 2, 8}, /* cost of loading SSE registers
529 in SImode, DImode and TImode */
530 {2, 2, 8}, /* cost of storing SSE registers
531 in SImode, DImode and TImode */
532 3, /* MMX or SSE register to integer */
533 8, /* size of l1 cache. */
534 256, /* size of l2 cache */
535 32, /* size of prefetch block */
536 6, /* number of parallel prefetches */
538 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
539 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
540 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
541 COSTS_N_INSNS (2), /* cost of FABS instruction. */
542 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
543 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
546 1, /* scalar_stmt_cost. */
547 1, /* scalar load_cost. */
548 1, /* scalar_store_cost. */
549 1, /* vec_stmt_cost. */
550 1, /* vec_to_scalar_cost. */
551 1, /* scalar_to_vec_cost. */
552 1, /* vec_align_load_cost. */
553 2, /* vec_unalign_load_cost. */
554 1, /* vec_store_cost. */
555 3, /* cond_taken_branch_cost. */
556 1, /* cond_not_taken_branch_cost. */
/* Geode memcpy: dword "rep" up to 256 bytes, library call beyond;
   second entry is a dummy (presumably the unused 64-bit slot).  */
559 static stringop_algs geode_memcpy[2] = {
560 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
561 DUMMY_STRINGOP_ALGS};
/* Geode memset: same strategy as memcpy.  */
562 static stringop_algs geode_memset[2] = {
563 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
564 DUMMY_STRINGOP_ALGS};
566 struct processor_costs geode_cost = {
567 COSTS_N_INSNS (1), /* cost of an add instruction */
568 COSTS_N_INSNS (1), /* cost of a lea instruction */
569 COSTS_N_INSNS (2), /* variable shift costs */
570 COSTS_N_INSNS (1), /* constant shift costs */
571 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
572 COSTS_N_INSNS (4), /* HI */
573 COSTS_N_INSNS (7), /* SI */
574 COSTS_N_INSNS (7), /* DI */
575 COSTS_N_INSNS (7)}, /* other */
576 0, /* cost of multiply per each bit set */
577 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
578 COSTS_N_INSNS (23), /* HI */
579 COSTS_N_INSNS (39), /* SI */
580 COSTS_N_INSNS (39), /* DI */
581 COSTS_N_INSNS (39)}, /* other */
582 COSTS_N_INSNS (1), /* cost of movsx */
583 COSTS_N_INSNS (1), /* cost of movzx */
584 8, /* "large" insn */
586 1, /* cost for loading QImode using movzbl */
587 {1, 1, 1}, /* cost of loading integer registers
588 in QImode, HImode and SImode.
589 Relative to reg-reg move (2). */
590 {1, 1, 1}, /* cost of storing integer registers */
591 1, /* cost of reg,reg fld/fst */
592 {1, 1, 1}, /* cost of loading fp registers
593 in SFmode, DFmode and XFmode */
594 {4, 6, 6}, /* cost of storing fp registers
595 in SFmode, DFmode and XFmode */
597 1, /* cost of moving MMX register */
598 {1, 1}, /* cost of loading MMX registers
599 in SImode and DImode */
600 {1, 1}, /* cost of storing MMX registers
601 in SImode and DImode */
602 1, /* cost of moving SSE register */
603 {1, 1, 1}, /* cost of loading SSE registers
604 in SImode, DImode and TImode */
605 {1, 1, 1}, /* cost of storing SSE registers
606 in SImode, DImode and TImode */
607 1, /* MMX or SSE register to integer */
608 64, /* size of l1 cache. */
609 128, /* size of l2 cache. */
610 32, /* size of prefetch block */
611 1, /* number of parallel prefetches */
613 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
614 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
615 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
616 COSTS_N_INSNS (1), /* cost of FABS instruction. */
617 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
618 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
621 1, /* scalar_stmt_cost. */
622 1, /* scalar load_cost. */
623 1, /* scalar_store_cost. */
624 1, /* vec_stmt_cost. */
625 1, /* vec_to_scalar_cost. */
626 1, /* scalar_to_vec_cost. */
627 1, /* vec_align_load_cost. */
628 2, /* vec_unalign_load_cost. */
629 1, /* vec_store_cost. */
630 3, /* cond_taken_branch_cost. */
631 1, /* cond_not_taken_branch_cost. */
/* K6 memcpy: dword "rep" up to 256 bytes, library call beyond; second
   entry is a dummy (presumably the unused 64-bit slot).  */
634 static stringop_algs k6_memcpy[2] = {
635 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
636 DUMMY_STRINGOP_ALGS};
/* K6 memset: same strategy as memcpy.  */
637 static stringop_algs k6_memset[2] = {
638 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
639 DUMMY_STRINGOP_ALGS};
641 struct processor_costs k6_cost = {
642 COSTS_N_INSNS (1), /* cost of an add instruction */
643 COSTS_N_INSNS (2), /* cost of a lea instruction */
644 COSTS_N_INSNS (1), /* variable shift costs */
645 COSTS_N_INSNS (1), /* constant shift costs */
646 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
647 COSTS_N_INSNS (3), /* HI */
648 COSTS_N_INSNS (3), /* SI */
649 COSTS_N_INSNS (3), /* DI */
650 COSTS_N_INSNS (3)}, /* other */
651 0, /* cost of multiply per each bit set */
652 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
653 COSTS_N_INSNS (18), /* HI */
654 COSTS_N_INSNS (18), /* SI */
655 COSTS_N_INSNS (18), /* DI */
656 COSTS_N_INSNS (18)}, /* other */
657 COSTS_N_INSNS (2), /* cost of movsx */
658 COSTS_N_INSNS (2), /* cost of movzx */
659 8, /* "large" insn */
661 3, /* cost for loading QImode using movzbl */
662 {4, 5, 4}, /* cost of loading integer registers
663 in QImode, HImode and SImode.
664 Relative to reg-reg move (2). */
665 {2, 3, 2}, /* cost of storing integer registers */
666 4, /* cost of reg,reg fld/fst */
667 {6, 6, 6}, /* cost of loading fp registers
668 in SFmode, DFmode and XFmode */
669 {4, 4, 4}, /* cost of storing fp registers
670 in SFmode, DFmode and XFmode */
671 2, /* cost of moving MMX register */
672 {2, 2}, /* cost of loading MMX registers
673 in SImode and DImode */
674 {2, 2}, /* cost of storing MMX registers
675 in SImode and DImode */
676 2, /* cost of moving SSE register */
677 {2, 2, 8}, /* cost of loading SSE registers
678 in SImode, DImode and TImode */
679 {2, 2, 8}, /* cost of storing SSE registers
680 in SImode, DImode and TImode */
681 6, /* MMX or SSE register to integer */
682 32, /* size of l1 cache. */
683 32, /* size of l2 cache. Some models
684 have integrated l2 cache, but
685 optimizing for k6 is not important
686 enough to worry about that. */
687 32, /* size of prefetch block */
688 1, /* number of parallel prefetches */
690 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
691 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
692 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
693 COSTS_N_INSNS (2), /* cost of FABS instruction. */
694 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
695 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
698 1, /* scalar_stmt_cost. */
699 1, /* scalar load_cost. */
700 1, /* scalar_store_cost. */
701 1, /* vec_stmt_cost. */
702 1, /* vec_to_scalar_cost. */
703 1, /* scalar_to_vec_cost. */
704 1, /* vec_align_load_cost. */
705 2, /* vec_unalign_load_cost. */
706 1, /* vec_store_cost. */
707 3, /* cond_taken_branch_cost. */
708 1, /* cond_not_taken_branch_cost. */
711 /* For some reason, Athlon deals better with REP prefix (relative to loops)
712 compared to K8. Alignment becomes important after 8 bytes for memcpy and
713 128 bytes for memset. */
/* Athlon memcpy (rationale in the comment above): dword "rep" up to
   2K, library call beyond; second entry is a dummy (presumably the
   unused 64-bit slot).  */
714 static stringop_algs athlon_memcpy[2] = {
715 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
716 DUMMY_STRINGOP_ALGS};
/* Athlon memset: same strategy as memcpy.  */
717 static stringop_algs athlon_memset[2] = {
718 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
719 DUMMY_STRINGOP_ALGS};
721 struct processor_costs athlon_cost = {
722 COSTS_N_INSNS (1), /* cost of an add instruction */
723 COSTS_N_INSNS (2), /* cost of a lea instruction */
724 COSTS_N_INSNS (1), /* variable shift costs */
725 COSTS_N_INSNS (1), /* constant shift costs */
726 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
727 COSTS_N_INSNS (5), /* HI */
728 COSTS_N_INSNS (5), /* SI */
729 COSTS_N_INSNS (5), /* DI */
730 COSTS_N_INSNS (5)}, /* other */
731 0, /* cost of multiply per each bit set */
732 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
733 COSTS_N_INSNS (26), /* HI */
734 COSTS_N_INSNS (42), /* SI */
735 COSTS_N_INSNS (74), /* DI */
736 COSTS_N_INSNS (74)}, /* other */
737 COSTS_N_INSNS (1), /* cost of movsx */
738 COSTS_N_INSNS (1), /* cost of movzx */
739 8, /* "large" insn */
741 4, /* cost for loading QImode using movzbl */
742 {3, 4, 3}, /* cost of loading integer registers
743 in QImode, HImode and SImode.
744 Relative to reg-reg move (2). */
745 {3, 4, 3}, /* cost of storing integer registers */
746 4, /* cost of reg,reg fld/fst */
747 {4, 4, 12}, /* cost of loading fp registers
748 in SFmode, DFmode and XFmode */
749 {6, 6, 8}, /* cost of storing fp registers
750 in SFmode, DFmode and XFmode */
751 2, /* cost of moving MMX register */
752 {4, 4}, /* cost of loading MMX registers
753 in SImode and DImode */
754 {4, 4}, /* cost of storing MMX registers
755 in SImode and DImode */
756 2, /* cost of moving SSE register */
757 {4, 4, 6}, /* cost of loading SSE registers
758 in SImode, DImode and TImode */
759 {4, 4, 5}, /* cost of storing SSE registers
760 in SImode, DImode and TImode */
761 5, /* MMX or SSE register to integer */
762 64, /* size of l1 cache. */
763 256, /* size of l2 cache. */
764 64, /* size of prefetch block */
765 6, /* number of parallel prefetches */
767 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
768 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
769 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
770 COSTS_N_INSNS (2), /* cost of FABS instruction. */
771 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
772 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
775 1, /* scalar_stmt_cost. */
776 1, /* scalar load_cost. */
777 1, /* scalar_store_cost. */
778 1, /* vec_stmt_cost. */
779 1, /* vec_to_scalar_cost. */
780 1, /* scalar_to_vec_cost. */
781 1, /* vec_align_load_cost. */
782 2, /* vec_unalign_load_cost. */
783 1, /* vec_store_cost. */
784 3, /* cond_taken_branch_cost. */
785 1, /* cond_not_taken_branch_cost. */
788 /* K8 has optimized REP instruction for medium sized blocks, but for very
789 small blocks it is better to use loop. For large blocks, libcall can
790 do nontemporary accesses and beat inline considerably. */
/* K8 memcpy (rationale in the comment above).  Both entries are real
   (presumably [0] = 32-bit, [1] = 64-bit code — confirm): tiny blocks
   use an inline loop, medium blocks a "rep" prefix, and the largest
   blocks a library call.  */
791 static stringop_algs k8_memcpy[2] = {
792 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
793 {-1, rep_prefix_4_byte, false}}},
794 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
795 {-1, libcall, false}}}};
/* K8 memset: same tiering with slightly larger loop cutoffs.  */
796 static stringop_algs k8_memset[2] = {
797 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
798 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
799 {libcall, {{48, unrolled_loop, false},
800 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
802 struct processor_costs k8_cost = {
803 COSTS_N_INSNS (1), /* cost of an add instruction */
804 COSTS_N_INSNS (2), /* cost of a lea instruction */
805 COSTS_N_INSNS (1), /* variable shift costs */
806 COSTS_N_INSNS (1), /* constant shift costs */
807 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
808 COSTS_N_INSNS (4), /* HI */
809 COSTS_N_INSNS (3), /* SI */
810 COSTS_N_INSNS (4), /* DI */
811 COSTS_N_INSNS (5)}, /* other */
812 0, /* cost of multiply per each bit set */
813 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
814 COSTS_N_INSNS (26), /* HI */
815 COSTS_N_INSNS (42), /* SI */
816 COSTS_N_INSNS (74), /* DI */
817 COSTS_N_INSNS (74)}, /* other */
818 COSTS_N_INSNS (1), /* cost of movsx */
819 COSTS_N_INSNS (1), /* cost of movzx */
820 8, /* "large" insn */
822 4, /* cost for loading QImode using movzbl */
823 {3, 4, 3}, /* cost of loading integer registers
824 in QImode, HImode and SImode.
825 Relative to reg-reg move (2). */
826 {3, 4, 3}, /* cost of storing integer registers */
827 4, /* cost of reg,reg fld/fst */
828 {4, 4, 12}, /* cost of loading fp registers
829 in SFmode, DFmode and XFmode */
830 {6, 6, 8}, /* cost of storing fp registers
831 in SFmode, DFmode and XFmode */
832 2, /* cost of moving MMX register */
833 {3, 3}, /* cost of loading MMX registers
834 in SImode and DImode */
835 {4, 4}, /* cost of storing MMX registers
836 in SImode and DImode */
837 2, /* cost of moving SSE register */
838 {4, 3, 6}, /* cost of loading SSE registers
839 in SImode, DImode and TImode */
840 {4, 4, 5}, /* cost of storing SSE registers
841 in SImode, DImode and TImode */
842 5, /* MMX or SSE register to integer */
843 64, /* size of l1 cache. */
844 512, /* size of l2 cache. */
845 64, /* size of prefetch block */
846 /* New AMD processors never drop prefetches; if they cannot be performed
847 immediately, they are queued. We set number of simultaneous prefetches
848 to a large constant to reflect this (it probably is not a good idea not
849 to limit number of prefetches at all, as their execution also takes some
851 100, /* number of parallel prefetches */
853 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
854 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
855 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
856 COSTS_N_INSNS (2), /* cost of FABS instruction. */
857 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
858 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
862 4, /* scalar_stmt_cost. */
863 2, /* scalar load_cost. */
864 2, /* scalar_store_cost. */
865 5, /* vec_stmt_cost. */
866 0, /* vec_to_scalar_cost. */
867 2, /* scalar_to_vec_cost. */
868 2, /* vec_align_load_cost. */
869 3, /* vec_unalign_load_cost. */
870 3, /* vec_store_cost. */
871 3, /* cond_taken_branch_cost. */
872 2, /* cond_not_taken_branch_cost. */
875 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
876 very small blocks it is better to use loop. For large blocks, libcall can
877 do nontemporary accesses and beat inline considerably. */
/* AMDFAM10 memcpy (rationale in the comment above).  Both entries are
   real (presumably [0] = 32-bit, [1] = 64-bit code — confirm): tiny
   blocks use an inline loop, medium blocks a "rep" prefix, large
   blocks a library call.  */
878 static stringop_algs amdfam10_memcpy[2] = {
879 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
880 {-1, rep_prefix_4_byte, false}}},
881 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
882 {-1, libcall, false}}}};
/* AMDFAM10 memset: same tiering with slightly larger loop cutoffs.  */
883 static stringop_algs amdfam10_memset[2] = {
884 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
885 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
886 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
887 {-1, libcall, false}}}};
888 struct processor_costs amdfam10_cost = {
889 COSTS_N_INSNS (1), /* cost of an add instruction */
890 COSTS_N_INSNS (2), /* cost of a lea instruction */
891 COSTS_N_INSNS (1), /* variable shift costs */
892 COSTS_N_INSNS (1), /* constant shift costs */
893 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
894 COSTS_N_INSNS (4), /* HI */
895 COSTS_N_INSNS (3), /* SI */
896 COSTS_N_INSNS (4), /* DI */
897 COSTS_N_INSNS (5)}, /* other */
898 0, /* cost of multiply per each bit set */
899 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
900 COSTS_N_INSNS (35), /* HI */
901 COSTS_N_INSNS (51), /* SI */
902 COSTS_N_INSNS (83), /* DI */
903 COSTS_N_INSNS (83)}, /* other */
904 COSTS_N_INSNS (1), /* cost of movsx */
905 COSTS_N_INSNS (1), /* cost of movzx */
906 8, /* "large" insn */
908 4, /* cost for loading QImode using movzbl */
909 {3, 4, 3}, /* cost of loading integer registers
910 in QImode, HImode and SImode.
911 Relative to reg-reg move (2). */
912 {3, 4, 3}, /* cost of storing integer registers */
913 4, /* cost of reg,reg fld/fst */
914 {4, 4, 12}, /* cost of loading fp registers
915 in SFmode, DFmode and XFmode */
916 {6, 6, 8}, /* cost of storing fp registers
917 in SFmode, DFmode and XFmode */
918 2, /* cost of moving MMX register */
919 {3, 3}, /* cost of loading MMX registers
920 in SImode and DImode */
921 {4, 4}, /* cost of storing MMX registers
922 in SImode and DImode */
923 2, /* cost of moving SSE register */
924 {4, 4, 3}, /* cost of loading SSE registers
925 in SImode, DImode and TImode */
926 {4, 4, 5}, /* cost of storing SSE registers
927 in SImode, DImode and TImode */
928 3, /* MMX or SSE register to integer */
930 MOVD reg64, xmmreg Double FSTORE 4
931 MOVD reg32, xmmreg Double FSTORE 4
933 MOVD reg64, xmmreg Double FADD 3
935 MOVD reg32, xmmreg Double FADD 3
937 64, /* size of l1 cache. */
938 512, /* size of l2 cache. */
939 64, /* size of prefetch block */
940 /* New AMD processors never drop prefetches; if they cannot be performed
941 immediately, they are queued. We set number of simultaneous prefetches
942 to a large constant to reflect this (it probably is not a good idea not
943 to limit number of prefetches at all, as their execution also takes some
945 100, /* number of parallel prefetches */
947 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
948 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
949 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
950 COSTS_N_INSNS (2), /* cost of FABS instruction. */
951 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
952 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
956 4, /* scalar_stmt_cost. */
957 2, /* scalar load_cost. */
958 2, /* scalar_store_cost. */
959 6, /* vec_stmt_cost. */
960 0, /* vec_to_scalar_cost. */
961 2, /* scalar_to_vec_cost. */
962 2, /* vec_align_load_cost. */
963 2, /* vec_unalign_load_cost. */
964 2, /* vec_store_cost. */
965 2, /* cond_taken_branch_cost. */
966 1, /* cond_not_taken_branch_cost. */
969 /* BDVER1 has optimized REP instruction for medium sized blocks, but for
970 very small blocks it is better to use loop. For large blocks, libcall
971 can do nontemporary accesses and beat inline considerably. */
972 static stringop_algs bdver1_memcpy[2] = {
973 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
974 {-1, rep_prefix_4_byte, false}}},
975 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
976 {-1, libcall, false}}}};
977 static stringop_algs bdver1_memset[2] = {
978 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
979 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
980 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
981 {-1, libcall, false}}}};
983 const struct processor_costs bdver1_cost = {
984 COSTS_N_INSNS (1), /* cost of an add instruction */
985 COSTS_N_INSNS (1), /* cost of a lea instruction */
986 COSTS_N_INSNS (1), /* variable shift costs */
987 COSTS_N_INSNS (1), /* constant shift costs */
988 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
989 COSTS_N_INSNS (4), /* HI */
990 COSTS_N_INSNS (4), /* SI */
991 COSTS_N_INSNS (6), /* DI */
992 COSTS_N_INSNS (6)}, /* other */
993 0, /* cost of multiply per each bit set */
994 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
995 COSTS_N_INSNS (35), /* HI */
996 COSTS_N_INSNS (51), /* SI */
997 COSTS_N_INSNS (83), /* DI */
998 COSTS_N_INSNS (83)}, /* other */
999 COSTS_N_INSNS (1), /* cost of movsx */
1000 COSTS_N_INSNS (1), /* cost of movzx */
1001 8, /* "large" insn */
1003 4, /* cost for loading QImode using movzbl */
1004 {5, 5, 4}, /* cost of loading integer registers
1005 in QImode, HImode and SImode.
1006 Relative to reg-reg move (2). */
1007 {4, 4, 4}, /* cost of storing integer registers */
1008 2, /* cost of reg,reg fld/fst */
1009 {5, 5, 12}, /* cost of loading fp registers
1010 in SFmode, DFmode and XFmode */
1011 {4, 4, 8}, /* cost of storing fp registers
1012 in SFmode, DFmode and XFmode */
1013 2, /* cost of moving MMX register */
1014 {4, 4}, /* cost of loading MMX registers
1015 in SImode and DImode */
1016 {4, 4}, /* cost of storing MMX registers
1017 in SImode and DImode */
1018 2, /* cost of moving SSE register */
1019 {4, 4, 4}, /* cost of loading SSE registers
1020 in SImode, DImode and TImode */
1021 {4, 4, 4}, /* cost of storing SSE registers
1022 in SImode, DImode and TImode */
1023 2, /* MMX or SSE register to integer */
1025 MOVD reg64, xmmreg Double FSTORE 4
1026 MOVD reg32, xmmreg Double FSTORE 4
1028 MOVD reg64, xmmreg Double FADD 3
1030 MOVD reg32, xmmreg Double FADD 3
1032 16, /* size of l1 cache. */
1033 2048, /* size of l2 cache. */
1034 64, /* size of prefetch block */
1035 /* New AMD processors never drop prefetches; if they cannot be performed
1036 immediately, they are queued. We set number of simultaneous prefetches
1037 to a large constant to reflect this (it probably is not a good idea not
1038 to limit number of prefetches at all, as their execution also takes some
1040 100, /* number of parallel prefetches */
1041 2, /* Branch cost */
1042 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1043 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1044 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1045 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1046 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1047 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1051 6, /* scalar_stmt_cost. */
1052 4, /* scalar load_cost. */
1053 4, /* scalar_store_cost. */
1054 6, /* vec_stmt_cost. */
1055 0, /* vec_to_scalar_cost. */
1056 2, /* scalar_to_vec_cost. */
1057 4, /* vec_align_load_cost. */
1058 4, /* vec_unalign_load_cost. */
1059 4, /* vec_store_cost. */
1060 4, /* cond_taken_branch_cost. */
1061 2, /* cond_not_taken_branch_cost. */
1064 /* BDVER2 has optimized REP instruction for medium sized blocks, but for
1065 very small blocks it is better to use loop. For large blocks, libcall
1066 can do nontemporary accesses and beat inline considerably. */
1068 static stringop_algs bdver2_memcpy[2] = {
1069 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1070 {-1, rep_prefix_4_byte, false}}},
1071 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1072 {-1, libcall, false}}}};
1073 static stringop_algs bdver2_memset[2] = {
1074 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1075 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1076 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1077 {-1, libcall, false}}}};
1079 const struct processor_costs bdver2_cost = {
1080 COSTS_N_INSNS (1), /* cost of an add instruction */
1081 COSTS_N_INSNS (1), /* cost of a lea instruction */
1082 COSTS_N_INSNS (1), /* variable shift costs */
1083 COSTS_N_INSNS (1), /* constant shift costs */
1084 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1085 COSTS_N_INSNS (4), /* HI */
1086 COSTS_N_INSNS (4), /* SI */
1087 COSTS_N_INSNS (6), /* DI */
1088 COSTS_N_INSNS (6)}, /* other */
1089 0, /* cost of multiply per each bit set */
1090 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1091 COSTS_N_INSNS (35), /* HI */
1092 COSTS_N_INSNS (51), /* SI */
1093 COSTS_N_INSNS (83), /* DI */
1094 COSTS_N_INSNS (83)}, /* other */
1095 COSTS_N_INSNS (1), /* cost of movsx */
1096 COSTS_N_INSNS (1), /* cost of movzx */
1097 8, /* "large" insn */
1099 4, /* cost for loading QImode using movzbl */
1100 {5, 5, 4}, /* cost of loading integer registers
1101 in QImode, HImode and SImode.
1102 Relative to reg-reg move (2). */
1103 {4, 4, 4}, /* cost of storing integer registers */
1104 2, /* cost of reg,reg fld/fst */
1105 {5, 5, 12}, /* cost of loading fp registers
1106 in SFmode, DFmode and XFmode */
1107 {4, 4, 8}, /* cost of storing fp registers
1108 in SFmode, DFmode and XFmode */
1109 2, /* cost of moving MMX register */
1110 {4, 4}, /* cost of loading MMX registers
1111 in SImode and DImode */
1112 {4, 4}, /* cost of storing MMX registers
1113 in SImode and DImode */
1114 2, /* cost of moving SSE register */
1115 {4, 4, 4}, /* cost of loading SSE registers
1116 in SImode, DImode and TImode */
1117 {4, 4, 4}, /* cost of storing SSE registers
1118 in SImode, DImode and TImode */
1119 2, /* MMX or SSE register to integer */
1121 MOVD reg64, xmmreg Double FSTORE 4
1122 MOVD reg32, xmmreg Double FSTORE 4
1124 MOVD reg64, xmmreg Double FADD 3
1126 MOVD reg32, xmmreg Double FADD 3
1128 16, /* size of l1 cache. */
1129 2048, /* size of l2 cache. */
1130 64, /* size of prefetch block */
1131 /* New AMD processors never drop prefetches; if they cannot be performed
1132 immediately, they are queued. We set number of simultaneous prefetches
1133 to a large constant to reflect this (it probably is not a good idea not
1134 to limit number of prefetches at all, as their execution also takes some
1136 100, /* number of parallel prefetches */
1137 2, /* Branch cost */
1138 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1139 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1140 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1141 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1142 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1143 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1147 6, /* scalar_stmt_cost. */
1148 4, /* scalar load_cost. */
1149 4, /* scalar_store_cost. */
1150 6, /* vec_stmt_cost. */
1151 0, /* vec_to_scalar_cost. */
1152 2, /* scalar_to_vec_cost. */
1153 4, /* vec_align_load_cost. */
1154 4, /* vec_unalign_load_cost. */
1155 4, /* vec_store_cost. */
1156 4, /* cond_taken_branch_cost. */
1157 2, /* cond_not_taken_branch_cost. */
1161 /* BDVER3 has optimized REP instruction for medium sized blocks, but for
1162 very small blocks it is better to use loop. For large blocks, libcall
1163 can do nontemporary accesses and beat inline considerably. */
1164 static stringop_algs bdver3_memcpy[2] = {
1165 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1166 {-1, rep_prefix_4_byte, false}}},
1167 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1168 {-1, libcall, false}}}};
1169 static stringop_algs bdver3_memset[2] = {
1170 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1171 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1172 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1173 {-1, libcall, false}}}};
1174 struct processor_costs bdver3_cost = {
1175 COSTS_N_INSNS (1), /* cost of an add instruction */
1176 COSTS_N_INSNS (1), /* cost of a lea instruction */
1177 COSTS_N_INSNS (1), /* variable shift costs */
1178 COSTS_N_INSNS (1), /* constant shift costs */
1179 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1180 COSTS_N_INSNS (4), /* HI */
1181 COSTS_N_INSNS (4), /* SI */
1182 COSTS_N_INSNS (6), /* DI */
1183 COSTS_N_INSNS (6)}, /* other */
1184 0, /* cost of multiply per each bit set */
1185 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1186 COSTS_N_INSNS (35), /* HI */
1187 COSTS_N_INSNS (51), /* SI */
1188 COSTS_N_INSNS (83), /* DI */
1189 COSTS_N_INSNS (83)}, /* other */
1190 COSTS_N_INSNS (1), /* cost of movsx */
1191 COSTS_N_INSNS (1), /* cost of movzx */
1192 8, /* "large" insn */
1194 4, /* cost for loading QImode using movzbl */
1195 {5, 5, 4}, /* cost of loading integer registers
1196 in QImode, HImode and SImode.
1197 Relative to reg-reg move (2). */
1198 {4, 4, 4}, /* cost of storing integer registers */
1199 2, /* cost of reg,reg fld/fst */
1200 {5, 5, 12}, /* cost of loading fp registers
1201 in SFmode, DFmode and XFmode */
1202 {4, 4, 8}, /* cost of storing fp registers
1203 in SFmode, DFmode and XFmode */
1204 2, /* cost of moving MMX register */
1205 {4, 4}, /* cost of loading MMX registers
1206 in SImode and DImode */
1207 {4, 4}, /* cost of storing MMX registers
1208 in SImode and DImode */
1209 2, /* cost of moving SSE register */
1210 {4, 4, 4}, /* cost of loading SSE registers
1211 in SImode, DImode and TImode */
1212 {4, 4, 4}, /* cost of storing SSE registers
1213 in SImode, DImode and TImode */
1214 2, /* MMX or SSE register to integer */
1215 16, /* size of l1 cache. */
1216 2048, /* size of l2 cache. */
1217 64, /* size of prefetch block */
1218 /* New AMD processors never drop prefetches; if they cannot be performed
1219 immediately, they are queued. We set number of simultaneous prefetches
1220 to a large constant to reflect this (it probably is not a good idea not
1221 to limit number of prefetches at all, as their execution also takes some
1223 100, /* number of parallel prefetches */
1224 2, /* Branch cost */
1225 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1226 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1227 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1228 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1229 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1230 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1234 6, /* scalar_stmt_cost. */
1235 4, /* scalar load_cost. */
1236 4, /* scalar_store_cost. */
1237 6, /* vec_stmt_cost. */
1238 0, /* vec_to_scalar_cost. */
1239 2, /* scalar_to_vec_cost. */
1240 4, /* vec_align_load_cost. */
1241 4, /* vec_unalign_load_cost. */
1242 4, /* vec_store_cost. */
1243 4, /* cond_taken_branch_cost. */
1244 2, /* cond_not_taken_branch_cost. */
1247 /* BDVER4 has optimized REP instruction for medium sized blocks, but for
1248 very small blocks it is better to use loop. For large blocks, libcall
1249 can do nontemporary accesses and beat inline considerably. */
1250 static stringop_algs bdver4_memcpy[2] = {
1251 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1252 {-1, rep_prefix_4_byte, false}}},
1253 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1254 {-1, libcall, false}}}};
1255 static stringop_algs bdver4_memset[2] = {
1256 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1257 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1258 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1259 {-1, libcall, false}}}};
1260 struct processor_costs bdver4_cost = {
1261 COSTS_N_INSNS (1), /* cost of an add instruction */
1262 COSTS_N_INSNS (1), /* cost of a lea instruction */
1263 COSTS_N_INSNS (1), /* variable shift costs */
1264 COSTS_N_INSNS (1), /* constant shift costs */
1265 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1266 COSTS_N_INSNS (4), /* HI */
1267 COSTS_N_INSNS (4), /* SI */
1268 COSTS_N_INSNS (6), /* DI */
1269 COSTS_N_INSNS (6)}, /* other */
1270 0, /* cost of multiply per each bit set */
1271 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1272 COSTS_N_INSNS (35), /* HI */
1273 COSTS_N_INSNS (51), /* SI */
1274 COSTS_N_INSNS (83), /* DI */
1275 COSTS_N_INSNS (83)}, /* other */
1276 COSTS_N_INSNS (1), /* cost of movsx */
1277 COSTS_N_INSNS (1), /* cost of movzx */
1278 8, /* "large" insn */
1280 4, /* cost for loading QImode using movzbl */
1281 {5, 5, 4}, /* cost of loading integer registers
1282 in QImode, HImode and SImode.
1283 Relative to reg-reg move (2). */
1284 {4, 4, 4}, /* cost of storing integer registers */
1285 2, /* cost of reg,reg fld/fst */
1286 {5, 5, 12}, /* cost of loading fp registers
1287 in SFmode, DFmode and XFmode */
1288 {4, 4, 8}, /* cost of storing fp registers
1289 in SFmode, DFmode and XFmode */
1290 2, /* cost of moving MMX register */
1291 {4, 4}, /* cost of loading MMX registers
1292 in SImode and DImode */
1293 {4, 4}, /* cost of storing MMX registers
1294 in SImode and DImode */
1295 2, /* cost of moving SSE register */
1296 {4, 4, 4}, /* cost of loading SSE registers
1297 in SImode, DImode and TImode */
1298 {4, 4, 4}, /* cost of storing SSE registers
1299 in SImode, DImode and TImode */
1300 2, /* MMX or SSE register to integer */
1301 16, /* size of l1 cache. */
1302 2048, /* size of l2 cache. */
1303 64, /* size of prefetch block */
1304 /* New AMD processors never drop prefetches; if they cannot be performed
1305 immediately, they are queued. We set number of simultaneous prefetches
1306 to a large constant to reflect this (it probably is not a good idea not
1307 to limit number of prefetches at all, as their execution also takes some
1309 100, /* number of parallel prefetches */
1310 2, /* Branch cost */
1311 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1312 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1313 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1314 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1315 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1316 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1320 6, /* scalar_stmt_cost. */
1321 4, /* scalar load_cost. */
1322 4, /* scalar_store_cost. */
1323 6, /* vec_stmt_cost. */
1324 0, /* vec_to_scalar_cost. */
1325 2, /* scalar_to_vec_cost. */
1326 4, /* vec_align_load_cost. */
1327 4, /* vec_unalign_load_cost. */
1328 4, /* vec_store_cost. */
1329 4, /* cond_taken_branch_cost. */
1330 2, /* cond_not_taken_branch_cost. */
1334 /* ZNVER1 has optimized REP instruction for medium sized blocks, but for
1335 very small blocks it is better to use loop. For large blocks, libcall
1336 can do nontemporary accesses and beat inline considerably. */
1337 static stringop_algs znver1_memcpy[2] = {
1338 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1339 {-1, rep_prefix_4_byte, false}}},
1340 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1341 {-1, libcall, false}}}};
1342 static stringop_algs znver1_memset[2] = {
1343 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1344 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1345 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1346 {-1, libcall, false}}}};
1347 struct processor_costs znver1_cost = {
1348 COSTS_N_INSNS (1), /* cost of an add instruction. */
1349 COSTS_N_INSNS (1), /* cost of a lea instruction. */
1350 COSTS_N_INSNS (1), /* variable shift costs. */
1351 COSTS_N_INSNS (1), /* constant shift costs. */
1352 {COSTS_N_INSNS (4), /* cost of starting multiply for QI. */
1353 COSTS_N_INSNS (4), /* HI. */
1354 COSTS_N_INSNS (4), /* SI. */
1355 COSTS_N_INSNS (6), /* DI. */
1356 COSTS_N_INSNS (6)}, /* other. */
1357 0, /* cost of multiply per each bit
1359 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI. */
1360 COSTS_N_INSNS (35), /* HI. */
1361 COSTS_N_INSNS (51), /* SI. */
1362 COSTS_N_INSNS (83), /* DI. */
1363 COSTS_N_INSNS (83)}, /* other. */
1364 COSTS_N_INSNS (1), /* cost of movsx. */
1365 COSTS_N_INSNS (1), /* cost of movzx. */
1366 8, /* "large" insn. */
1367 9, /* MOVE_RATIO. */
1368 4, /* cost for loading QImode using
1370 {5, 5, 4}, /* cost of loading integer registers
1371 in QImode, HImode and SImode.
1372 Relative to reg-reg move (2). */
1373 {4, 4, 4}, /* cost of storing integer
1375 2, /* cost of reg,reg fld/fst. */
1376 {5, 5, 12}, /* cost of loading fp registers
1377 in SFmode, DFmode and XFmode. */
1378 {4, 4, 8}, /* cost of storing fp registers
1379 in SFmode, DFmode and XFmode. */
1380 2, /* cost of moving MMX register. */
1381 {4, 4}, /* cost of loading MMX registers
1382 in SImode and DImode. */
1383 {4, 4}, /* cost of storing MMX registers
1384 in SImode and DImode. */
1385 2, /* cost of moving SSE register. */
1386 {4, 4, 4}, /* cost of loading SSE registers
1387 in SImode, DImode and TImode. */
1388 {4, 4, 4}, /* cost of storing SSE registers
1389 in SImode, DImode and TImode. */
1390 2, /* MMX or SSE register to integer. */
1391 32, /* size of l1 cache. */
1392 512, /* size of l2 cache. */
1393 64, /* size of prefetch block. */
1394 /* New AMD processors never drop prefetches; if they cannot be performed
1395 immediately, they are queued. We set number of simultaneous prefetches
1396 to a large constant to reflect this (it probably is not a good idea not
1397 to limit number of prefetches at all, as their execution also takes some
1399 100, /* number of parallel prefetches. */
1400 2, /* Branch cost. */
1401 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1402 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1403 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1404 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1405 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1406 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1410 6, /* scalar_stmt_cost. */
1411 4, /* scalar load_cost. */
1412 4, /* scalar_store_cost. */
1413 6, /* vec_stmt_cost. */
1414 0, /* vec_to_scalar_cost. */
1415 2, /* scalar_to_vec_cost. */
1416 4, /* vec_align_load_cost. */
1417 4, /* vec_unalign_load_cost. */
1418 4, /* vec_store_cost. */
1419 4, /* cond_taken_branch_cost. */
1420 2, /* cond_not_taken_branch_cost. */
1423 /* BTVER1 has optimized REP instruction for medium sized blocks, but for
1424 very small blocks it is better to use loop. For large blocks, libcall can
1425 do nontemporary accesses and beat inline considerably. */
1426 static stringop_algs btver1_memcpy[2] = {
1427 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1428 {-1, rep_prefix_4_byte, false}}},
1429 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1430 {-1, libcall, false}}}};
1431 static stringop_algs btver1_memset[2] = {
1432 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1433 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1434 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1435 {-1, libcall, false}}}};
1436 const struct processor_costs btver1_cost = {
1437 COSTS_N_INSNS (1), /* cost of an add instruction */
1438 COSTS_N_INSNS (2), /* cost of a lea instruction */
1439 COSTS_N_INSNS (1), /* variable shift costs */
1440 COSTS_N_INSNS (1), /* constant shift costs */
1441 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1442 COSTS_N_INSNS (4), /* HI */
1443 COSTS_N_INSNS (3), /* SI */
1444 COSTS_N_INSNS (4), /* DI */
1445 COSTS_N_INSNS (5)}, /* other */
1446 0, /* cost of multiply per each bit set */
1447 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1448 COSTS_N_INSNS (35), /* HI */
1449 COSTS_N_INSNS (51), /* SI */
1450 COSTS_N_INSNS (83), /* DI */
1451 COSTS_N_INSNS (83)}, /* other */
1452 COSTS_N_INSNS (1), /* cost of movsx */
1453 COSTS_N_INSNS (1), /* cost of movzx */
1454 8, /* "large" insn */
1456 4, /* cost for loading QImode using movzbl */
1457 {3, 4, 3}, /* cost of loading integer registers
1458 in QImode, HImode and SImode.
1459 Relative to reg-reg move (2). */
1460 {3, 4, 3}, /* cost of storing integer registers */
1461 4, /* cost of reg,reg fld/fst */
1462 {4, 4, 12}, /* cost of loading fp registers
1463 in SFmode, DFmode and XFmode */
1464 {6, 6, 8}, /* cost of storing fp registers
1465 in SFmode, DFmode and XFmode */
1466 2, /* cost of moving MMX register */
1467 {3, 3}, /* cost of loading MMX registers
1468 in SImode and DImode */
1469 {4, 4}, /* cost of storing MMX registers
1470 in SImode and DImode */
1471 2, /* cost of moving SSE register */
1472 {4, 4, 3}, /* cost of loading SSE registers
1473 in SImode, DImode and TImode */
1474 {4, 4, 5}, /* cost of storing SSE registers
1475 in SImode, DImode and TImode */
1476 3, /* MMX or SSE register to integer */
1478 MOVD reg64, xmmreg Double FSTORE 4
1479 MOVD reg32, xmmreg Double FSTORE 4
1481 MOVD reg64, xmmreg Double FADD 3
1483 MOVD reg32, xmmreg Double FADD 3
1485 32, /* size of l1 cache. */
1486 512, /* size of l2 cache. */
1487 64, /* size of prefetch block */
1488 100, /* number of parallel prefetches */
1489 2, /* Branch cost */
1490 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1491 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1492 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1493 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1494 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1495 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1499 4, /* scalar_stmt_cost. */
1500 2, /* scalar load_cost. */
1501 2, /* scalar_store_cost. */
1502 6, /* vec_stmt_cost. */
1503 0, /* vec_to_scalar_cost. */
1504 2, /* scalar_to_vec_cost. */
1505 2, /* vec_align_load_cost. */
1506 2, /* vec_unalign_load_cost. */
1507 2, /* vec_store_cost. */
1508 2, /* cond_taken_branch_cost. */
1509 1, /* cond_not_taken_branch_cost. */
1512 static stringop_algs btver2_memcpy[2] = {
1513 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1514 {-1, rep_prefix_4_byte, false}}},
1515 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1516 {-1, libcall, false}}}};
1517 static stringop_algs btver2_memset[2] = {
1518 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1519 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1520 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1521 {-1, libcall, false}}}};
1522 const struct processor_costs btver2_cost = {
1523 COSTS_N_INSNS (1), /* cost of an add instruction */
1524 COSTS_N_INSNS (2), /* cost of a lea instruction */
1525 COSTS_N_INSNS (1), /* variable shift costs */
1526 COSTS_N_INSNS (1), /* constant shift costs */
1527 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1528 COSTS_N_INSNS (4), /* HI */
1529 COSTS_N_INSNS (3), /* SI */
1530 COSTS_N_INSNS (4), /* DI */
1531 COSTS_N_INSNS (5)}, /* other */
1532 0, /* cost of multiply per each bit set */
1533 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1534 COSTS_N_INSNS (35), /* HI */
1535 COSTS_N_INSNS (51), /* SI */
1536 COSTS_N_INSNS (83), /* DI */
1537 COSTS_N_INSNS (83)}, /* other */
1538 COSTS_N_INSNS (1), /* cost of movsx */
1539 COSTS_N_INSNS (1), /* cost of movzx */
1540 8, /* "large" insn */
1542 4, /* cost for loading QImode using movzbl */
1543 {3, 4, 3}, /* cost of loading integer registers
1544 in QImode, HImode and SImode.
1545 Relative to reg-reg move (2). */
1546 {3, 4, 3}, /* cost of storing integer registers */
1547 4, /* cost of reg,reg fld/fst */
1548 {4, 4, 12}, /* cost of loading fp registers
1549 in SFmode, DFmode and XFmode */
1550 {6, 6, 8}, /* cost of storing fp registers
1551 in SFmode, DFmode and XFmode */
1552 2, /* cost of moving MMX register */
1553 {3, 3}, /* cost of loading MMX registers
1554 in SImode and DImode */
1555 {4, 4}, /* cost of storing MMX registers
1556 in SImode and DImode */
1557 2, /* cost of moving SSE register */
1558 {4, 4, 3}, /* cost of loading SSE registers
1559 in SImode, DImode and TImode */
1560 {4, 4, 5}, /* cost of storing SSE registers
1561 in SImode, DImode and TImode */
1562 3, /* MMX or SSE register to integer */
1564 MOVD reg64, xmmreg Double FSTORE 4
1565 MOVD reg32, xmmreg Double FSTORE 4
1567 MOVD reg64, xmmreg Double FADD 3
1569 MOVD reg32, xmmreg Double FADD 3
1571 32, /* size of l1 cache. */
1572 2048, /* size of l2 cache. */
1573 64, /* size of prefetch block */
1574 100, /* number of parallel prefetches */
1575 2, /* Branch cost */
1576 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1577 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1578 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1579 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1580 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1581 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1584 4, /* scalar_stmt_cost. */
1585 2, /* scalar load_cost. */
1586 2, /* scalar_store_cost. */
1587 6, /* vec_stmt_cost. */
1588 0, /* vec_to_scalar_cost. */
1589 2, /* scalar_to_vec_cost. */
1590 2, /* vec_align_load_cost. */
1591 2, /* vec_unalign_load_cost. */
1592 2, /* vec_store_cost. */
1593 2, /* cond_taken_branch_cost. */
1594 1, /* cond_not_taken_branch_cost. */
1597 static stringop_algs pentium4_memcpy[2] = {
1598 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1599 DUMMY_STRINGOP_ALGS};
1600 static stringop_algs pentium4_memset[2] = {
1601 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1602 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1603 DUMMY_STRINGOP_ALGS};
1606 struct processor_costs pentium4_cost = {
1607 COSTS_N_INSNS (1), /* cost of an add instruction */
1608 COSTS_N_INSNS (3), /* cost of a lea instruction */
1609 COSTS_N_INSNS (4), /* variable shift costs */
1610 COSTS_N_INSNS (4), /* constant shift costs */
1611 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1612 COSTS_N_INSNS (15), /* HI */
1613 COSTS_N_INSNS (15), /* SI */
1614 COSTS_N_INSNS (15), /* DI */
1615 COSTS_N_INSNS (15)}, /* other */
1616 0, /* cost of multiply per each bit set */
1617 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1618 COSTS_N_INSNS (56), /* HI */
1619 COSTS_N_INSNS (56), /* SI */
1620 COSTS_N_INSNS (56), /* DI */
1621 COSTS_N_INSNS (56)}, /* other */
1622 COSTS_N_INSNS (1), /* cost of movsx */
1623 COSTS_N_INSNS (1), /* cost of movzx */
1624 16, /* "large" insn */
1626 2, /* cost for loading QImode using movzbl */
1627 {4, 5, 4}, /* cost of loading integer registers
1628 in QImode, HImode and SImode.
1629 Relative to reg-reg move (2). */
1630 {2, 3, 2}, /* cost of storing integer registers */
1631 2, /* cost of reg,reg fld/fst */
1632 {2, 2, 6}, /* cost of loading fp registers
1633 in SFmode, DFmode and XFmode */
1634 {4, 4, 6}, /* cost of storing fp registers
1635 in SFmode, DFmode and XFmode */
1636 2, /* cost of moving MMX register */
1637 {2, 2}, /* cost of loading MMX registers
1638 in SImode and DImode */
1639 {2, 2}, /* cost of storing MMX registers
1640 in SImode and DImode */
1641 12, /* cost of moving SSE register */
1642 {12, 12, 12}, /* cost of loading SSE registers
1643 in SImode, DImode and TImode */
1644 {2, 2, 8}, /* cost of storing SSE registers
1645 in SImode, DImode and TImode */
1646 10, /* MMX or SSE register to integer */
1647 8, /* size of l1 cache. */
1648 256, /* size of l2 cache. */
1649 64, /* size of prefetch block */
1650 6, /* number of parallel prefetches */
1651 2, /* Branch cost */
1652 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1653 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1654 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1655 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1656 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1657 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1660 1, /* scalar_stmt_cost. */
1661 1, /* scalar load_cost. */
1662 1, /* scalar_store_cost. */
1663 1, /* vec_stmt_cost. */
1664 1, /* vec_to_scalar_cost. */
1665 1, /* scalar_to_vec_cost. */
1666 1, /* vec_align_load_cost. */
1667 2, /* vec_unalign_load_cost. */
1668 1, /* vec_store_cost. */
1669 3, /* cond_taken_branch_cost. */
1670 1, /* cond_not_taken_branch_cost. */
1673 static stringop_algs nocona_memcpy[2] = {
1674 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1675 {libcall, {{32, loop, false}, {20000, rep_prefix_8_byte, false},
1676 {100000, unrolled_loop, false}, {-1, libcall, false}}}};
/* Expansion strategy table for memset when tuning for Nocona;
   same row/entry layout as the sibling *_memcpy tables (each
   {max, alg, noalign} entry picks ALG for sizes up to MAX,
   -1 = no upper bound).  */
1678 static stringop_algs nocona_memset[2] = {
1679 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1680 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1681 {libcall, {{24, loop, false}, {64, unrolled_loop, false},
1682 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
/* Instruction and memory-move cost table used when tuning for
   Nocona.  Costs are relative to an add (see COSTS_N_INSNS); each
   field's meaning is given by its inline comment.  */
1685 struct processor_costs nocona_cost = {
1686 COSTS_N_INSNS (1), /* cost of an add instruction */
1687 COSTS_N_INSNS (1), /* cost of a lea instruction */
1688 COSTS_N_INSNS (1), /* variable shift costs */
1689 COSTS_N_INSNS (1), /* constant shift costs */
1690 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1691 COSTS_N_INSNS (10), /* HI */
1692 COSTS_N_INSNS (10), /* SI */
1693 COSTS_N_INSNS (10), /* DI */
1694 COSTS_N_INSNS (10)}, /* other */
1695 0, /* cost of multiply per each bit set */
1696 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1697 COSTS_N_INSNS (66), /* HI */
1698 COSTS_N_INSNS (66), /* SI */
1699 COSTS_N_INSNS (66), /* DI */
1700 COSTS_N_INSNS (66)}, /* other */
1701 COSTS_N_INSNS (1), /* cost of movsx */
1702 COSTS_N_INSNS (1), /* cost of movzx */
1703 16, /* "large" insn */
1704 17, /* MOVE_RATIO */
1705 4, /* cost for loading QImode using movzbl */
1706 {4, 4, 4}, /* cost of loading integer registers
1707 in QImode, HImode and SImode.
1708 Relative to reg-reg move (2). */
1709 {4, 4, 4}, /* cost of storing integer registers */
1710 3, /* cost of reg,reg fld/fst */
1711 {12, 12, 12}, /* cost of loading fp registers
1712 in SFmode, DFmode and XFmode */
1713 {4, 4, 4}, /* cost of storing fp registers
1714 in SFmode, DFmode and XFmode */
1715 6, /* cost of moving MMX register */
1716 {12, 12}, /* cost of loading MMX registers
1717 in SImode and DImode */
1718 {12, 12}, /* cost of storing MMX registers
1719 in SImode and DImode */
1720 6, /* cost of moving SSE register */
1721 {12, 12, 12}, /* cost of loading SSE registers
1722 in SImode, DImode and TImode */
1723 {12, 12, 12}, /* cost of storing SSE registers
1724 in SImode, DImode and TImode */
1725 8, /* MMX or SSE register to integer */
1726 8, /* size of l1 cache. */
1727 1024, /* size of l2 cache. */
1728 64, /* size of prefetch block */
1729 8, /* number of parallel prefetches */
1730 1, /* Branch cost */
1731 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1732 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1733 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1734 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1735 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1736 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
/* Vectorizer cost-model entries follow.  */
1739 1, /* scalar_stmt_cost. */
1740 1, /* scalar load_cost. */
1741 1, /* scalar_store_cost. */
1742 1, /* vec_stmt_cost. */
1743 1, /* vec_to_scalar_cost. */
1744 1, /* scalar_to_vec_cost. */
1745 1, /* vec_align_load_cost. */
1746 2, /* vec_unalign_load_cost. */
1747 1, /* vec_store_cost. */
1748 3, /* cond_taken_branch_cost. */
1749 1, /* cond_not_taken_branch_cost. */
/* Expansion strategy table for memcpy when tuning for Bonnell/Atom;
   each {max, alg, noalign} entry picks ALG for block sizes up to
   MAX bytes (-1 = no upper bound).  */
1752 static stringop_algs atom_memcpy[2] = {
1753 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1754 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1755 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
/* Expansion strategy table for memset when tuning for Bonnell/Atom;
   same entry layout as the memcpy tables in this file.  */
1756 static stringop_algs atom_memset[2] = {
1757 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1758 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1759 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1760 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
/* Instruction and memory-move cost table used when tuning for
   Bonnell/Atom.  Costs are relative to an add (COSTS_N_INSNS);
   note the lea cost is deliberately COSTS_N_INSNS (1) + 1 to bias
   synth_mult away from lea chains.  */
1762 struct processor_costs atom_cost = {
1763 COSTS_N_INSNS (1), /* cost of an add instruction */
1764 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1765 COSTS_N_INSNS (1), /* variable shift costs */
1766 COSTS_N_INSNS (1), /* constant shift costs */
1767 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1768 COSTS_N_INSNS (4), /* HI */
1769 COSTS_N_INSNS (3), /* SI */
1770 COSTS_N_INSNS (4), /* DI */
1771 COSTS_N_INSNS (2)}, /* other */
1772 0, /* cost of multiply per each bit set */
1773 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1774 COSTS_N_INSNS (26), /* HI */
1775 COSTS_N_INSNS (42), /* SI */
1776 COSTS_N_INSNS (74), /* DI */
1777 COSTS_N_INSNS (74)}, /* other */
1778 COSTS_N_INSNS (1), /* cost of movsx */
1779 COSTS_N_INSNS (1), /* cost of movzx */
1780 8, /* "large" insn */
1781 17, /* MOVE_RATIO */
1782 4, /* cost for loading QImode using movzbl */
1783 {4, 4, 4}, /* cost of loading integer registers
1784 in QImode, HImode and SImode.
1785 Relative to reg-reg move (2). */
1786 {4, 4, 4}, /* cost of storing integer registers */
1787 4, /* cost of reg,reg fld/fst */
1788 {12, 12, 12}, /* cost of loading fp registers
1789 in SFmode, DFmode and XFmode */
1790 {6, 6, 8}, /* cost of storing fp registers
1791 in SFmode, DFmode and XFmode */
1792 2, /* cost of moving MMX register */
1793 {8, 8}, /* cost of loading MMX registers
1794 in SImode and DImode */
1795 {8, 8}, /* cost of storing MMX registers
1796 in SImode and DImode */
1797 2, /* cost of moving SSE register */
1798 {8, 8, 8}, /* cost of loading SSE registers
1799 in SImode, DImode and TImode */
1800 {8, 8, 8}, /* cost of storing SSE registers
1801 in SImode, DImode and TImode */
1802 5, /* MMX or SSE register to integer */
1803 32, /* size of l1 cache. */
1804 256, /* size of l2 cache. */
1805 64, /* size of prefetch block */
1806 6, /* number of parallel prefetches */
1807 3, /* Branch cost */
1808 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1809 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1810 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1811 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1812 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1813 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
/* Vectorizer cost-model entries follow.  */
1816 1, /* scalar_stmt_cost. */
1817 1, /* scalar load_cost. */
1818 1, /* scalar_store_cost. */
1819 1, /* vec_stmt_cost. */
1820 1, /* vec_to_scalar_cost. */
1821 1, /* scalar_to_vec_cost. */
1822 1, /* vec_align_load_cost. */
1823 2, /* vec_unalign_load_cost. */
1824 1, /* vec_store_cost. */
1825 3, /* cond_taken_branch_cost. */
1826 1, /* cond_not_taken_branch_cost. */
/* Expansion strategy table for memcpy when tuning for Silvermont;
   each {max, alg, noalign} entry picks ALG for block sizes up to
   MAX bytes (-1 = no upper bound).  */
1829 static stringop_algs slm_memcpy[2] = {
1830 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1831 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1832 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
/* Expansion strategy table for memset when tuning for Silvermont;
   same entry layout as the memcpy tables in this file.  */
1833 static stringop_algs slm_memset[2] = {
1834 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1835 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1836 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1837 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
/* Instruction and memory-move cost table used when tuning for
   Silvermont.  Costs are relative to an add (COSTS_N_INSNS).  */
1839 struct processor_costs slm_cost = {
1840 COSTS_N_INSNS (1), /* cost of an add instruction */
1841 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1842 COSTS_N_INSNS (1), /* variable shift costs */
1843 COSTS_N_INSNS (1), /* constant shift costs */
1844 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1845 COSTS_N_INSNS (3), /* HI */
1846 COSTS_N_INSNS (3), /* SI */
1847 COSTS_N_INSNS (4), /* DI */
1848 COSTS_N_INSNS (2)}, /* other */
1849 0, /* cost of multiply per each bit set */
1850 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1851 COSTS_N_INSNS (26), /* HI */
1852 COSTS_N_INSNS (42), /* SI */
1853 COSTS_N_INSNS (74), /* DI */
1854 COSTS_N_INSNS (74)}, /* other */
1855 COSTS_N_INSNS (1), /* cost of movsx */
1856 COSTS_N_INSNS (1), /* cost of movzx */
1857 8, /* "large" insn */
1858 17, /* MOVE_RATIO */
1859 4, /* cost for loading QImode using movzbl */
1860 {4, 4, 4}, /* cost of loading integer registers
1861 in QImode, HImode and SImode.
1862 Relative to reg-reg move (2). */
1863 {4, 4, 4}, /* cost of storing integer registers */
1864 4, /* cost of reg,reg fld/fst */
1865 {12, 12, 12}, /* cost of loading fp registers
1866 in SFmode, DFmode and XFmode */
1867 {6, 6, 8}, /* cost of storing fp registers
1868 in SFmode, DFmode and XFmode */
1869 2, /* cost of moving MMX register */
1870 {8, 8}, /* cost of loading MMX registers
1871 in SImode and DImode */
1872 {8, 8}, /* cost of storing MMX registers
1873 in SImode and DImode */
1874 2, /* cost of moving SSE register */
1875 {8, 8, 8}, /* cost of loading SSE registers
1876 in SImode, DImode and TImode */
1877 {8, 8, 8}, /* cost of storing SSE registers
1878 in SImode, DImode and TImode */
1879 5, /* MMX or SSE register to integer */
1880 32, /* size of l1 cache. */
1881 256, /* size of l2 cache. */
1882 64, /* size of prefetch block */
1883 6, /* number of parallel prefetches */
1884 3, /* Branch cost */
1885 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1886 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1887 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1888 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1889 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1890 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
/* Vectorizer cost-model entries follow; note vec_to_scalar is 4
   here, unlike most sibling tables.  */
1893 1, /* scalar_stmt_cost. */
1894 1, /* scalar load_cost. */
1895 1, /* scalar_store_cost. */
1896 1, /* vec_stmt_cost. */
1897 4, /* vec_to_scalar_cost. */
1898 1, /* scalar_to_vec_cost. */
1899 1, /* vec_align_load_cost. */
1900 2, /* vec_unalign_load_cost. */
1901 1, /* vec_store_cost. */
1902 3, /* cond_taken_branch_cost. */
1903 1, /* cond_not_taken_branch_cost. */
/* Expansion strategy table for memcpy when tuning with the generic
   "intel" tuning; each {max, alg, noalign} entry picks ALG for
   block sizes up to MAX bytes (-1 = no upper bound).  */
1906 static stringop_algs intel_memcpy[2] = {
1907 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1908 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1909 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
/* Expansion strategy table for memset for the "intel" tuning; same
   entry layout as the memcpy tables in this file.  */
1910 static stringop_algs intel_memset[2] = {
1911 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1912 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1913 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1914 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
/* Instruction and memory-move cost table for the "intel" tuning
   target.  Costs are relative to an add (COSTS_N_INSNS).  */
1916 struct processor_costs intel_cost = {
1917 COSTS_N_INSNS (1), /* cost of an add instruction */
1918 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1919 COSTS_N_INSNS (1), /* variable shift costs */
1920 COSTS_N_INSNS (1), /* constant shift costs */
1921 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1922 COSTS_N_INSNS (3), /* HI */
1923 COSTS_N_INSNS (3), /* SI */
1924 COSTS_N_INSNS (4), /* DI */
1925 COSTS_N_INSNS (2)}, /* other */
1926 0, /* cost of multiply per each bit set */
1927 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1928 COSTS_N_INSNS (26), /* HI */
1929 COSTS_N_INSNS (42), /* SI */
1930 COSTS_N_INSNS (74), /* DI */
1931 COSTS_N_INSNS (74)}, /* other */
1932 COSTS_N_INSNS (1), /* cost of movsx */
1933 COSTS_N_INSNS (1), /* cost of movzx */
1934 8, /* "large" insn */
1935 17, /* MOVE_RATIO */
1936 4, /* cost for loading QImode using movzbl */
1937 {4, 4, 4}, /* cost of loading integer registers
1938 in QImode, HImode and SImode.
1939 Relative to reg-reg move (2). */
1940 {4, 4, 4}, /* cost of storing integer registers */
1941 4, /* cost of reg,reg fld/fst */
1942 {12, 12, 12}, /* cost of loading fp registers
1943 in SFmode, DFmode and XFmode */
1944 {6, 6, 8}, /* cost of storing fp registers
1945 in SFmode, DFmode and XFmode */
1946 2, /* cost of moving MMX register */
1947 {8, 8}, /* cost of loading MMX registers
1948 in SImode and DImode */
1949 {8, 8}, /* cost of storing MMX registers
1950 in SImode and DImode */
1951 2, /* cost of moving SSE register */
1952 {8, 8, 8}, /* cost of loading SSE registers
1953 in SImode, DImode and TImode */
1954 {8, 8, 8}, /* cost of storing SSE registers
1955 in SImode, DImode and TImode */
1956 5, /* MMX or SSE register to integer */
1957 32, /* size of l1 cache. */
1958 256, /* size of l2 cache. */
1959 64, /* size of prefetch block */
1960 6, /* number of parallel prefetches */
1961 3, /* Branch cost */
1962 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1963 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1964 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1965 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1966 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1967 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
/* Vectorizer cost-model entries follow.  */
1970 1, /* scalar_stmt_cost. */
1971 1, /* scalar load_cost. */
1972 1, /* scalar_store_cost. */
1973 1, /* vec_stmt_cost. */
1974 4, /* vec_to_scalar_cost. */
1975 1, /* scalar_to_vec_cost. */
1976 1, /* vec_align_load_cost. */
1977 2, /* vec_unalign_load_cost. */
1978 1, /* vec_store_cost. */
1979 3, /* cond_taken_branch_cost. */
1980 1, /* cond_not_taken_branch_cost. */
1983 /* Generic should produce code tuned for Core-i7 (and newer chips)
1984 and btver1 (and newer chips). */
/* Expansion strategy table for memcpy under the generic tuning;
   each {max, alg, noalign} entry picks ALG for block sizes up to
   MAX bytes (-1 = no upper bound).  */
1986 static stringop_algs generic_memcpy[2] = {
1987 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1988 {-1, libcall, false}}},
1989 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1990 {-1, libcall, false}}}};
/* Expansion strategy table for memset under the generic tuning;
   same entry layout as the memcpy tables in this file.  */
1991 static stringop_algs generic_memset[2] = {
1992 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1993 {-1, libcall, false}}},
1994 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1995 {-1, libcall, false}}}};
/* Instruction and memory-move cost table for -mtune=generic
   (targets Core-i7-and-newer and btver1-and-newer, per the comment
   above).  Costs are relative to an add (COSTS_N_INSNS).  */
1997 struct processor_costs generic_cost = {
1998 COSTS_N_INSNS (1), /* cost of an add instruction */
1999 /* On all chips taken into consideration lea is 2 cycles and more. With
2000 this cost however our current implementation of synth_mult results in
2001 use of unnecessary temporary registers causing regression on several
2002 SPECfp benchmarks. */
2003 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
2004 COSTS_N_INSNS (1), /* variable shift costs */
2005 COSTS_N_INSNS (1), /* constant shift costs */
2006 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
2007 COSTS_N_INSNS (4), /* HI */
2008 COSTS_N_INSNS (3), /* SI */
2009 COSTS_N_INSNS (4), /* DI */
2010 COSTS_N_INSNS (2)}, /* other */
2011 0, /* cost of multiply per each bit set */
2012 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
2013 COSTS_N_INSNS (26), /* HI */
2014 COSTS_N_INSNS (42), /* SI */
2015 COSTS_N_INSNS (74), /* DI */
2016 COSTS_N_INSNS (74)}, /* other */
2017 COSTS_N_INSNS (1), /* cost of movsx */
2018 COSTS_N_INSNS (1), /* cost of movzx */
2019 8, /* "large" insn */
2020 17, /* MOVE_RATIO */
2021 4, /* cost for loading QImode using movzbl */
2022 {4, 4, 4}, /* cost of loading integer registers
2023 in QImode, HImode and SImode.
2024 Relative to reg-reg move (2). */
2025 {4, 4, 4}, /* cost of storing integer registers */
2026 4, /* cost of reg,reg fld/fst */
2027 {12, 12, 12}, /* cost of loading fp registers
2028 in SFmode, DFmode and XFmode */
2029 {6, 6, 8}, /* cost of storing fp registers
2030 in SFmode, DFmode and XFmode */
2031 2, /* cost of moving MMX register */
2032 {8, 8}, /* cost of loading MMX registers
2033 in SImode and DImode */
2034 {8, 8}, /* cost of storing MMX registers
2035 in SImode and DImode */
2036 2, /* cost of moving SSE register */
2037 {8, 8, 8}, /* cost of loading SSE registers
2038 in SImode, DImode and TImode */
2039 {8, 8, 8}, /* cost of storing SSE registers
2040 in SImode, DImode and TImode */
2041 5, /* MMX or SSE register to integer */
2042 32, /* size of l1 cache. */
2043 512, /* size of l2 cache. */
2044 64, /* size of prefetch block */
2045 6, /* number of parallel prefetches */
2046 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this
2047 value is increased to perhaps more appropriate value of 5. */
2048 3, /* Branch cost */
2049 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
2050 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
2051 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
2052 COSTS_N_INSNS (8), /* cost of FABS instruction. */
2053 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
2054 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
/* Vectorizer cost-model entries follow.  */
2057 1, /* scalar_stmt_cost. */
2058 1, /* scalar load_cost. */
2059 1, /* scalar_store_cost. */
2060 1, /* vec_stmt_cost. */
2061 1, /* vec_to_scalar_cost. */
2062 1, /* scalar_to_vec_cost. */
2063 1, /* vec_align_load_cost. */
2064 2, /* vec_unalign_load_cost. */
2065 1, /* vec_store_cost. */
2066 3, /* cond_taken_branch_cost. */
2067 1, /* cond_not_taken_branch_cost. */
2070 /* core_cost should produce code tuned for the Core family of CPUs. */
/* Expansion strategy table for memcpy when tuning for the Core
   family; each {max, alg, noalign} entry picks ALG for block sizes
   up to MAX bytes (-1 = no upper bound).  */
2071 static stringop_algs core_memcpy[2] = {
2072 {libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}},
2073 {libcall, {{24, loop, true}, {128, rep_prefix_8_byte, true},
2074 {-1, libcall, false}}}};
/* Expansion strategy table for memset when tuning for the Core
   family; same entry layout as the memcpy tables in this file.  */
2075 static stringop_algs core_memset[2] = {
2076 {libcall, {{6, loop_1_byte, true},
2078 {8192, rep_prefix_4_byte, true},
2079 {-1, libcall, false}}},
2080 {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, true},
2081 {-1, libcall, false}}}};
/* Instruction and memory-move cost table used when tuning for the
   Core family of CPUs.  Costs are relative to an add
   (COSTS_N_INSNS).  */
2084 struct processor_costs core_cost = {
2085 COSTS_N_INSNS (1), /* cost of an add instruction */
2086 /* On all chips taken into consideration lea is 2 cycles and more. With
2087 this cost however our current implementation of synth_mult results in
2088 use of unnecessary temporary registers causing regression on several
2089 SPECfp benchmarks. */
2090 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
2091 COSTS_N_INSNS (1), /* variable shift costs */
2092 COSTS_N_INSNS (1), /* constant shift costs */
2093 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
2094 COSTS_N_INSNS (4), /* HI */
2095 COSTS_N_INSNS (3), /* SI */
2096 COSTS_N_INSNS (4), /* DI */
2097 COSTS_N_INSNS (2)}, /* other */
2098 0, /* cost of multiply per each bit set */
2099 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
2100 COSTS_N_INSNS (26), /* HI */
2101 COSTS_N_INSNS (42), /* SI */
2102 COSTS_N_INSNS (74), /* DI */
2103 COSTS_N_INSNS (74)}, /* other */
2104 COSTS_N_INSNS (1), /* cost of movsx */
2105 COSTS_N_INSNS (1), /* cost of movzx */
2106 8, /* "large" insn */
2107 17, /* MOVE_RATIO */
2108 4, /* cost for loading QImode using movzbl */
2109 {4, 4, 4}, /* cost of loading integer registers
2110 in QImode, HImode and SImode.
2111 Relative to reg-reg move (2). */
2112 {4, 4, 4}, /* cost of storing integer registers */
2113 4, /* cost of reg,reg fld/fst */
2114 {12, 12, 12}, /* cost of loading fp registers
2115 in SFmode, DFmode and XFmode */
2116 {6, 6, 8}, /* cost of storing fp registers
2117 in SFmode, DFmode and XFmode */
2118 2, /* cost of moving MMX register */
2119 {8, 8}, /* cost of loading MMX registers
2120 in SImode and DImode */
2121 {8, 8}, /* cost of storing MMX registers
2122 in SImode and DImode */
2123 2, /* cost of moving SSE register */
2124 {8, 8, 8}, /* cost of loading SSE registers
2125 in SImode, DImode and TImode */
2126 {8, 8, 8}, /* cost of storing SSE registers
2127 in SImode, DImode and TImode */
2128 5, /* MMX or SSE register to integer */
2129 64, /* size of l1 cache. */
2130 512, /* size of l2 cache. */
2131 64, /* size of prefetch block */
2132 6, /* number of parallel prefetches */
2133 /* FIXME perhaps more appropriate value is 5. */
2134 3, /* Branch cost */
2135 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
2136 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
2137 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
2138 COSTS_N_INSNS (8), /* cost of FABS instruction. */
2139 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
2140 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
/* Vectorizer cost-model entries follow.  */
2143 1, /* scalar_stmt_cost. */
2144 1, /* scalar load_cost. */
2145 1, /* scalar_store_cost. */
2146 1, /* vec_stmt_cost. */
2147 1, /* vec_to_scalar_cost. */
2148 1, /* scalar_to_vec_cost. */
2149 1, /* vec_align_load_cost. */
2150 2, /* vec_unalign_load_cost. */
2151 1, /* vec_store_cost. */
2152 3, /* cond_taken_branch_cost. */
2153 1, /* cond_not_taken_branch_cost. */
2157 /* Active cost table selected by -mtune.  Initialized to
   pentium_cost; presumably re-pointed during option processing --
   the assignment site is not in this chunk.  */
2158 const struct processor_costs *ix86_tune_cost = &pentium_cost;
2160 /* Active cost table selected by -mtune, or by -Os (size-oriented
   costs).  Same initialization caveat as ix86_tune_cost.  */
2161 const struct processor_costs *ix86_cost = &pentium_cost;
2163 /* Processor feature/optimization bitmasks.  Each m_* constant is a
   one-bit mask keyed by a PROCESSOR_* enumerator; the compound
   masks below OR together related processors for use in the tuning
   selector tables.  */
2164 #define m_386 (1<<PROCESSOR_I386)
2165 #define m_486 (1<<PROCESSOR_I486)
2166 #define m_PENT (1<<PROCESSOR_PENTIUM)
2167 #define m_LAKEMONT (1<<PROCESSOR_LAKEMONT)
2168 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
2169 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
2170 #define m_NOCONA (1<<PROCESSOR_NOCONA)
2171 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
2172 #define m_CORE2 (1<<PROCESSOR_CORE2)
2173 #define m_NEHALEM (1<<PROCESSOR_NEHALEM)
2174 #define m_SANDYBRIDGE (1<<PROCESSOR_SANDYBRIDGE)
2175 #define m_HASWELL (1<<PROCESSOR_HASWELL)
2176 #define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_HASWELL)
2177 #define m_BONNELL (1<<PROCESSOR_BONNELL)
2178 #define m_SILVERMONT (1<<PROCESSOR_SILVERMONT)
2179 #define m_KNL (1<<PROCESSOR_KNL)
2180 #define m_SKYLAKE_AVX512 (1<<PROCESSOR_SKYLAKE_AVX512)
2181 #define m_INTEL (1<<PROCESSOR_INTEL)
/* AMD processors.  */
2183 #define m_GEODE (1<<PROCESSOR_GEODE)
2184 #define m_K6 (1<<PROCESSOR_K6)
2185 #define m_K6_GEODE (m_K6 | m_GEODE)
2186 #define m_K8 (1<<PROCESSOR_K8)
2187 #define m_ATHLON (1<<PROCESSOR_ATHLON)
2188 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
2189 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
2190 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
2191 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
2192 #define m_BDVER3 (1<<PROCESSOR_BDVER3)
2193 #define m_BDVER4 (1<<PROCESSOR_BDVER4)
2194 #define m_ZNVER1 (1<<PROCESSOR_ZNVER1)
2195 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
2196 #define m_BTVER2 (1<<PROCESSOR_BTVER2)
2197 #define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4)
2198 #define m_BTVER (m_BTVER1 | m_BTVER2)
2199 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER \
2202 #define m_GENERIC (1<<PROCESSOR_GENERIC)
2204 const char* ix86_tune_feature_names[X86_TUNE_LAST] = {
2206 #define DEF_TUNE(tune, name, selector) name,
2207 #include "x86-tune.def"
2211 /* Feature tests against the various tunings.  Indexed by
   X86_TUNE_* (see x86-tune.def); each entry records whether the
   feature is enabled for the processor currently being tuned.  */
2212 unsigned char ix86_tune_features[X86_TUNE_LAST];
2214 /* Feature tests against the various tunings used to create ix86_tune_features
2215 based on the processor mask. */
2216 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
2218 #define DEF_TUNE(tune, name, selector) selector,
2219 #include "x86-tune.def"
2223 /* Feature tests against the various architecture variations.
   Indexed by X86_ARCH_*.  */
2224 unsigned char ix86_arch_features[X86_ARCH_LAST];
2226 /* Feature tests against the various architecture variations, used to create
2227 ix86_arch_features based on the processor mask. */
2228 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2229 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
2230 ~(m_386 | m_486 | m_PENT | m_LAKEMONT | m_K6),
2232 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2235 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2238 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2241 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2245 /* In case the average insn count for single function invocation is
2246 lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
2248 #define FAST_PROLOGUE_INSN_COUNT 20
2250 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
2251 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2252 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2253 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2255 /* Array of the smallest class containing reg number REGNO, indexed by
2256 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2258 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2260 /* ax, dx, cx, bx */
2261 AREG, DREG, CREG, BREG,
2262 /* si, di, bp, sp */
2263 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2265 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2266 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2269 /* flags, fpsr, fpcr, frame */
2270 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2272 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2275 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2278 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2279 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2280 /* SSE REX registers */
2281 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2283 /* AVX-512 SSE registers */
2284 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2285 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2286 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2287 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2288 /* Mask registers. */
2289 MASK_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2290 MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2291 /* MPX bound registers */
2292 BND_REGS, BND_REGS, BND_REGS, BND_REGS,
2295 /* The "default" register map used in 32bit mode. */
2297 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2299 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2300 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2301 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2302 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2303 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2304 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2305 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2306 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2307 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2308 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2309 101, 102, 103, 104, /* bound registers */
2312 /* The "default" register map used in 64bit mode. */
2314 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2316 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2317 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2318 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2319 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2320 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2321 8,9,10,11,12,13,14,15, /* extended integer registers */
2322 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2323 67, 68, 69, 70, 71, 72, 73, 74, /* AVX-512 registers 16-23 */
2324 75, 76, 77, 78, 79, 80, 81, 82, /* AVX-512 registers 24-31 */
2325 118, 119, 120, 121, 122, 123, 124, 125, /* Mask registers */
2326 126, 127, 128, 129, /* bound registers */
2329 /* Define the register numbers to be used in Dwarf debugging information.
2330 The SVR4 reference port C compiler uses the following register numbers
2331 in its Dwarf output code:
2332 0 for %eax (gcc regno = 0)
2333 1 for %ecx (gcc regno = 2)
2334 2 for %edx (gcc regno = 1)
2335 3 for %ebx (gcc regno = 3)
2336 4 for %esp (gcc regno = 7)
2337 5 for %ebp (gcc regno = 6)
2338 6 for %esi (gcc regno = 4)
2339 7 for %edi (gcc regno = 5)
2340 The following three DWARF register numbers are never generated by
2341 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2342 believes these numbers have these meanings.
2343 8 for %eip (no gcc equivalent)
2344 9 for %eflags (gcc regno = 17)
2345 10 for %trapno (no gcc equivalent)
2346 It is not at all clear how we should number the FP stack registers
2347 for the x86 architecture. If the version of SDB on x86/svr4 were
2348 a bit less brain dead with respect to floating-point then we would
2349 have a precedent to follow with respect to DWARF register numbers
2350 for x86 FP registers, but the SDB on x86/svr4 is so completely
2351 broken with respect to FP registers that it is hardly worth thinking
2352 of it as something to strive for compatibility with.
2353 The version of x86/svr4 SDB I have at the moment does (partially)
2354 seem to believe that DWARF register number 11 is associated with
2355 the x86 register %st(0), but that's about all. Higher DWARF
2356 register numbers don't seem to be associated with anything in
2357 particular, and even for DWARF regno 11, SDB only seems to under-
2358 stand that it should say that a variable lives in %st(0) (when
2359 asked via an `=' command) if we said it was in DWARF regno 11,
2360 but SDB still prints garbage when asked for the value of the
2361 variable in question (via a `/' command).
2362 (Also note that the labels SDB prints for various FP stack regs
2363 when doing an `x' command are all wrong.)
2364 Note that these problems generally don't affect the native SVR4
2365 C compiler because it doesn't allow the use of -O with -g and
2366 because when it is *not* optimizing, it allocates a memory
2367 location for each floating-point variable, and the memory
2368 location is what gets described in the DWARF AT_location
2369 attribute for the variable in question.
2370 Regardless of the severe mental illness of the x86/svr4 SDB, we
2371 do something sensible here and we use the following DWARF
2372 register numbers. Note that these are all stack-top-relative
2374 11 for %st(0) (gcc regno = 8)
2375 12 for %st(1) (gcc regno = 9)
2376 13 for %st(2) (gcc regno = 10)
2377 14 for %st(3) (gcc regno = 11)
2378 15 for %st(4) (gcc regno = 12)
2379 16 for %st(5) (gcc regno = 13)
2380 17 for %st(6) (gcc regno = 14)
2381 18 for %st(7) (gcc regno = 15)
2383 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2385 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2386 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2387 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2388 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2389 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2390 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2391 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2392 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2393 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2394 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2395 101, 102, 103, 104, /* bound registers */
2398 /* Define parameter passing and return registers. */
2400 static int const x86_64_int_parameter_registers[6] =
2402 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2405 static int const x86_64_ms_abi_int_parameter_registers[4] =
2407 CX_REG, DX_REG, R8_REG, R9_REG
2410 static int const x86_64_int_return_registers[4] =
2412 AX_REG, DX_REG, DI_REG, SI_REG
2415 /* Additional registers that are clobbered by SYSV calls. */
2417 int const x86_64_ms_sysv_extra_clobbered_registers[12] =
2421 XMM8_REG, XMM9_REG, XMM10_REG, XMM11_REG,
2422 XMM12_REG, XMM13_REG, XMM14_REG, XMM15_REG
2425 /* Define the structure for the machine field in struct function. */
2427 struct GTY(()) stack_local_entry {
2428 unsigned short mode;
2431 struct stack_local_entry *next;
2434 /* Structure describing stack frame layout.
2435 Stack grows downward:
2441 saved static chain if ix86_static_chain_on_stack
2443 saved frame pointer if frame_pointer_needed
2444 <- HARD_FRAME_POINTER
2450 <- sse_regs_save_offset
2453 [va_arg registers] |
2457 [padding2] | = to_allocate
2466 int outgoing_arguments_size;
2468 /* The offsets relative to ARG_POINTER. */
2469 HOST_WIDE_INT frame_pointer_offset;
2470 HOST_WIDE_INT hard_frame_pointer_offset;
2471 HOST_WIDE_INT stack_pointer_offset;
2472 HOST_WIDE_INT hfp_save_offset;
2473 HOST_WIDE_INT reg_save_offset;
2474 HOST_WIDE_INT sse_reg_save_offset;
2476 /* When save_regs_using_mov is set, emit prologue using
2477 move instead of push instructions. */
2478 bool save_regs_using_mov;
2481 /* Which cpu are we scheduling for. */
2482 enum attr_cpu ix86_schedule;
2484 /* Which cpu are we optimizing for. */
2485 enum processor_type ix86_tune;
2487 /* Which instruction set architecture to use. */
2488 enum processor_type ix86_arch;
2490 /* True if processor has SSE prefetch instruction. */
2491 unsigned char x86_prefetch_sse;
2493 /* -mstackrealign option */
2494 static const char ix86_force_align_arg_pointer_string[]
2495 = "force_align_arg_pointer";
/* Indirect insn-generator hooks.  Which concrete generator each
   pointer refers to is installed elsewhere (the assignment site is
   not in this chunk; presumably chosen by target word size --
   confirm).  */
2497 static rtx (*ix86_gen_leave) (void);
2498 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2499 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2500 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2501 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2502 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2503 static rtx (*ix86_gen_monitorx) (rtx, rtx, rtx);
2504 static rtx (*ix86_gen_clzero) (rtx);
2505 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2506 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2507 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2508 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2509 static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
2510 static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);
2512 /* Preferred alignment for stack boundary in bits. */
2513 unsigned int ix86_preferred_stack_boundary;
2515 /* Alignment for incoming stack boundary in bits specified at
   the command line.  */
2517 static unsigned int ix86_user_incoming_stack_boundary;
2519 /* Default alignment for incoming stack boundary in bits. */
2520 static unsigned int ix86_default_incoming_stack_boundary;
2522 /* Alignment for incoming stack boundary in bits. */
2523 unsigned int ix86_incoming_stack_boundary;
2525 /* Calling abi specific va_list type nodes. */
2526 static GTY(()) tree sysv_va_list_type_node;
2527 static GTY(()) tree ms_va_list_type_node;
2529 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2530 char internal_label_prefix[16];
2531 int internal_label_prefix_len;
2533 /* Fence to use after loop using movnt. */
2536 /* Register class used for passing given 64bit part of the argument.
2537 These represent classes as documented by the PS ABI, with the exception
2538 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
2539 use SF or DFmode move instead of DImode to avoid reformatting penalties.
2541 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2542 whenever possible (upper half does contain padding). */
2543 enum x86_64_reg_class
2546 X86_64_INTEGER_CLASS,
2547 X86_64_INTEGERSI_CLASS,
2554 X86_64_COMPLEX_X87_CLASS,
2558 #define MAX_CLASSES 8
2560 /* Table of constants used by fldpi, fldln2, etc.... */
2561 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2562 static bool ext_80387_constants_init = 0;
2565 static struct machine_function * ix86_init_machine_status (void);
2566 static rtx ix86_function_value (const_tree, const_tree, bool);
2567 static bool ix86_function_value_regno_p (const unsigned int);
2568 static unsigned int ix86_function_arg_boundary (machine_mode,
2570 static rtx ix86_static_chain (const_tree, bool);
2571 static int ix86_function_regparm (const_tree, const_tree);
2572 static void ix86_compute_frame_layout (struct ix86_frame *);
2573 static bool ix86_expand_vector_init_one_nonzero (bool, machine_mode,
2575 static void ix86_add_new_builtins (HOST_WIDE_INT);
2576 static tree ix86_canonical_va_list_type (tree);
2577 static void predict_jump (int);
2578 static unsigned int split_stack_prologue_scratch_regno (void);
2579 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2581 enum ix86_function_specific_strings
2583 IX86_FUNCTION_SPECIFIC_ARCH,
2584 IX86_FUNCTION_SPECIFIC_TUNE,
2585 IX86_FUNCTION_SPECIFIC_MAX
2588 static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
2589 const char *, enum fpmath_unit, bool);
2590 static void ix86_function_specific_save (struct cl_target_option *,
2591 struct gcc_options *opts);
2592 static void ix86_function_specific_restore (struct gcc_options *opts,
2593 struct cl_target_option *);
2594 static void ix86_function_specific_post_stream_in (struct cl_target_option *);
2595 static void ix86_function_specific_print (FILE *, int,
2596 struct cl_target_option *);
2597 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2598 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2599 struct gcc_options *,
2600 struct gcc_options *,
2601 struct gcc_options *);
2602 static bool ix86_can_inline_p (tree, tree);
2603 static void ix86_set_current_function (tree);
2604 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2606 static enum calling_abi ix86_function_abi (const_tree);
2609 #ifndef SUBTARGET32_DEFAULT_CPU
2610 #define SUBTARGET32_DEFAULT_CPU "i386"
2613 /* Whether -mtune= or -march= were specified */
2614 static int ix86_tune_defaulted;
2615 static int ix86_arch_specified;
2617 /* Vectorization library interface and handlers. */
2618 static tree (*ix86_veclib_handler) (combined_fn, tree, tree);
2620 static tree ix86_veclibabi_svml (combined_fn, tree, tree);
2621 static tree ix86_veclibabi_acml (combined_fn, tree, tree);
2623 /* Processor target table, indexed by processor number */
2626 const char *const name; /* processor name */
2627 const struct processor_costs *cost; /* Processor costs */
2628 const int align_loop; /* Default alignments. */
2629 const int align_loop_max_skip;
2630 const int align_jump;
2631 const int align_jump_max_skip;
2632 const int align_func;
2635 /* This table must be in sync with enum processor_type in i386.h. */
2636 static const struct ptt processor_target_table[PROCESSOR_max] =
2638 {"generic", &generic_cost, 16, 10, 16, 10, 16},
2639 {"i386", &i386_cost, 4, 3, 4, 3, 4},
2640 {"i486", &i486_cost, 16, 15, 16, 15, 16},
2641 {"pentium", &pentium_cost, 16, 7, 16, 7, 16},
2642 {"lakemont", &lakemont_cost, 16, 7, 16, 7, 16},
2643 {"pentiumpro", &pentiumpro_cost, 16, 15, 16, 10, 16},
2644 {"pentium4", &pentium4_cost, 0, 0, 0, 0, 0},
2645 {"nocona", &nocona_cost, 0, 0, 0, 0, 0},
2646 {"core2", &core_cost, 16, 10, 16, 10, 16},
2647 {"nehalem", &core_cost, 16, 10, 16, 10, 16},
2648 {"sandybridge", &core_cost, 16, 10, 16, 10, 16},
2649 {"haswell", &core_cost, 16, 10, 16, 10, 16},
2650 {"bonnell", &atom_cost, 16, 15, 16, 7, 16},
2651 {"silvermont", &slm_cost, 16, 15, 16, 7, 16},
2652 {"knl", &slm_cost, 16, 15, 16, 7, 16},
2653 {"skylake-avx512", &core_cost, 16, 10, 16, 10, 16},
2654 {"intel", &intel_cost, 16, 15, 16, 7, 16},
2655 {"geode", &geode_cost, 0, 0, 0, 0, 0},
2656 {"k6", &k6_cost, 32, 7, 32, 7, 32},
2657 {"athlon", &athlon_cost, 16, 7, 16, 7, 16},
2658 {"k8", &k8_cost, 16, 7, 16, 7, 16},
2659 {"amdfam10", &amdfam10_cost, 32, 24, 32, 7, 32},
2660 {"bdver1", &bdver1_cost, 16, 10, 16, 7, 11},
2661 {"bdver2", &bdver2_cost, 16, 10, 16, 7, 11},
2662 {"bdver3", &bdver3_cost, 16, 10, 16, 7, 11},
2663 {"bdver4", &bdver4_cost, 16, 10, 16, 7, 11},
2664 {"znver1", &znver1_cost, 16, 10, 16, 7, 11},
2665 {"btver1", &btver1_cost, 16, 10, 16, 7, 11},
2666 {"btver2", &btver2_cost, 16, 10, 16, 7, 11}
/* Body of the vzeroupper insertion pass: re-run the mode-switching
   framework with only the AVX_U128 entity enabled, which lets mode
   insertion place vzeroupper instructions after reload.  */
2670 rest_of_handle_insert_vzeroupper (void)
2674 /* vzeroupper instructions are inserted immediately after reload to
2675 account for possible spills from 256bit registers. The pass
2676 reuses mode switching infrastructure by re-running mode insertion
2677 pass, so disable entities that have already been processed. */
2678 for (i = 0; i < MAX_386_ENTITIES; i++)
2679 ix86_optimize_mode_switching[i] = 0;
/* Re-enable only the AVX upper-128 state entity for this re-run.  */
2681 ix86_optimize_mode_switching[AVX_U128] = 1;
2683 /* Call optimize_mode_switching. */
2684 g->get_passes ()->execute_pass_mode_switching ();
2688 /* Return 1 if INSN uses or defines a hard register.
2689 Hard register uses in a memory address are ignored.
2690 Clobbers and flags definitions are ignored. */
/* Return nonzero if INSN defines or uses a hard register, ignoring
   must-clobbers, the flags register, and uses inside memory addresses.
   Such insns cannot be converted to vector mode by the STV pass.  */
2693 has_non_address_hard_reg (rtx_insn *insn)
/* A hard-register def counts unless it is a pure clobber or FLAGS_REG.  */
2696 FOR_EACH_INSN_DEF (ref, insn)
2697 if (HARD_REGISTER_P (DF_REF_REAL_REG (ref))
2698 && !DF_REF_FLAGS_IS_SET (ref, DF_REF_MUST_CLOBBER)
2699 && DF_REF_REGNO (ref) != FLAGS_REG)
/* A hard-register use counts unless it occurs in a memory address.  */
2702 FOR_EACH_INSN_USE (ref, insn)
2703 if (!DF_REF_REG_MEM_P (ref) && HARD_REGISTER_P (DF_REF_REAL_REG (ref)))
2709 /* Check if comparison INSN may be transformed
2710 into vector comparison. Currently we transform
2711 zero checks only which look like:
2713 (set (reg:CCZ 17 flags)
2714 (compare:CCZ (ior:SI (subreg:SI (reg:DI x) 4)
2715 (subreg:SI (reg:DI x) 0))
2716 (const_int 0 [0]))) */
/* Return nonzero if comparison INSN can be transformed into a vector
   comparison.  Only DImode zero-checks of the form
   (compare (ior (subreg:SI x 4) (subreg:SI x 0)) 0) setting CCZmode
   flags are accepted (see the block comment above this function).  */
2719 convertible_comparison_p (rtx_insn *insn)
2724 rtx def_set = single_set (insn);
2726 gcc_assert (def_set);
2728 rtx src = SET_SRC (def_set);
2729 rtx dst = SET_DEST (def_set);
2731 gcc_assert (GET_CODE (src) == COMPARE);
/* Destination must be the flags register in CCZmode (zero flag only).  */
2733 if (GET_CODE (dst) != REG
2734 || REGNO (dst) != FLAGS_REG
2735 || GET_MODE (dst) != CCZmode)
2738 rtx op1 = XEXP (src, 0);
2739 rtx op2 = XEXP (src, 1);
/* Only comparisons against zero are handled.  */
2741 if (op2 != CONST0_RTX (GET_MODE (op2)))
2744 if (GET_CODE (op1) != IOR)
2747 op2 = XEXP (op1, 1);
2748 op1 = XEXP (op1, 0);
/* Both IOR operands must be SImode subregs selecting the low and high
   halves (in either order) of the same DImode value.  */
2752 || GET_MODE (op1) != SImode
2753 || GET_MODE (op2) != SImode
2754 || ((SUBREG_BYTE (op1) != 0
2755 || SUBREG_BYTE (op2) != GET_MODE_SIZE (SImode))
2756 && (SUBREG_BYTE (op2) != 0
2757 || SUBREG_BYTE (op1) != GET_MODE_SIZE (SImode))))
2760 op1 = SUBREG_REG (op1);
2761 op2 = SUBREG_REG (op2);
/* The underlying register itself must be DImode.  */
2765 || GET_MODE (op1) != DImode
2771 /* Return 1 if INSN may be converted into vector
/* Return nonzero if INSN is a candidate for conversion into vector mode:
   a single-set DImode operation (or a convertible zero-comparison) whose
   operands are registers or memory and that touches no hard register.  */
2775 scalar_to_vector_candidate_p (rtx_insn *insn)
2777 rtx def_set = single_set (insn);
/* Insns touching hard registers can't be rewritten into vector form.  */
2782 if (has_non_address_hard_reg (insn))
2785 rtx src = SET_SRC (def_set);
2786 rtx dst = SET_DEST (def_set);
/* Comparisons are checked separately by convertible_comparison_p.  */
2788 if (GET_CODE (src) == COMPARE)
2789 return convertible_comparison_p (insn);
2791 /* We are interested in DImode promotion only. */
2792 if (GET_MODE (src) != DImode
2793 || GET_MODE (dst) != DImode)
2796 if (!REG_P (dst) && !MEM_P (dst))
2799 switch (GET_CODE (src))
/* Binary operations: both operands must be DImode registers or memory.  */
2818 if (!REG_P (XEXP (src, 0)) && !MEM_P (XEXP (src, 0)))
2821 if (!REG_P (XEXP (src, 1)) && !MEM_P (XEXP (src, 1)))
2824 if (GET_MODE (XEXP (src, 0)) != DImode
2825 || GET_MODE (XEXP (src, 1)) != DImode)
2831 /* For a given bitmap of insn UIDs scans all instruction and
2832 remove insn from CANDIDATES in case it has both convertible
2833 and not convertible definitions.
2835 All insns in a bitmap are conversion candidates according to
2836 scalar_to_vector_candidate_p. Currently it implies all insns
/* Prune CANDIDATES: any pseudo defined both inside and outside the
   candidate set makes all candidate insns defining it non-convertible,
   so those insns are dropped from the bitmap (two-phase: collect such
   registers, then clear their defining insns).  */
2840 remove_non_convertible_regs (bitmap candidates)
2844 bitmap regs = BITMAP_ALLOC (NULL);
/* Phase 1: find registers that also have definitions outside the
   candidate set (or are hard registers / already seen).  */
2846 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, id, bi)
2848 rtx def_set = single_set (DF_INSN_UID_GET (id)->insn);
2849 rtx reg = SET_DEST (def_set);
2852 || bitmap_bit_p (regs, REGNO (reg))
2853 || HARD_REGISTER_P (reg))
2856 for (df_ref def = DF_REG_DEF_CHAIN (REGNO (reg));
2858 def = DF_REF_NEXT_REG (def))
2860 if (!bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
2864 "r%d has non convertible definition in insn %d\n",
2865 REGNO (reg), DF_REF_INSN_UID (def));
2867 bitmap_set_bit (regs, REGNO (reg));
/* Phase 2: remove every candidate insn that defines such a register.  */
2873 EXECUTE_IF_SET_IN_BITMAP (regs, 0, id, bi)
2875 for (df_ref def = DF_REG_DEF_CHAIN (id);
2877 def = DF_REF_NEXT_REG (def))
2878 if (bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
2881 fprintf (dump_file, "Removing insn %d from candidates list\n",
2882 DF_REF_INSN_UID (def));
2884 bitmap_clear_bit (candidates, DF_REF_INSN_UID (def));
2897 static unsigned max_id;
2899 /* ID of a chain. */
2900 unsigned int chain_id;
2901 /* A queue of instructions to be included into a chain. */
2903 /* Instructions included into a chain. */
2905 /* All registers defined by a chain. */
2907 /* Registers used in both vector and scalar modes. */
2910 void build (bitmap candidates, unsigned insn_uid);
2911 int compute_convert_gain ();
2915 void add_insn (bitmap candidates, unsigned insn_uid);
2916 void add_to_queue (unsigned insn_uid);
2917 void mark_dual_mode_def (df_ref def);
2918 void analyze_register_chain (bitmap candidates, df_ref ref);
2919 rtx replace_with_subreg (rtx x, rtx reg, rtx subreg);
2920 void emit_conversion_insns (rtx insns, rtx_insn *pos);
2921 void replace_with_subreg_in_insn (rtx_insn *insn, rtx reg, rtx subreg);
2922 void convert_insn (rtx_insn *insn);
2923 void convert_op (rtx *op, rtx_insn *insn);
2924 void convert_reg (unsigned regno);
2925 void make_vector_copies (unsigned regno);
2928 unsigned scalar_chain::max_id = 0;
2930 /* Initialize new chain. */
/* Constructor: assign the chain a fresh id and allocate the insn,
   defs and dual-mode-defs bitmaps on a new bitmap obstack.  */
2932 scalar_chain::scalar_chain ()
2934 chain_id = ++max_id;
2937 fprintf (dump_file, "Created a new instruction chain #%d\n", chain_id);
2939 bitmap_obstack_initialize (NULL);
2940 insns = BITMAP_ALLOC (NULL);
2941 defs = BITMAP_ALLOC (NULL);
2942 defs_conv = BITMAP_ALLOC (NULL);
2946 /* Free chain's data. */
/* Destructor: release the chain's bitmaps and its obstack.  */
2948 scalar_chain::~scalar_chain ()
2950 BITMAP_FREE (insns);
2952 BITMAP_FREE (defs_conv);
2953 bitmap_obstack_release (NULL);
2956 /* Add instruction into chains' queue. */
/* Queue INSN_UID for inclusion into the chain unless it is already
   in the chain or already queued.  */
2959 scalar_chain::add_to_queue (unsigned insn_uid)
2961 if (bitmap_bit_p (insns, insn_uid)
2962 || bitmap_bit_p (queue, insn_uid))
2966 fprintf (dump_file, " Adding insn %d into chain's #%d queue\n",
2967 insn_uid, chain_id);
2968 bitmap_set_bit (queue, insn_uid);
2971 /* Mark register defined by DEF as requiring conversion. */
/* Record the register defined by DEF as needed in both scalar and
   vector modes (a scalar copy will have to be materialized later).  */
2974 scalar_chain::mark_dual_mode_def (df_ref def)
2976 gcc_assert (DF_REF_REG_DEF_P (def));
/* Already marked — nothing to do.  */
2978 if (bitmap_bit_p (defs_conv, DF_REF_REGNO (def)))
2983 " Mark r%d def in insn %d as requiring both modes in chain #%d\n",
2984 DF_REF_REGNO (def), DF_REF_INSN_UID (def), chain_id);
2986 bitmap_set_bit (defs_conv, DF_REF_REGNO (def));
2989 /* Check REF's chain to add new insns into a queue
2990 and find registers requiring conversion. */
/* Walk the def-use / use-def chain of REF: queue convertible insns into
   this chain and mark registers reaching non-convertible insns as
   needing both scalar and vector modes.  */
2993 scalar_chain::analyze_register_chain (bitmap candidates, df_ref ref)
2997 gcc_assert (bitmap_bit_p (insns, DF_REF_INSN_UID (ref))
2998 || bitmap_bit_p (candidates, DF_REF_INSN_UID (ref)));
2999 add_to_queue (DF_REF_INSN_UID (ref));
3001 for (chain = DF_REF_CHAIN (ref); chain; chain = chain->next)
3003 unsigned uid = DF_REF_INSN_UID (chain->ref);
/* Debug insns don't constrain convertibility.  */
3005 if (!NONDEBUG_INSN_P (DF_REF_INSN (chain->ref)))
3008 if (!DF_REF_REG_MEM_P (chain->ref))
3010 if (bitmap_bit_p (insns, uid))
3013 if (bitmap_bit_p (candidates, uid))
/* The linked ref belongs to a non-convertible insn: the register must
   exist in both modes.  */
3020 if (DF_REF_REG_DEF_P (chain->ref))
3023 fprintf (dump_file, " r%d def in insn %d isn't convertible\n",
3024 DF_REF_REGNO (chain->ref), uid);
3025 mark_dual_mode_def (chain->ref);
3030 fprintf (dump_file, " r%d use in insn %d isn't convertible\n",
3031 DF_REF_REGNO (chain->ref), uid);
3032 mark_dual_mode_def (ref);
3037 /* Add instruction into a chain. */
/* Add INSN_UID to the chain, record the pseudo it defines, and analyze
   all of its register defs and (non-memory) uses so dependent insns are
   queued as well.  */
3040 scalar_chain::add_insn (bitmap candidates, unsigned int insn_uid)
3042 if (bitmap_bit_p (insns, insn_uid))
3046 fprintf (dump_file, " Adding insn %d to chain #%d\n", insn_uid, chain_id);
3048 bitmap_set_bit (insns, insn_uid);
3050 rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn;
3051 rtx def_set = single_set (insn);
/* Track pseudos defined by the chain (hard regs are excluded).  */
3052 if (def_set && REG_P (SET_DEST (def_set))
3053 && !HARD_REGISTER_P (SET_DEST (def_set)))
3054 bitmap_set_bit (defs, REGNO (SET_DEST (def_set)));
/* Follow every definition of each defined pseudo...  */
3058 for (ref = DF_INSN_UID_DEFS (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref))
3059 if (!HARD_REGISTER_P (DF_REF_REG (ref)))
3060 for (def = DF_REG_DEF_CHAIN (DF_REF_REGNO (ref));
3062 def = DF_REF_NEXT_REG (def))
3063 analyze_register_chain (candidates, def);
/* ...and every register use not buried in a memory address.  */
3064 for (ref = DF_INSN_UID_USES (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref))
3065 if (!DF_REF_REG_MEM_P (ref))
3066 analyze_register_chain (candidates, ref);
3069 /* Build new chain starting from insn INSN_UID recursively
3070 adding all dependent uses and definitions. */
/* Build the chain starting from INSN_UID: repeatedly pop insns from the
   work queue, remove them from CANDIDATES and add them (add_insn may
   queue further dependent insns).  Dumps the result when requested.  */
3073 scalar_chain::build (bitmap candidates, unsigned insn_uid)
3075 queue = BITMAP_ALLOC (NULL);
3076 bitmap_set_bit (queue, insn_uid);
3079 fprintf (dump_file, "Building chain #%d...\n", chain_id);
/* Worklist loop: drains the queue; add_insn refills it transitively.  */
3081 while (!bitmap_empty_p (queue))
3083 insn_uid = bitmap_first_set_bit (queue);
3084 bitmap_clear_bit (queue, insn_uid);
3085 bitmap_clear_bit (candidates, insn_uid);
3086 add_insn (candidates, insn_uid);
3091 fprintf (dump_file, "Collected chain #%d...\n", chain_id);
3092 fprintf (dump_file, " insns: ");
3093 dump_bitmap (dump_file, insns);
3094 if (!bitmap_empty_p (defs_conv))
3098 const char *comma = "";
3099 fprintf (dump_file, " defs to convert: ");
3100 EXECUTE_IF_SET_IN_BITMAP (defs_conv, 0, id, bi)
3102 fprintf (dump_file, "%sr%d", comma, id);
3105 fprintf (dump_file, "\n");
3109 BITMAP_FREE (queue);
3112 /* Compute a gain for chain conversion. */
/* Estimate the net gain (in cost units) of converting the chain to
   vector mode: sum per-insn savings, then subtract the cost of the
   scalar<->vector copies required for dual-mode registers.  */
3115 scalar_chain::compute_convert_gain ()
3123 fprintf (dump_file, "Computing gain for chain #%d...\n", chain_id);
3125 EXECUTE_IF_SET_IN_BITMAP (insns, 0, insn_uid, bi)
3127 rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn;
3128 rtx def_set = single_set (insn);
3129 rtx src = SET_SRC (def_set);
3130 rtx dst = SET_DEST (def_set);
/* A DImode reg-reg move costs two scalar insns vs one SSE move;
   loads/stores cost two int accesses vs one SSE access.  */
3132 if (REG_P (src) && REG_P (dst))
3133 gain += COSTS_N_INSNS (2) - ix86_cost->sse_move;
3134 else if (REG_P (src) && MEM_P (dst))
3135 gain += 2 * ix86_cost->int_store[2] - ix86_cost->sse_store[1];
3136 else if (MEM_P (src) && REG_P (dst))
3137 gain += 2 * ix86_cost->int_load[2] - ix86_cost->sse_load[1];
3138 else if (GET_CODE (src) == PLUS
3139 || GET_CODE (src) == MINUS
3140 || GET_CODE (src) == IOR
3141 || GET_CODE (src) == XOR
3142 || GET_CODE (src) == AND)
/* One addition-class insn is saved per converted arithmetic op.  */
3143 gain += ix86_cost->add;
3144 else if (GET_CODE (src) == COMPARE)
3146 /* Assume comparison cost is the same. */
/* NOTE(review): "convertion" in the dump strings below is a typo for
   "conversion"; the strings are debug output only.  */
3153 fprintf (dump_file, " Instruction convertion gain: %d\n", gain);
/* Each dual-mode register pays a vector-to-integer move per def.  */
3155 EXECUTE_IF_SET_IN_BITMAP (defs_conv, 0, insn_uid, bi)
3156 cost += DF_REG_DEF_COUNT (insn_uid) * ix86_cost->mmxsse_to_integer;
3159 fprintf (dump_file, " Registers convertion cost: %d\n", cost);
3164 fprintf (dump_file, " Total gain: %d\n", gain);
3169 /* Replace REG in X with a V2DI subreg of NEW_REG. */
/* Recursively replace every occurrence of REG inside X with
   (subreg:V2DI NEW_REG 0) and return the rewritten rtx.  */
3172 scalar_chain::replace_with_subreg (rtx x, rtx reg, rtx new_reg)
3175 return gen_rtx_SUBREG (V2DImode, new_reg, 0);
/* Generic rtx walk over operands ('e') and vectors ('E').  */
3177 const char *fmt = GET_RTX_FORMAT (GET_CODE (x));
3179 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
3182 XEXP (x, i) = replace_with_subreg (XEXP (x, i), reg, new_reg);
3183 else if (fmt[i] == 'E')
3184 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3185 XVECEXP (x, i, j) = replace_with_subreg (XVECEXP (x, i, j),
3192 /* Replace REG in INSN with a V2DI subreg of NEW_REG. */
/* Replace REG with a V2DI subreg of NEW_REG throughout INSN's
   single-set pattern.  */
3195 scalar_chain::replace_with_subreg_in_insn (rtx_insn *insn, rtx reg, rtx new_reg)
3197 replace_with_subreg (single_set (insn), reg, new_reg);
3200 /* Insert generated conversion instruction sequence INSNS
3201 after instruction AFTER. New BB may be required in case
3202 instruction has EH region attached. */
/* Emit the conversion sequence INSNS after AFTER.  If AFTER is a
   control-flow insn (e.g. may throw), emit into a new block split off
   its fall-through edge instead, so EH edges stay valid.  */
3205 scalar_chain::emit_conversion_insns (rtx insns, rtx_insn *after)
3207 if (!control_flow_insn_p (after))
3209 emit_insn_after (insns, after);
3213 basic_block bb = BLOCK_FOR_INSN (after);
3214 edge e = find_fallthru_edge (bb->succs);
3217 basic_block new_bb = split_edge (e);
3218 emit_insn_after (insns, BB_HEAD (new_bb));
3221 /* Make vector copies for all register REGNO definitions
3222 and replace its uses in a chain. */
/* For a register REGNO defined outside the chain but used inside it:
   after each outside definition, build a vector copy VREG of the DImode
   value (via pinsrd, loadld+interleave, or a stack bounce depending on
   target capabilities), then rewrite all in-chain uses to VREG.  */
3225 scalar_chain::make_vector_copies (unsigned regno)
3227 rtx reg = regno_reg_rtx[regno];
3228 rtx vreg = gen_reg_rtx (DImode);
/* Visit definitions of REGNO that are NOT part of the chain.  */
3231 for (ref = DF_REG_DEF_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref))
3232 if (!bitmap_bit_p (insns, DF_REF_INSN_UID (ref)))
3234 rtx_insn *insn = DF_REF_INSN (ref);
/* Strategy 1 (elided condition; presumably SSE4.1): load low half,
   insert high half with pinsrd.  */
3239 emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, vreg, 0),
3240 CONST0_RTX (V4SImode),
3241 gen_rtx_SUBREG (SImode, reg, 0)));
3242 emit_insn (gen_sse4_1_pinsrd (gen_rtx_SUBREG (V4SImode, vreg, 0),
3243 gen_rtx_SUBREG (V4SImode, vreg, 0),
3244 gen_rtx_SUBREG (SImode, reg, 4),
/* Strategy 2: two scalar loads into vectors, then interleave-low.  */
3247 else if (TARGET_INTER_UNIT_MOVES_TO_VEC)
3249 rtx tmp = gen_reg_rtx (DImode);
3250 emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, vreg, 0),
3251 CONST0_RTX (V4SImode),
3252 gen_rtx_SUBREG (SImode, reg, 0)));
3253 emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, tmp, 0),
3254 CONST0_RTX (V4SImode),
3255 gen_rtx_SUBREG (SImode, reg, 4)));
3256 emit_insn (gen_vec_interleave_lowv4si
3257 (gen_rtx_SUBREG (V4SImode, vreg, 0),
3258 gen_rtx_SUBREG (V4SImode, vreg, 0),
3259 gen_rtx_SUBREG (V4SImode, tmp, 0)));
/* Strategy 3 (fallback): bounce both halves through a stack slot.  */
3263 rtx tmp = assign_386_stack_local (DImode, SLOT_TEMP);
3264 emit_move_insn (adjust_address (tmp, SImode, 0),
3265 gen_rtx_SUBREG (SImode, reg, 0));
3266 emit_move_insn (adjust_address (tmp, SImode, 4),
3267 gen_rtx_SUBREG (SImode, reg, 4));
3268 emit_move_insn (vreg, tmp);
3270 emit_conversion_insns (get_insns (), insn);
3275 " Copied r%d to a vector register r%d for insn %d\n",
3276 regno, REGNO (vreg), DF_REF_INSN_UID (ref));
/* Rewrite in-chain uses of REG to the vector copy.  */
3279 for (ref = DF_REG_USE_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref))
3280 if (bitmap_bit_p (insns, DF_REF_INSN_UID (ref)))
3282 replace_with_subreg_in_insn (DF_REF_INSN (ref), reg, vreg);
3285 fprintf (dump_file, " Replaced r%d with r%d in insn %d\n",
3286 regno, REGNO (vreg), DF_REF_INSN_UID (ref));
3290 /* Convert all definitions of register REGNO
3291 and fix its uses. Scalar copies may be created
3292 in case register is used in not convertible insn. */
/* Convert all in-chain definitions of REGNO to vector mode and fix up
   its uses.  If the register is also needed in scalar mode
   (defs_conv), emit a scalar copy SCOPY after the definition and
   redirect out-of-chain uses to it.  */
3295 scalar_chain::convert_reg (unsigned regno)
3297 bool scalar_copy = bitmap_bit_p (defs_conv, regno);
3298 rtx reg = regno_reg_rtx[regno];
3299 rtx scopy = NULL_RTX;
/* CONV tracks chain insns whose use of REG is not yet rewritten.  */
3303 conv = BITMAP_ALLOC (NULL);
3304 bitmap_copy (conv, insns);
3307 scopy = gen_reg_rtx (DImode);
3309 for (ref = DF_REG_DEF_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref))
3311 rtx_insn *insn = DF_REF_INSN (ref);
3312 rtx def_set = single_set (insn);
3313 rtx src = SET_SRC (def_set);
3314 rtx reg = DF_REF_REG (ref);
3318 replace_with_subreg_in_insn (insn, reg, reg);
3319 bitmap_clear_bit (conv, INSN_UID (insn));
/* Materialize the scalar copy of the vector value.  */
3324 rtx vcopy = gen_reg_rtx (V2DImode);
/* Fast path: direct vector->integer moves, extracting both halves.  */
3327 if (TARGET_INTER_UNIT_MOVES_FROM_VEC)
3329 emit_move_insn (vcopy, gen_rtx_SUBREG (V2DImode, reg, 0));
3330 emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 0),
3331 gen_rtx_SUBREG (SImode, vcopy, 0));
3332 emit_move_insn (vcopy,
3333 gen_rtx_LSHIFTRT (V2DImode, vcopy, GEN_INT (32)));
3334 emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 4),
3335 gen_rtx_SUBREG (SImode, vcopy, 0));
/* Fallback: bounce through a DImode stack slot.  */
3339 rtx tmp = assign_386_stack_local (DImode, SLOT_TEMP);
3340 emit_move_insn (tmp, reg);
3341 emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 0),
3342 adjust_address (tmp, SImode, 0));
3343 emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 4),
3344 adjust_address (tmp, SImode, 4));
3346 emit_conversion_insns (get_insns (), insn);
3351 " Copied r%d to a scalar register r%d for insn %d\n",
3352 regno, REGNO (scopy), INSN_UID (insn));
/* Fix remaining uses: in-chain ones get the subreg rewrite, other
   non-debug uses are redirected to the scalar copy.  */
3356 for (ref = DF_REG_USE_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref))
3357 if (bitmap_bit_p (insns, DF_REF_INSN_UID (ref)))
3359 if (bitmap_bit_p (conv, DF_REF_INSN_UID (ref)))
3361 rtx def_set = single_set (DF_REF_INSN (ref));
3362 if (!MEM_P (SET_DEST (def_set))
3363 || !REG_P (SET_SRC (def_set)))
3364 replace_with_subreg_in_insn (DF_REF_INSN (ref), reg, reg);
3365 bitmap_clear_bit (conv, DF_REF_INSN_UID (ref));
3368 else if (NONDEBUG_INSN_P (DF_REF_INSN (ref)))
3370 replace_rtx (DF_REF_INSN (ref), reg, scopy);
3371 df_insn_rescan (DF_REF_INSN (ref));
3377 /* Convert operand OP in INSN. All register uses
3378 are converted during registers conversion.
3379 Therefore we should just handle memory operands. */
/* Convert operand *OP of INSN to vector mode.  Register operands were
   already rewritten during register conversion; memory operands are
   preloaded into a fresh pseudo here.  */
3382 scalar_chain::convert_op (rtx *op, rtx_insn *insn)
3384 *op = copy_rtx_if_shared (*op);
/* Memory operand: load it before INSN and use the V2DI subreg.  */
3388 rtx tmp = gen_reg_rtx (DImode);
3390 emit_insn_before (gen_move_insn (tmp, *op), insn);
3391 *op = gen_rtx_SUBREG (V2DImode, tmp, 0);
3394 fprintf (dump_file, " Preloading operand for insn %d into r%d\n",
3395 INSN_UID (insn), REGNO (tmp));
/* Anything else must already be a V2DImode subreg by now.  */
3399 gcc_assert (SUBREG_P (*op));
3400 gcc_assert (GET_MODE (*op) == V2DImode);
3404 /* Convert INSN to vector mode. */
/* Rewrite INSN's single-set pattern into V2DImode form: retarget a
   memory destination through a temporary, convert source operands, and
   turn a zero-comparison into the UNSPEC ptest-style form.  Finally
   re-recognize and rescan the insn.  */
3407 scalar_chain::convert_insn (rtx_insn *insn)
3409 rtx def_set = single_set (insn);
3410 rtx src = SET_SRC (def_set);
3411 rtx dst = SET_DEST (def_set);
3414 if (MEM_P (dst) && !REG_P (src))
3416 /* There are no scalar integer instructions and therefore
3417 temporary register usage is required. */
3418 rtx tmp = gen_reg_rtx (DImode);
/* Store the temporary back to memory after INSN.  */
3419 emit_conversion_insns (gen_move_insn (dst, tmp), insn);
3420 dst = gen_rtx_SUBREG (V2DImode, tmp, 0);
3423 switch (GET_CODE (src))
/* Binary ops: convert both operands, switch the op to V2DImode.  */
3430 convert_op (&XEXP (src, 0), insn);
3431 convert_op (&XEXP (src, 1), insn);
3432 PUT_MODE (src, V2DImode);
3437 convert_op (&src, insn);
3444 gcc_assert (GET_MODE (src) == V2DImode);
/* Comparison case: dig out the compared register...  */
3448 src = SUBREG_REG (XEXP (XEXP (src, 0), 0));
3450 gcc_assert ((REG_P (src) && GET_MODE (src) == DImode)
3451 || (SUBREG_P (src) && GET_MODE (src) == V2DImode));
3454 subreg = gen_rtx_SUBREG (V2DImode, src, 0);
3456 subreg = copy_rtx_if_shared (src);
/* ...duplicate the low half so both lanes hold the tested value...  */
3457 emit_insn_before (gen_vec_interleave_lowv2di (copy_rtx_if_shared (subreg),
3458 copy_rtx_if_shared (subreg),
3459 copy_rtx_if_shared (subreg)),
/* ...and express the zero test as a flags-setting UNSPEC.  */
3461 dst = gen_rtx_REG (CCmode, FLAGS_REG);
3462 src = gen_rtx_UNSPEC (CCmode, gen_rtvec (2, copy_rtx_if_shared (src),
3463 copy_rtx_if_shared (src)),
3471 SET_SRC (def_set) = src;
3472 SET_DEST (def_set) = dst;
3474 /* Drop possible dead definitions. */
3475 PATTERN (insn) = def_set;
/* Force re-recognition of the rewritten pattern.  */
3477 INSN_CODE (insn) = -1;
3478 recog_memoized (insn);
3479 df_insn_rescan (insn);
3482 /* Convert whole chain creating required register
3483 conversions and copies. */
/* Convert the whole chain: convert every defined register, create
   vector copies for dual-mode registers defined outside the chain,
   then rewrite each insn.  Returns the number of converted insns
   (0 when the stv debug counter says to skip).  */
3486 scalar_chain::convert ()
3490 int converted_insns = 0;
3492 if (!dbg_cnt (stv_conversion))
3496 fprintf (dump_file, "Converting chain #%d...\n", chain_id);
3498 EXECUTE_IF_SET_IN_BITMAP (defs, 0, id, bi)
/* Registers needed in both modes but not defined here get copies.  */
3501 EXECUTE_IF_AND_COMPL_IN_BITMAP (defs_conv, defs, 0, id, bi)
3502 make_vector_copies (id);
3504 EXECUTE_IF_SET_IN_BITMAP (insns, 0, id, bi)
3506 convert_insn (DF_INSN_UID_GET (id)->insn);
3510 return converted_insns;
3513 /* Main STV pass function. Find and convert scalar
3514 instructions into vector mode when profitable. */
/* Main STV pass driver: set up df def-use chains, collect candidate
   insns, prune non-convertible registers, then greedily build chains
   and convert those whose estimated gain is positive.  Returns the
   number of converted insns; bumps stack alignment to 128 bits when
   any conversion happened (128-bit spills may follow).  */
3517 convert_scalars_to_vector ()
3521 int converted_insns = 0;
3523 bitmap_obstack_initialize (NULL);
3524 candidates = BITMAP_ALLOC (NULL);
3526 calculate_dominance_info (CDI_DOMINATORS);
3527 df_set_flags (DF_DEFER_INSN_RESCAN);
3528 df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
3529 df_md_add_problem ();
3532 /* Find all instructions we want to convert into vector mode. */
3534 fprintf (dump_file, "Searching for mode convertion candidates...\n");
3536 FOR_EACH_BB_FN (bb, cfun)
3539 FOR_BB_INSNS (bb, insn)
3540 if (scalar_to_vector_candidate_p (insn))
3543 fprintf (dump_file, " insn %d is marked as a candidate\n",
3546 bitmap_set_bit (candidates, INSN_UID (insn));
3550 remove_non_convertible_regs (candidates);
3552 if (bitmap_empty_p (candidates))
3554 fprintf (dump_file, "There are no candidates for optimization.\n");
/* Build a chain from each remaining candidate and convert if the
   cost model says it pays off.  */
3556 while (!bitmap_empty_p (candidates))
3558 unsigned uid = bitmap_first_set_bit (candidates);
3561 /* Find instructions chain we want to convert to vector mode.
3562 Check all uses and definitions to estimate all required
3564 chain.build (candidates, uid);
3566 if (chain.compute_convert_gain () > 0)
3567 converted_insns += chain.convert ();
3570 fprintf (dump_file, "Chain #%d conversion is not profitable\n",
3575 fprintf (dump_file, "Total insns converted: %d\n", converted_insns);
3577 BITMAP_FREE (candidates);
3578 bitmap_obstack_release (NULL);
3579 df_process_deferred_rescans ();
3581 /* Conversion means we may have 128bit register spills/fills
3582 which require aligned stack. */
3583 if (converted_insns)
3585 if (crtl->stack_alignment_needed < 128)
3586 crtl->stack_alignment_needed = 128;
3587 if (crtl->stack_alignment_estimated < 128)
3588 crtl->stack_alignment_estimated = 128;
3596 const pass_data pass_data_insert_vzeroupper =
3598 RTL_PASS, /* type */
3599 "vzeroupper", /* name */
3600 OPTGROUP_NONE, /* optinfo_flags */
3601 TV_NONE, /* tv_id */
3602 0, /* properties_required */
3603 0, /* properties_provided */
3604 0, /* properties_destroyed */
3605 0, /* todo_flags_start */
3606 TODO_df_finish, /* todo_flags_finish */
/* RTL pass wrapper for vzeroupper insertion.  Gated on AVX without
   AVX-512F, -mvzeroupper and expensive optimizations (condition
   partially elided here).  */
3609 class pass_insert_vzeroupper : public rtl_opt_pass
3612 pass_insert_vzeroupper(gcc::context *ctxt)
3613 : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt)
3616 /* opt_pass methods: */
3617 virtual bool gate (function *)
3619 return TARGET_AVX && !TARGET_AVX512F
3620 && TARGET_VZEROUPPER && flag_expensive_optimizations
3624 virtual unsigned int execute (function *)
3626 return rest_of_handle_insert_vzeroupper ();
3629 }; // class pass_insert_vzeroupper
3631 const pass_data pass_data_stv =
3633 RTL_PASS, /* type */
3635 OPTGROUP_NONE, /* optinfo_flags */
3636 TV_NONE, /* tv_id */
3637 0, /* properties_required */
3638 0, /* properties_provided */
3639 0, /* properties_destroyed */
3640 0, /* todo_flags_start */
3641 TODO_df_finish, /* todo_flags_finish */
/* RTL pass wrapper for the scalar-to-vector (STV) conversion.  Only
   runs for 32-bit targets with -mstv, SSE2, and -O2 or higher.  */
3644 class pass_stv : public rtl_opt_pass
3647 pass_stv (gcc::context *ctxt)
3648 : rtl_opt_pass (pass_data_stv, ctxt)
3651 /* opt_pass methods: */
3652 virtual bool gate (function *)
3654 return !TARGET_64BIT && TARGET_STV && TARGET_SSE2 && optimize > 1;
3657 virtual unsigned int execute (function *)
3659 return convert_scalars_to_vector ();
3662 }; // class pass_stv
/* Factory for the vzeroupper pass, called from the pass manager.  */
3667 make_pass_insert_vzeroupper (gcc::context *ctxt)
3669 return new pass_insert_vzeroupper (ctxt);
/* Factory for the STV pass, called from the pass manager.  */
3673 make_pass_stv (gcc::context *ctxt)
3675 return new pass_stv (ctxt);
3678 /* Return true if a red-zone is in use. */
/* Return true if a red-zone is in use: enabled by the target flag and
   excluded under the 64-bit MS ABI (which has no red zone).  */
3681 ix86_using_red_zone (void)
3683 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
3686 /* Return a string that documents the current -m options. The caller is
3687 responsible for freeing the string. */
3690 ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
3691 const char *tune, enum fpmath_unit fpmath,
3694 struct ix86_target_opts
3696 const char *option; /* option string */
3697 HOST_WIDE_INT mask; /* isa mask options */
3700 /* This table is ordered so that options like -msse4.2, which imply
3701 preceding options, are listed first so the most specific match wins. */
3702 static struct ix86_target_opts isa_opts[] =
3704 { "-mfma4", OPTION_MASK_ISA_FMA4 },
3705 { "-mfma", OPTION_MASK_ISA_FMA },
3706 { "-mxop", OPTION_MASK_ISA_XOP },
3707 { "-mlwp", OPTION_MASK_ISA_LWP },
3708 { "-mavx512f", OPTION_MASK_ISA_AVX512F },
3709 { "-mavx512er", OPTION_MASK_ISA_AVX512ER },
3710 { "-mavx512cd", OPTION_MASK_ISA_AVX512CD },
3711 { "-mavx512pf", OPTION_MASK_ISA_AVX512PF },
3712 { "-mavx512dq", OPTION_MASK_ISA_AVX512DQ },
3713 { "-mavx512bw", OPTION_MASK_ISA_AVX512BW },
3714 { "-mavx512vl", OPTION_MASK_ISA_AVX512VL },
3715 { "-mavx512ifma", OPTION_MASK_ISA_AVX512IFMA },
3716 { "-mavx512vbmi", OPTION_MASK_ISA_AVX512VBMI },
3717 { "-msse4a", OPTION_MASK_ISA_SSE4A },
3718 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
3719 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
3720 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
3721 { "-msse3", OPTION_MASK_ISA_SSE3 },
3722 { "-msse2", OPTION_MASK_ISA_SSE2 },
3723 { "-msse", OPTION_MASK_ISA_SSE },
3724 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
3725 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
3726 { "-mmmx", OPTION_MASK_ISA_MMX },
3727 { "-mabm", OPTION_MASK_ISA_ABM },
3728 { "-mbmi", OPTION_MASK_ISA_BMI },
3729 { "-mbmi2", OPTION_MASK_ISA_BMI2 },
3730 { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
3731 { "-mhle", OPTION_MASK_ISA_HLE },
3732 { "-mfxsr", OPTION_MASK_ISA_FXSR },
3733 { "-mrdseed", OPTION_MASK_ISA_RDSEED },
3734 { "-mprfchw", OPTION_MASK_ISA_PRFCHW },
3735 { "-madx", OPTION_MASK_ISA_ADX },
3736 { "-mtbm", OPTION_MASK_ISA_TBM },
3737 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
3738 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
3739 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
3740 { "-maes", OPTION_MASK_ISA_AES },
3741 { "-msha", OPTION_MASK_ISA_SHA },
3742 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
3743 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
3744 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
3745 { "-mf16c", OPTION_MASK_ISA_F16C },
3746 { "-mrtm", OPTION_MASK_ISA_RTM },
3747 { "-mxsave", OPTION_MASK_ISA_XSAVE },
3748 { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT },
3749 { "-mprefetchwt1", OPTION_MASK_ISA_PREFETCHWT1 },
3750 { "-mclflushopt", OPTION_MASK_ISA_CLFLUSHOPT },
3751 { "-mxsavec", OPTION_MASK_ISA_XSAVEC },
3752 { "-mxsaves", OPTION_MASK_ISA_XSAVES },
3753 { "-mmpx", OPTION_MASK_ISA_MPX },
3754 { "-mclwb", OPTION_MASK_ISA_CLWB },
3755 { "-mpcommit", OPTION_MASK_ISA_PCOMMIT },
3756 { "-mmwaitx", OPTION_MASK_ISA_MWAITX },
3757 { "-mclzero", OPTION_MASK_ISA_CLZERO },
3758 { "-mpku", OPTION_MASK_ISA_PKU },
3762 static struct ix86_target_opts flag_opts[] =
3764 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
3765 { "-mlong-double-128", MASK_LONG_DOUBLE_128 },
3766 { "-mlong-double-64", MASK_LONG_DOUBLE_64 },
3767 { "-m80387", MASK_80387 },
3768 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
3769 { "-malign-double", MASK_ALIGN_DOUBLE },
3770 { "-mcld", MASK_CLD },
3771 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
3772 { "-mieee-fp", MASK_IEEE_FP },
3773 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
3774 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
3775 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
3776 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
3777 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
3778 { "-mno-push-args", MASK_NO_PUSH_ARGS },
3779 { "-mno-red-zone", MASK_NO_RED_ZONE },
3780 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
3781 { "-mrecip", MASK_RECIP },
3782 { "-mrtd", MASK_RTD },
3783 { "-msseregparm", MASK_SSEREGPARM },
3784 { "-mstack-arg-probe", MASK_STACK_PROBE },
3785 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
3786 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
3787 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
3788 { "-mvzeroupper", MASK_VZEROUPPER },
3789 { "-mstv", MASK_STV},
3790 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
3791 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
3792 { "-mprefer-avx128", MASK_PREFER_AVX128},
3795 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
3798 char target_other[40];
3808 memset (opts, '\0', sizeof (opts));
3810 /* Add -march= option. */
3813 opts[num][0] = "-march=";
3814 opts[num++][1] = arch;
3817 /* Add -mtune= option. */
3820 opts[num][0] = "-mtune=";
3821 opts[num++][1] = tune;
3824 /* Add -m32/-m64/-mx32. */
3825 if ((isa & OPTION_MASK_ISA_64BIT) != 0)
3827 if ((isa & OPTION_MASK_ABI_64) != 0)
3831 isa &= ~ (OPTION_MASK_ISA_64BIT
3832 | OPTION_MASK_ABI_64
3833 | OPTION_MASK_ABI_X32);
3837 opts[num++][0] = abi;
3839 /* Pick out the options in isa options. */
3840 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
3842 if ((isa & isa_opts[i].mask) != 0)
3844 opts[num++][0] = isa_opts[i].option;
3845 isa &= ~ isa_opts[i].mask;
3849 if (isa && add_nl_p)
3851 opts[num++][0] = isa_other;
3852 sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
3856 /* Add flag options. */
3857 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
3859 if ((flags & flag_opts[i].mask) != 0)
3861 opts[num++][0] = flag_opts[i].option;
3862 flags &= ~ flag_opts[i].mask;
3866 if (flags && add_nl_p)
3868 opts[num++][0] = target_other;
3869 sprintf (target_other, "(other flags: %#x)", flags);
3872 /* Add -fpmath= option. */
3875 opts[num][0] = "-mfpmath=";
3876 switch ((int) fpmath)
3879 opts[num++][1] = "387";
3883 opts[num++][1] = "sse";
3886 case FPMATH_387 | FPMATH_SSE:
3887 opts[num++][1] = "sse+387";
3899 gcc_assert (num < ARRAY_SIZE (opts));
3901 /* Size the string. */
3903 sep_len = (add_nl_p) ? 3 : 1;
3904 for (i = 0; i < num; i++)
3907 for (j = 0; j < 2; j++)
3909 len += strlen (opts[i][j]);
3912 /* Build the string. */
3913 ret = ptr = (char *) xmalloc (len);
3916 for (i = 0; i < num; i++)
3920 for (j = 0; j < 2; j++)
3921 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
3928 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
3936 for (j = 0; j < 2; j++)
3939 memcpy (ptr, opts[i][j], len2[j]);
3941 line_len += len2[j];
3946 gcc_assert (ret + len >= ptr);
3951 /* Return true, if profiling code should be emitted before
3952 prologue. Otherwise it returns false.
3953 Note: For x86 with "hotfix" it is sorried. */
3955 ix86_profile_before_prologue (void)
3957 return flag_fentry != 0;
3960 /* Function that is callable from the debugger to print the current
3962 void ATTRIBUTE_UNUSED
3963 ix86_debug_options (void)
/* Render the current ISA flags, target flags, arch and tune settings
   as a human-readable option string.  ix86_target_string allocates the
   buffer; the elided lines presumably free it after printing -- TODO
   confirm.  */
3965 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
3966 ix86_arch_string, ix86_tune_string,
3971 fprintf (stderr, "%s\n\n", opts);
/* ix86_target_string returned NULL: no options are set at all.  */
3975 fputs ("<no options>\n\n", stderr);
/* Return true if byte value T is one of the opcode bytes that should
   be avoided when mitigating return-oriented programming: 0xc2/0xc3
   (near RET with/without immediate) and 0xca/0xcb (far RET with/
   without immediate).  */
static bool
ix86_rop_should_change_byte_p (int t)
{
  switch (t)
    {
    case 0xc2:
    case 0xc3:
    case 0xca:
    case 0xcb:
      return true;
    default:
      return false;
    }
}
/* Printable names for the stringop algorithms, generated from
   stringop.def via the DEF_ALG iterator macro.  Indexed by
   enum stringop_alg, so the order must match the .def file.  */
3989 static const char *stringop_alg_names[] = {
3991 #define DEF_ALG(alg, name) #name,
3992 #include "stringop.def"
3997 /* Parse parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=.
3998 The string is of the following form (or comma separated list of it):
4000 strategy_alg:max_size:[align|noalign]
4002 where the full size range for the strategy is either [0, max_size] or
4003 [min_size, max_size], in which min_size is the max_size + 1 of the
4004 preceding range. The last size range must have max_size == -1.
4009 -mmemcpy-strategy=libcall:-1:noalign
4011 this is equivalent to (for known size memcpy) -mstringop-strategy=libcall
4015 -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign
4017 This is to tell the compiler to use the following strategy for memset
4018 1) when the expected size is between [1, 16], use rep_8byte strategy;
4019 2) when the size is between [17, 2048], use vector_loop;
4020 3) when the size is > 2048, use libcall. */
4022 struct stringop_size_range
4030 ix86_parse_stringop_strategy_string (char *strategy_str, bool is_memset)
/* Pick the per-CPU cost table to override: memset vs. memcpy, and the
   64-bit vs. 32-bit variant.  */
4032 const struct stringop_algs *default_algs;
4033 stringop_size_range input_ranges[MAX_STRINGOP_ALGS];
4034 char *curr_range_str, *next_range_str;
4038 default_algs = &ix86_cost->memset[TARGET_64BIT != 0];
4040 default_algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
4042 curr_range_str = strategy_str;
/* Walk the comma-separated list, splitting it in place; each element
   has the form alg:max_size:[align|noalign].  */
4049 next_range_str = strchr (curr_range_str, ',');
4051 *next_range_str++ = '\0';
/* NOTE(review): the %20 and %10 field widths must stay one smaller
   than the alg_name and align buffers (declared in elided lines) --
   confirm the sizes still match.  */
4053 if (3 != sscanf (curr_range_str, "%20[^:]:%d:%10s",
4054 alg_name, &maxs, align))
4056 error ("wrong arg %s to option %s", curr_range_str,
4057 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
/* Each range's max must grow monotonically (or be the -1 sentinel).  */
4061 if (n > 0 && (maxs < (input_ranges[n - 1].max + 1) && maxs != -1))
4063 error ("size ranges of option %s should be increasing",
4064 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
/* Translate the algorithm name to its enum value via the name table.  */
4068 for (i = 0; i < last_alg; i++)
4069 if (!strcmp (alg_name, stringop_alg_names[i]))
4074 error ("wrong stringop strategy name %s specified for option %s",
4076 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
4080 if ((stringop_alg) i == rep_prefix_8_byte
4083 /* rep; movq isn't available in 32-bit code. */
4084 error ("stringop strategy name %s specified for option %s "
4085 "not supported for 32-bit code",
4087 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
/* NOTE(review): no bound check on n is visible before this store, and
   the "too many size ranges" diagnostic below only runs after the
   loop -- verify n cannot reach MAX_STRINGOP_ALGS here, otherwise
   this writes one past the end of input_ranges.  */
4091 input_ranges[n].max = maxs;
4092 input_ranges[n].alg = (stringop_alg) i;
4093 if (!strcmp (align, "align"))
4094 input_ranges[n].noalign = false;
4095 else if (!strcmp (align, "noalign"))
4096 input_ranges[n].noalign = true;
4099 error ("unknown alignment %s specified for option %s",
4100 align, is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
4104 curr_range_str = next_range_str;
4106 while (curr_range_str);
/* The final range must use the -1 sentinel so the strategy covers all
   sizes.  Assumes n > 0, i.e. the option argument was non-empty.  */
4108 if (input_ranges[n - 1].max != -1)
4110 error ("the max value for the last size range should be -1"
4112 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
4116 if (n > MAX_STRINGOP_ALGS)
4118 error ("too many size ranges specified in option %s",
4119 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
4123 /* Now override the default algs array.  The const_casts deliberately
4124 strip const so the shared per-CPU default cost tables can be
4124 overwritten in place with the user-specified strategy. */
4124 for (i = 0; i < n; i++)
4126 *const_cast<int *>(&default_algs->size[i].max) = input_ranges[i].max;
4127 *const_cast<stringop_alg *>(&default_algs->size[i].alg)
4128 = input_ranges[i].alg;
4129 *const_cast<int *>(&default_algs->size[i].noalign)
4130 = input_ranges[i].noalign;
4135 /* parse -mtune-ctrl= option. When DUMP is true,
4136 print the features that are explicitly set. */
4139 parse_mtune_ctrl_str (bool dump)
/* Nothing to do when -mtune-ctrl= was not given.  */
4141 if (!ix86_tune_ctrl_string)
/* Work on a writable copy so the list can be split in place.  */
4144 char *next_feature_string = NULL;
4145 char *curr_feature_string = xstrdup (ix86_tune_ctrl_string);
4146 char *orig = curr_feature_string;
/* One comma-separated feature name per iteration; a leading '^'
   clears the feature instead of setting it.  */
4152 next_feature_string = strchr (curr_feature_string, ',');
4153 if (next_feature_string)
4154 *next_feature_string++ = '\0';
4155 if (*curr_feature_string == '^')
4157 curr_feature_string++;
/* Look the name up in the tuning-feature name table and apply it.  */
4160 for (i = 0; i < X86_TUNE_LAST; i++)
4162 if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
4164 ix86_tune_features[i] = !clear;
4166 fprintf (stderr, "Explicitly %s feature %s\n",
4167 clear ? "clear" : "set", ix86_tune_feature_names[i]);
/* Loop ran to completion: no known feature matched.  The "- 1" backs
   up over the stripped '^' so the diagnostic shows the exact token
   the user typed.
   NOTE(review): GCC diagnostics conventionally start lowercase;
   "Unknown parameter ..." violates that -- consider fixing.  */
4171 if (i == X86_TUNE_LAST)
4172 error ("Unknown parameter to option -mtune-ctrl: %s",
4173 clear ? curr_feature_string - 1 : curr_feature_string);
4174 curr_feature_string = next_feature_string;
4176 while (curr_feature_string);
4180 /* Helper function to set ix86_tune_features. IX86_TUNE is the
/* Compute the one-hot mask for the selected processor.
   NOTE(review): 1u << ix86_tune is undefined if processor_type ever
   reaches 32 enumerators -- confirm the enum stays below 32.  */
4184 set_ix86_tune_features (enum processor_type ix86_tune, bool dump)
4186 unsigned int ix86_tune_mask = 1u << ix86_tune;
4189 for (i = 0; i < X86_TUNE_LAST; ++i)
/* In -mtune-ctrl no-default mode start every feature cleared;
   otherwise seed each feature from the per-processor default table.  */
4191 if (ix86_tune_no_default)
4192 ix86_tune_features[i] = 0;
4194 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
/* When dumping, list the resulting on/off state of every feature.  */
4199 fprintf (stderr, "List of x86 specific tuning parameter names:\n");
4200 for (i = 0; i < X86_TUNE_LAST; i++)
4201 fprintf (stderr, "%s : %s\n", ix86_tune_feature_names[i],
4202 ix86_tune_features[i] ? "on" : "off");
/* Finally apply explicit per-feature overrides from -mtune-ctrl=.  */
4205 parse_mtune_ctrl_str (dump);
4209 /* Default align_* from the processor table. */
4212 ix86_default_align (struct gcc_options *opts)
/* Only fill in alignments the user left at 0 (i.e. unspecified);
   explicit -falign-* values are never overridden.  */
4214 if (opts->x_align_loops == 0)
4216 opts->x_align_loops = processor_target_table[ix86_tune].align_loop;
4217 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
4219 if (opts->x_align_jumps == 0)
4221 opts->x_align_jumps = processor_target_table[ix86_tune].align_jump;
4222 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
4224 if (opts->x_align_functions == 0)
4226 opts->x_align_functions = processor_target_table[ix86_tune].align_func;
4230 /* Implement TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE hook. */
4233 ix86_override_options_after_change (void)
/* Re-derive the default alignment settings from the (possibly
   changed) global options.  */
4235 ix86_default_align (&global_options);
4238 /* Override various settings based on options. If MAIN_ARGS_P, the
4239 options are from the command line, otherwise they are from
4243 ix86_option_override_internal (bool main_args_p,
4244 struct gcc_options *opts,
4245 struct gcc_options *opts_set)
4248 unsigned int ix86_arch_mask;
4249 const bool ix86_tune_specified = (opts->x_ix86_tune_string != NULL);
4254 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
4255 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
4256 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
4257 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
4258 #define PTA_AES (HOST_WIDE_INT_1 << 4)
4259 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
4260 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
4261 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
4262 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
4263 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
4264 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
4265 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
4266 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
4267 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
4268 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
4269 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
4270 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
4271 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
4272 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
4273 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
4274 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
4275 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
4276 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
4277 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
4278 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
4279 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
4280 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
4281 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
4282 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
4283 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
4284 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
4285 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
4286 #define PTA_RTM (HOST_WIDE_INT_1 << 32)
4287 #define PTA_HLE (HOST_WIDE_INT_1 << 33)
4288 #define PTA_PRFCHW (HOST_WIDE_INT_1 << 34)
4289 #define PTA_RDSEED (HOST_WIDE_INT_1 << 35)
4290 #define PTA_ADX (HOST_WIDE_INT_1 << 36)
4291 #define PTA_FXSR (HOST_WIDE_INT_1 << 37)
4292 #define PTA_XSAVE (HOST_WIDE_INT_1 << 38)
4293 #define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39)
4294 #define PTA_AVX512F (HOST_WIDE_INT_1 << 40)
4295 #define PTA_AVX512ER (HOST_WIDE_INT_1 << 41)
4296 #define PTA_AVX512PF (HOST_WIDE_INT_1 << 42)
4297 #define PTA_AVX512CD (HOST_WIDE_INT_1 << 43)
4298 #define PTA_MPX (HOST_WIDE_INT_1 << 44)
4299 #define PTA_SHA (HOST_WIDE_INT_1 << 45)
4300 #define PTA_PREFETCHWT1 (HOST_WIDE_INT_1 << 46)
4301 #define PTA_CLFLUSHOPT (HOST_WIDE_INT_1 << 47)
4302 #define PTA_XSAVEC (HOST_WIDE_INT_1 << 48)
4303 #define PTA_XSAVES (HOST_WIDE_INT_1 << 49)
4304 #define PTA_AVX512DQ (HOST_WIDE_INT_1 << 50)
4305 #define PTA_AVX512BW (HOST_WIDE_INT_1 << 51)
4306 #define PTA_AVX512VL (HOST_WIDE_INT_1 << 52)
4307 #define PTA_AVX512IFMA (HOST_WIDE_INT_1 << 53)
4308 #define PTA_AVX512VBMI (HOST_WIDE_INT_1 << 54)
4309 #define PTA_CLWB (HOST_WIDE_INT_1 << 55)
4310 #define PTA_PCOMMIT (HOST_WIDE_INT_1 << 56)
4311 #define PTA_MWAITX (HOST_WIDE_INT_1 << 57)
4312 #define PTA_CLZERO (HOST_WIDE_INT_1 << 58)
4313 #define PTA_NO_80387 (HOST_WIDE_INT_1 << 59)
4314 #define PTA_PKU (HOST_WIDE_INT_1 << 60)
4317 (PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 \
4318 | PTA_CX16 | PTA_FXSR)
4319 #define PTA_NEHALEM \
4320 (PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_POPCNT)
4321 #define PTA_WESTMERE \
4322 (PTA_NEHALEM | PTA_AES | PTA_PCLMUL)
4323 #define PTA_SANDYBRIDGE \
4324 (PTA_WESTMERE | PTA_AVX | PTA_XSAVE | PTA_XSAVEOPT)
4325 #define PTA_IVYBRIDGE \
4326 (PTA_SANDYBRIDGE | PTA_FSGSBASE | PTA_RDRND | PTA_F16C)
4327 #define PTA_HASWELL \
4328 (PTA_IVYBRIDGE | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_LZCNT \
4329 | PTA_FMA | PTA_MOVBE | PTA_HLE)
4330 #define PTA_BROADWELL \
4331 (PTA_HASWELL | PTA_ADX | PTA_PRFCHW | PTA_RDSEED)
4332 #define PTA_SKYLAKE \
4333 (PTA_BROADWELL | PTA_CLFLUSHOPT | PTA_XSAVEC | PTA_XSAVES)
4334 #define PTA_SKYLAKE_AVX512 \
4335 (PTA_SKYLAKE | PTA_AVX512F | PTA_AVX512CD | PTA_AVX512VL \
4336 | PTA_AVX512BW | PTA_AVX512DQ | PTA_PKU)
4338 (PTA_BROADWELL | PTA_AVX512PF | PTA_AVX512ER | PTA_AVX512F | PTA_AVX512CD)
4339 #define PTA_BONNELL \
4340 (PTA_CORE2 | PTA_MOVBE)
4341 #define PTA_SILVERMONT \
4342 (PTA_WESTMERE | PTA_MOVBE)
4344 /* if this reaches 64, need to widen struct pta flags below */
4348 const char *const name; /* processor name or nickname. */
4349 const enum processor_type processor;
4350 const enum attr_cpu schedule;
4351 const unsigned HOST_WIDE_INT flags;
4353 const processor_alias_table[] =
4355 {"i386", PROCESSOR_I386, CPU_NONE, 0},
4356 {"i486", PROCESSOR_I486, CPU_NONE, 0},
4357 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
4358 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
4359 {"lakemont", PROCESSOR_LAKEMONT, CPU_PENTIUM, PTA_NO_80387},
4360 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
4361 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
4362 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
4363 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
4364 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
4365 PTA_MMX | PTA_SSE | PTA_FXSR},
4366 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
4367 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
4368 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_FXSR},
4369 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
4370 PTA_MMX | PTA_SSE | PTA_FXSR},
4371 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
4372 PTA_MMX | PTA_SSE | PTA_FXSR},
4373 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
4374 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
4375 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
4376 PTA_MMX |PTA_SSE | PTA_SSE2 | PTA_FXSR},
4377 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
4378 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
4379 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
4380 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
4381 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
4382 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
4383 | PTA_CX16 | PTA_NO_SAHF | PTA_FXSR},
4384 {"core2", PROCESSOR_CORE2, CPU_CORE2, PTA_CORE2},
4385 {"nehalem", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
4386 {"corei7", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
4387 {"westmere", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_WESTMERE},
4388 {"sandybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
4390 {"corei7-avx", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
4392 {"ivybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
4394 {"core-avx-i", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
4396 {"haswell", PROCESSOR_HASWELL, CPU_HASWELL, PTA_HASWELL},
4397 {"core-avx2", PROCESSOR_HASWELL, CPU_HASWELL, PTA_HASWELL},
4398 {"broadwell", PROCESSOR_HASWELL, CPU_HASWELL, PTA_BROADWELL},
4399 {"skylake", PROCESSOR_HASWELL, CPU_HASWELL, PTA_SKYLAKE},
4400 {"skylake-avx512", PROCESSOR_HASWELL, CPU_HASWELL, PTA_SKYLAKE_AVX512},
4401 {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
4402 {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
4403 {"silvermont", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
4404 {"slm", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
4405 {"knl", PROCESSOR_KNL, CPU_SLM, PTA_KNL},
4406 {"intel", PROCESSOR_INTEL, CPU_SLM, PTA_NEHALEM},
4407 {"geode", PROCESSOR_GEODE, CPU_GEODE,
4408 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
4409 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
4410 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
4411 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
4412 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
4413 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
4414 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
4415 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
4416 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
4417 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
4418 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
4419 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
4420 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
4421 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
4422 {"x86-64", PROCESSOR_K8, CPU_K8,
4423 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
4424 {"k8", PROCESSOR_K8, CPU_K8,
4425 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
4426 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
4427 {"k8-sse3", PROCESSOR_K8, CPU_K8,
4428 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
4429 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
4430 {"opteron", PROCESSOR_K8, CPU_K8,
4431 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
4432 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
4433 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
4434 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
4435 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
4436 {"athlon64", PROCESSOR_K8, CPU_K8,
4437 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
4438 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
4439 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
4440 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
4441 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
4442 {"athlon-fx", PROCESSOR_K8, CPU_K8,
4443 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
4444 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
4445 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
4446 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
4447 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
4448 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
4449 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
4450 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
4451 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
4452 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
4453 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
4454 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
4455 | PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
4456 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
4457 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
4458 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
4459 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
4460 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
4461 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
4462 {"bdver3", PROCESSOR_BDVER3, CPU_BDVER3,
4463 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
4464 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
4465 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
4466 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
4467 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE
4468 | PTA_XSAVEOPT | PTA_FSGSBASE},
4469 {"bdver4", PROCESSOR_BDVER4, CPU_BDVER4,
4470 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
4471 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
4472 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
4473 | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2
4474 | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR
4475 | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE | PTA_RDRND
4476 | PTA_MOVBE | PTA_MWAITX},
4477 {"znver1", PROCESSOR_ZNVER1, CPU_ZNVER1,
4478 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
4479 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
4480 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
4481 | PTA_BMI | PTA_BMI2 | PTA_F16C | PTA_FMA | PTA_PRFCHW
4482 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE
4483 | PTA_RDRND | PTA_MOVBE | PTA_MWAITX | PTA_ADX | PTA_RDSEED
4484 | PTA_CLZERO | PTA_CLFLUSHOPT | PTA_XSAVEC | PTA_XSAVES
4485 | PTA_SHA | PTA_LZCNT | PTA_POPCNT},
4486 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
4487 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
4488 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_PRFCHW
4489 | PTA_FXSR | PTA_XSAVE},
4490 {"btver2", PROCESSOR_BTVER2, CPU_BTVER2,
4491 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
4492 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_SSE4_1
4493 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
4494 | PTA_BMI | PTA_F16C | PTA_MOVBE | PTA_PRFCHW
4495 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT},
4497 {"generic", PROCESSOR_GENERIC, CPU_GENERIC,
4499 | PTA_HLE /* flags are only used for -march switch. */ },
4502 /* -mrecip options. */
4505 const char *string; /* option name */
4506 unsigned int mask; /* mask bits to set */
4508 const recip_options[] =
4510 { "all", RECIP_MASK_ALL },
4511 { "none", RECIP_MASK_NONE },
4512 { "div", RECIP_MASK_DIV },
4513 { "sqrt", RECIP_MASK_SQRT },
4514 { "vec-div", RECIP_MASK_VEC_DIV },
4515 { "vec-sqrt", RECIP_MASK_VEC_SQRT },
4518 int const pta_size = ARRAY_SIZE (processor_alias_table);
4520 /* Set up prefix/suffix so the error messages refer to either the command
4521 line argument, or the attribute(target). */
4530 prefix = "option(\"";
4535 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
4536 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
4537 if (TARGET_64BIT_DEFAULT && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
4538 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32);
4539 #ifdef TARGET_BI_ARCH
4542 #if TARGET_BI_ARCH == 1
4543 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
4544 is on and OPTION_MASK_ABI_X32 is off. We turn off
4545 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
4547 if (TARGET_X32_P (opts->x_ix86_isa_flags))
4548 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
4550 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
4551 on and OPTION_MASK_ABI_64 is off. We turn off
4552 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
4553 -m64 or OPTION_MASK_CODE16 is turned on by -m16. */
4554 if (TARGET_LP64_P (opts->x_ix86_isa_flags)
4555 || TARGET_16BIT_P (opts->x_ix86_isa_flags))
4556 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
4558 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
4559 && TARGET_IAMCU_P (opts->x_target_flags))
4560 sorry ("Intel MCU psABI isn%'t supported in %s mode",
4561 TARGET_X32_P (opts->x_ix86_isa_flags) ? "x32" : "64-bit");
4565 if (TARGET_X32_P (opts->x_ix86_isa_flags))
4567 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
4568 OPTION_MASK_ABI_64 for TARGET_X32. */
4569 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
4570 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
4572 else if (TARGET_16BIT_P (opts->x_ix86_isa_flags))
4573 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ISA_64BIT
4574 | OPTION_MASK_ABI_X32
4575 | OPTION_MASK_ABI_64);
4576 else if (TARGET_LP64_P (opts->x_ix86_isa_flags))
4578 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
4579 OPTION_MASK_ABI_X32 for TARGET_LP64. */
4580 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
4581 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
4584 #ifdef SUBTARGET_OVERRIDE_OPTIONS
4585 SUBTARGET_OVERRIDE_OPTIONS;
4588 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
4589 SUBSUBTARGET_OVERRIDE_OPTIONS;
4592 /* -fPIC is the default for x86_64. */
4593 if (TARGET_MACHO && TARGET_64BIT_P (opts->x_ix86_isa_flags))
4594 opts->x_flag_pic = 2;
4596 /* Need to check -mtune=generic first. */
4597 if (opts->x_ix86_tune_string)
4599 /* As special support for cross compilers we read -mtune=native
4600 as -mtune=generic. With native compilers we won't see the
4601 -mtune=native, as it was changed by the driver. */
4602 if (!strcmp (opts->x_ix86_tune_string, "native"))
4604 opts->x_ix86_tune_string = "generic";
4606 else if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
4607 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
4608 "%stune=k8%s or %stune=generic%s instead as appropriate",
4609 prefix, suffix, prefix, suffix, prefix, suffix);
4613 if (opts->x_ix86_arch_string)
4614 opts->x_ix86_tune_string = opts->x_ix86_arch_string;
4615 if (!opts->x_ix86_tune_string)
4617 opts->x_ix86_tune_string
4618 = processor_target_table[TARGET_CPU_DEFAULT].name;
4619 ix86_tune_defaulted = 1;
4622 /* opts->x_ix86_tune_string is set to opts->x_ix86_arch_string
4623 or defaulted. We need to use a sensible tune option. */
4624 if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
4626 opts->x_ix86_tune_string = "generic";
4630 if (opts->x_ix86_stringop_alg == rep_prefix_8_byte
4631 && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
4633 /* rep; movq isn't available in 32-bit code. */
4634 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
4635 opts->x_ix86_stringop_alg = no_stringop;
4638 if (!opts->x_ix86_arch_string)
4639 opts->x_ix86_arch_string
4640 = TARGET_64BIT_P (opts->x_ix86_isa_flags)
4641 ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
4643 ix86_arch_specified = 1;
4645 if (opts_set->x_ix86_pmode)
4647 if ((TARGET_LP64_P (opts->x_ix86_isa_flags)
4648 && opts->x_ix86_pmode == PMODE_SI)
4649 || (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
4650 && opts->x_ix86_pmode == PMODE_DI))
4651 error ("address mode %qs not supported in the %s bit mode",
4652 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "short" : "long",
4653 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "64" : "32");
4656 opts->x_ix86_pmode = TARGET_LP64_P (opts->x_ix86_isa_flags)
4657 ? PMODE_DI : PMODE_SI;
4659 if (!opts_set->x_ix86_abi)
4660 opts->x_ix86_abi = DEFAULT_ABI;
4662 /* For targets using ms ABI enable ms-extensions, if not
4663 explicit turned off. For non-ms ABI we turn off this
4665 if (!opts_set->x_flag_ms_extensions)
4666 opts->x_flag_ms_extensions = (MS_ABI == DEFAULT_ABI);
4668 if (opts_set->x_ix86_cmodel)
4670 switch (opts->x_ix86_cmodel)
4674 if (opts->x_flag_pic)
4675 opts->x_ix86_cmodel = CM_SMALL_PIC;
4676 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4677 error ("code model %qs not supported in the %s bit mode",
4683 if (opts->x_flag_pic)
4684 opts->x_ix86_cmodel = CM_MEDIUM_PIC;
4685 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4686 error ("code model %qs not supported in the %s bit mode",
4688 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
4689 error ("code model %qs not supported in x32 mode",
4695 if (opts->x_flag_pic)
4696 opts->x_ix86_cmodel = CM_LARGE_PIC;
4697 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4698 error ("code model %qs not supported in the %s bit mode",
4700 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
4701 error ("code model %qs not supported in x32 mode",
4706 if (opts->x_flag_pic)
4707 error ("code model %s does not support PIC mode", "32");
4708 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4709 error ("code model %qs not supported in the %s bit mode",
4714 if (opts->x_flag_pic)
4716 error ("code model %s does not support PIC mode", "kernel");
4717 opts->x_ix86_cmodel = CM_32;
4719 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4720 error ("code model %qs not supported in the %s bit mode",
4730 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
4731 use of rip-relative addressing. This eliminates fixups that
4732 would otherwise be needed if this object is to be placed in a
4733 DLL, and is essentially just as efficient as direct addressing. */
4734 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
4735 && (TARGET_RDOS || TARGET_PECOFF))
4736 opts->x_ix86_cmodel = CM_MEDIUM_PIC, opts->x_flag_pic = 1;
4737 else if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4738 opts->x_ix86_cmodel = opts->x_flag_pic ? CM_SMALL_PIC : CM_SMALL;
4740 opts->x_ix86_cmodel = CM_32;
4742 if (TARGET_MACHO && opts->x_ix86_asm_dialect == ASM_INTEL)
4744 error ("-masm=intel not supported in this configuration");
4745 opts->x_ix86_asm_dialect = ASM_ATT;
4747 if ((TARGET_64BIT_P (opts->x_ix86_isa_flags) != 0)
4748 != ((opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
4749 sorry ("%i-bit mode not compiled in",
4750 (opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
4752 for (i = 0; i < pta_size; i++)
4753 if (! strcmp (opts->x_ix86_arch_string, processor_alias_table[i].name))
4755 ix86_schedule = processor_alias_table[i].schedule;
4756 ix86_arch = processor_alias_table[i].processor;
4757 /* Default cpu tuning to the architecture. */
4758 ix86_tune = ix86_arch;
4760 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
4761 && !(processor_alias_table[i].flags & PTA_64BIT))
4762 error ("CPU you selected does not support x86-64 "
4765 if (processor_alias_table[i].flags & PTA_MMX
4766 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
4767 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX;
4768 if (processor_alias_table[i].flags & PTA_3DNOW
4769 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
4770 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
4771 if (processor_alias_table[i].flags & PTA_3DNOW_A
4772 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
4773 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
4774 if (processor_alias_table[i].flags & PTA_SSE
4775 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
4776 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE;
4777 if (processor_alias_table[i].flags & PTA_SSE2
4778 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
4779 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
4780 if (processor_alias_table[i].flags & PTA_SSE3
4781 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
4782 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
4783 if (processor_alias_table[i].flags & PTA_SSSE3
4784 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
4785 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
4786 if (processor_alias_table[i].flags & PTA_SSE4_1
4787 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
4788 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
4789 if (processor_alias_table[i].flags & PTA_SSE4_2
4790 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
4791 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
4792 if (processor_alias_table[i].flags & PTA_AVX
4793 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
4794 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX;
4795 if (processor_alias_table[i].flags & PTA_AVX2
4796 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
4797 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
4798 if (processor_alias_table[i].flags & PTA_FMA
4799 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
4800 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA;
4801 if (processor_alias_table[i].flags & PTA_SSE4A
4802 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
4803 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
4804 if (processor_alias_table[i].flags & PTA_FMA4
4805 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
4806 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
4807 if (processor_alias_table[i].flags & PTA_XOP
4808 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
4809 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP;
4810 if (processor_alias_table[i].flags & PTA_LWP
4811 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
4812 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP;
4813 if (processor_alias_table[i].flags & PTA_ABM
4814 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
4815 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM;
4816 if (processor_alias_table[i].flags & PTA_BMI
4817 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
4818 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI;
4819 if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
4820 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
4821 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
4822 if (processor_alias_table[i].flags & PTA_TBM
4823 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
4824 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM;
4825 if (processor_alias_table[i].flags & PTA_BMI2
4826 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
4827 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
4828 if (processor_alias_table[i].flags & PTA_CX16
4829 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
4830 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CX16;
4831 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
4832 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
4833 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
4834 if (!(TARGET_64BIT_P (opts->x_ix86_isa_flags)
4835 && (processor_alias_table[i].flags & PTA_NO_SAHF))
4836 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
4837 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
4838 if (processor_alias_table[i].flags & PTA_MOVBE
4839 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
4840 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
4841 if (processor_alias_table[i].flags & PTA_AES
4842 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
4843 ix86_isa_flags |= OPTION_MASK_ISA_AES;
4844 if (processor_alias_table[i].flags & PTA_SHA
4845 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SHA))
4846 ix86_isa_flags |= OPTION_MASK_ISA_SHA;
4847 if (processor_alias_table[i].flags & PTA_PCLMUL
4848 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
4849 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
4850 if (processor_alias_table[i].flags & PTA_FSGSBASE
4851 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
4852 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
4853 if (processor_alias_table[i].flags & PTA_RDRND
4854 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
4855 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
4856 if (processor_alias_table[i].flags & PTA_F16C
4857 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
4858 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C;
4859 if (processor_alias_table[i].flags & PTA_RTM
4860 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
4861 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RTM;
4862 if (processor_alias_table[i].flags & PTA_HLE
4863 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_HLE))
4864 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_HLE;
4865 if (processor_alias_table[i].flags & PTA_PRFCHW
4866 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW))
4867 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW;
4868 if (processor_alias_table[i].flags & PTA_RDSEED
4869 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDSEED))
4870 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDSEED;
4871 if (processor_alias_table[i].flags & PTA_ADX
4872 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ADX))
4873 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ADX;
4874 if (processor_alias_table[i].flags & PTA_FXSR
4875 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FXSR))
4876 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FXSR;
4877 if (processor_alias_table[i].flags & PTA_XSAVE
4878 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVE))
4879 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVE;
4880 if (processor_alias_table[i].flags & PTA_XSAVEOPT
4881 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT))
4882 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT;
4883 if (processor_alias_table[i].flags & PTA_AVX512F
4884 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F))
4885 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F;
4886 if (processor_alias_table[i].flags & PTA_AVX512ER
4887 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512ER))
4888 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER;
4889 if (processor_alias_table[i].flags & PTA_AVX512PF
4890 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512PF))
4891 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512PF;
4892 if (processor_alias_table[i].flags & PTA_AVX512CD
4893 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512CD))
4894 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD;
4895 if (processor_alias_table[i].flags & PTA_PREFETCHWT1
4896 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PREFETCHWT1))
4897 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PREFETCHWT1;
4898 if (processor_alias_table[i].flags & PTA_PCOMMIT
4899 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCOMMIT))
4900 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCOMMIT;
4901 if (processor_alias_table[i].flags & PTA_CLWB
4902 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLWB))
4903 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLWB;
4904 if (processor_alias_table[i].flags & PTA_CLFLUSHOPT
4905 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLFLUSHOPT))
4906 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLFLUSHOPT;
4907 if (processor_alias_table[i].flags & PTA_CLZERO
4908 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLZERO))
4909 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLZERO;
4910 if (processor_alias_table[i].flags & PTA_XSAVEC
4911 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEC))
4912 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEC;
4913 if (processor_alias_table[i].flags & PTA_XSAVES
4914 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVES))
4915 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVES;
4916 if (processor_alias_table[i].flags & PTA_AVX512DQ
4917 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512DQ))
4918 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512DQ;
4919 if (processor_alias_table[i].flags & PTA_AVX512BW
4920 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512BW))
4921 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512BW;
4922 if (processor_alias_table[i].flags & PTA_AVX512VL
4923 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VL))
4924 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VL;
4925 if (processor_alias_table[i].flags & PTA_MPX
4926 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MPX))
4927 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MPX;
4928 if (processor_alias_table[i].flags & PTA_AVX512VBMI
4929 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VBMI))
4930 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VBMI;
4931 if (processor_alias_table[i].flags & PTA_AVX512IFMA
4932 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512IFMA))
4933 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512IFMA;
4934 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
4935 x86_prefetch_sse = true;
4936 if (processor_alias_table[i].flags & PTA_MWAITX
4937 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MWAITX))
4938 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MWAITX;
4939 if (processor_alias_table[i].flags & PTA_PKU
4940 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PKU))
4941 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PKU;
4943 if (!(opts_set->x_target_flags & MASK_80387))
4945 if (processor_alias_table[i].flags & PTA_NO_80387)
4946 opts->x_target_flags &= ~MASK_80387;
4948 opts->x_target_flags |= MASK_80387;
4953 if (TARGET_X32 && (opts->x_ix86_isa_flags & OPTION_MASK_ISA_MPX))
4954 error ("Intel MPX does not support x32");
4956 if (TARGET_X32 && (ix86_isa_flags & OPTION_MASK_ISA_MPX))
4957 error ("Intel MPX does not support x32");
4959 if (!strcmp (opts->x_ix86_arch_string, "generic"))
4960 error ("generic CPU can be used only for %stune=%s %s",
4961 prefix, suffix, sw);
4962 else if (!strcmp (opts->x_ix86_arch_string, "intel"))
4963 error ("intel CPU can be used only for %stune=%s %s",
4964 prefix, suffix, sw);
4965 else if (i == pta_size)
4966 error ("bad value (%s) for %sarch=%s %s",
4967 opts->x_ix86_arch_string, prefix, suffix, sw);
4969 ix86_arch_mask = 1u << ix86_arch;
4970 for (i = 0; i < X86_ARCH_LAST; ++i)
4971 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4973 for (i = 0; i < pta_size; i++)
4974 if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name))
4976 ix86_schedule = processor_alias_table[i].schedule;
4977 ix86_tune = processor_alias_table[i].processor;
4978 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4980 if (!(processor_alias_table[i].flags & PTA_64BIT))
4982 if (ix86_tune_defaulted)
4984 opts->x_ix86_tune_string = "x86-64";
4985 for (i = 0; i < pta_size; i++)
4986 if (! strcmp (opts->x_ix86_tune_string,
4987 processor_alias_table[i].name))
4989 ix86_schedule = processor_alias_table[i].schedule;
4990 ix86_tune = processor_alias_table[i].processor;
4993 error ("CPU you selected does not support x86-64 "
4997 /* Intel CPUs have always interpreted SSE prefetch instructions as
4998 NOPs; so, we can enable SSE prefetch instructions even when
4999 -mtune (rather than -march) points us to a processor that has them.
5000 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
5001 higher processors. */
5003 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
5004 x86_prefetch_sse = true;
5008 if (ix86_tune_specified && i == pta_size)
5009 error ("bad value (%s) for %stune=%s %s",
5010 opts->x_ix86_tune_string, prefix, suffix, sw);
5012 set_ix86_tune_features (ix86_tune, opts->x_ix86_dump_tunes);
5014 #ifndef USE_IX86_FRAME_POINTER
5015 #define USE_IX86_FRAME_POINTER 0
5018 #ifndef USE_X86_64_FRAME_POINTER
5019 #define USE_X86_64_FRAME_POINTER 0
5022 /* Set the default values for switches whose default depends on TARGET_64BIT
5023 in case they weren't overwritten by command line options. */
5024 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
5026 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
5027 opts->x_flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
5028 if (opts->x_flag_asynchronous_unwind_tables
5029 && !opts_set->x_flag_unwind_tables
5030 && TARGET_64BIT_MS_ABI)
5031 opts->x_flag_unwind_tables = 1;
5032 if (opts->x_flag_asynchronous_unwind_tables == 2)
5033 opts->x_flag_unwind_tables
5034 = opts->x_flag_asynchronous_unwind_tables = 1;
5035 if (opts->x_flag_pcc_struct_return == 2)
5036 opts->x_flag_pcc_struct_return = 0;
5040 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
5041 opts->x_flag_omit_frame_pointer
5042 = !(USE_IX86_FRAME_POINTER || opts->x_optimize_size);
5043 if (opts->x_flag_asynchronous_unwind_tables == 2)
5044 opts->x_flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
5045 if (opts->x_flag_pcc_struct_return == 2)
5047 /* Intel MCU psABI specifies that -freg-struct-return should
5048 be on. Instead of setting DEFAULT_PCC_STRUCT_RETURN to 1,
5049 we check -miamcu so that -freg-struct-return is always
5050 turned on if -miamcu is used. */
5051 if (TARGET_IAMCU_P (opts->x_target_flags))
5052 opts->x_flag_pcc_struct_return = 0;
5054 opts->x_flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
5058 ix86_tune_cost = processor_target_table[ix86_tune].cost;
5059 /* TODO: ix86_cost should be chosen at instruction or function granuality
5060 so for cold code we use size_cost even in !optimize_size compilation. */
5061 if (opts->x_optimize_size)
5062 ix86_cost = &ix86_size_cost;
5064 ix86_cost = ix86_tune_cost;
5066 /* Arrange to set up i386_stack_locals for all functions. */
5067 init_machine_status = ix86_init_machine_status;
5069 /* Validate -mregparm= value. */
5070 if (opts_set->x_ix86_regparm)
5072 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
5073 warning (0, "-mregparm is ignored in 64-bit mode");
5074 else if (TARGET_IAMCU_P (opts->x_target_flags))
5075 warning (0, "-mregparm is ignored for Intel MCU psABI");
5076 if (opts->x_ix86_regparm > REGPARM_MAX)
5078 error ("-mregparm=%d is not between 0 and %d",
5079 opts->x_ix86_regparm, REGPARM_MAX);
5080 opts->x_ix86_regparm = 0;
5083 if (TARGET_IAMCU_P (opts->x_target_flags)
5084 || TARGET_64BIT_P (opts->x_ix86_isa_flags))
5085 opts->x_ix86_regparm = REGPARM_MAX;
5087 /* Default align_* from the processor table. */
5088 ix86_default_align (opts);
5090 /* Provide default for -mbranch-cost= value. */
5091 if (!opts_set->x_ix86_branch_cost)
5092 opts->x_ix86_branch_cost = ix86_tune_cost->branch_cost;
5094 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
5096 opts->x_target_flags
5097 |= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags;
5099 /* Enable by default the SSE and MMX builtins. Do allow the user to
5100 explicitly disable any of these. In particular, disabling SSE and
5101 MMX for kernel code is extremely useful. */
5102 if (!ix86_arch_specified)
5103 opts->x_ix86_isa_flags
5104 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
5105 | TARGET_SUBTARGET64_ISA_DEFAULT)
5106 & ~opts->x_ix86_isa_flags_explicit);
5108 if (TARGET_RTD_P (opts->x_target_flags))
5109 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
5113 opts->x_target_flags
5114 |= TARGET_SUBTARGET32_DEFAULT & ~opts_set->x_target_flags;
5116 if (!ix86_arch_specified)
5117 opts->x_ix86_isa_flags
5118 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit;
5120 /* The i386 ABI does not specify a red zone. It still makes sense to use it
5121 when the programmer takes care to keep the stack from being destroyed. */
5122 if (!(opts_set->x_target_flags & MASK_NO_RED_ZONE))
5123 opts->x_target_flags |= MASK_NO_RED_ZONE;
5126 /* Keep nonleaf frame pointers. */
5127 if (opts->x_flag_omit_frame_pointer)
5128 opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
5129 else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags))
5130 opts->x_flag_omit_frame_pointer = 1;
5132 /* If we're doing fast math, we don't care about comparison order
5133 wrt NaNs. This lets us use a shorter comparison sequence. */
5134 if (opts->x_flag_finite_math_only)
5135 opts->x_target_flags &= ~MASK_IEEE_FP;
5137 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
5138 since the insns won't need emulation. */
5139 if (ix86_tune_features [X86_TUNE_ALWAYS_FANCY_MATH_387])
5140 opts->x_target_flags &= ~MASK_NO_FANCY_MATH_387;
5142 /* Likewise, if the target doesn't have a 387, or we've specified
5143 software floating point, don't use 387 inline intrinsics. */
5144 if (!TARGET_80387_P (opts->x_target_flags))
5145 opts->x_target_flags |= MASK_NO_FANCY_MATH_387;
5147 /* Turn on MMX builtins for -msse. */
5148 if (TARGET_SSE_P (opts->x_ix86_isa_flags))
5149 opts->x_ix86_isa_flags
5150 |= OPTION_MASK_ISA_MMX & ~opts->x_ix86_isa_flags_explicit;
5152 /* Enable SSE prefetch. */
5153 if (TARGET_SSE_P (opts->x_ix86_isa_flags)
5154 || (TARGET_PRFCHW && !TARGET_3DNOW_P (opts->x_ix86_isa_flags)))
5155 x86_prefetch_sse = true;
5157 /* Enable prefetch{,w} instructions for -m3dnow and -mprefetchwt1. */
5158 if (TARGET_3DNOW_P (opts->x_ix86_isa_flags)
5159 || TARGET_PREFETCHWT1_P (opts->x_ix86_isa_flags))
5160 opts->x_ix86_isa_flags
5161 |= OPTION_MASK_ISA_PRFCHW & ~opts->x_ix86_isa_flags_explicit;
5163 /* Enable popcnt instruction for -msse4.2 or -mabm. */
5164 if (TARGET_SSE4_2_P (opts->x_ix86_isa_flags)
5165 || TARGET_ABM_P (opts->x_ix86_isa_flags))
5166 opts->x_ix86_isa_flags
5167 |= OPTION_MASK_ISA_POPCNT & ~opts->x_ix86_isa_flags_explicit;
5169 /* Enable lzcnt instruction for -mabm. */
5170 if (TARGET_ABM_P(opts->x_ix86_isa_flags))
5171 opts->x_ix86_isa_flags
5172 |= OPTION_MASK_ISA_LZCNT & ~opts->x_ix86_isa_flags_explicit;
5174 /* Validate -mpreferred-stack-boundary= value or default it to
5175 PREFERRED_STACK_BOUNDARY_DEFAULT. */
5176 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
5177 if (opts_set->x_ix86_preferred_stack_boundary_arg)
5179 int min = (TARGET_64BIT_P (opts->x_ix86_isa_flags)
5180 ? (TARGET_SSE_P (opts->x_ix86_isa_flags) ? 4 : 3) : 2);
5181 int max = (TARGET_SEH ? 4 : 12);
5183 if (opts->x_ix86_preferred_stack_boundary_arg < min
5184 || opts->x_ix86_preferred_stack_boundary_arg > max)
5187 error ("-mpreferred-stack-boundary is not supported "
5190 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
5191 opts->x_ix86_preferred_stack_boundary_arg, min, max);
5194 ix86_preferred_stack_boundary
5195 = (1 << opts->x_ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
5198 /* Set the default value for -mstackrealign. */
5199 if (opts->x_ix86_force_align_arg_pointer == -1)
5200 opts->x_ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
5202 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
5204 /* Validate -mincoming-stack-boundary= value or default it to
5205 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
5206 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
5207 if (opts_set->x_ix86_incoming_stack_boundary_arg)
5209 int min = TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 3 : 2;
5211 if (opts->x_ix86_incoming_stack_boundary_arg < min
5212 || opts->x_ix86_incoming_stack_boundary_arg > 12)
5213 error ("-mincoming-stack-boundary=%d is not between %d and 12",
5214 opts->x_ix86_incoming_stack_boundary_arg, min);
5217 ix86_user_incoming_stack_boundary
5218 = (1 << opts->x_ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
5219 ix86_incoming_stack_boundary
5220 = ix86_user_incoming_stack_boundary;
5224 #ifndef NO_PROFILE_COUNTERS
5225 if (flag_nop_mcount)
5226 error ("-mnop-mcount is not compatible with this target");
5228 if (flag_nop_mcount && flag_pic)
5229 error ("-mnop-mcount is not implemented for -fPIC");
5231 /* Accept -msseregparm only if at least SSE support is enabled. */
5232 if (TARGET_SSEREGPARM_P (opts->x_target_flags)
5233 && ! TARGET_SSE_P (opts->x_ix86_isa_flags))
5234 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
5236 if (opts_set->x_ix86_fpmath)
5238 if (opts->x_ix86_fpmath & FPMATH_SSE)
5240 if (!TARGET_SSE_P (opts->x_ix86_isa_flags))
5242 if (TARGET_80387_P (opts->x_target_flags))
5244 warning (0, "SSE instruction set disabled, using 387 arithmetics");
5245 opts->x_ix86_fpmath = FPMATH_387;
5248 else if ((opts->x_ix86_fpmath & FPMATH_387)
5249 && !TARGET_80387_P (opts->x_target_flags))
5251 warning (0, "387 instruction set disabled, using SSE arithmetics");
5252 opts->x_ix86_fpmath = FPMATH_SSE;
5256 /* For all chips supporting SSE2, -mfpmath=sse performs better than
5257 fpmath=387. The second is however default at many targets since the
5258 extra 80bit precision of temporaries is considered to be part of ABI.
5259 Overwrite the default at least for -ffast-math.
5260 TODO: -mfpmath=both seems to produce same performing code with bit
5261 smaller binaries. It is however not clear if register allocation is
5262 ready for this setting.
5263 Also -mfpmath=387 is overall a lot more compact (about 4-5%) than SSE
5264 codegen. We may switch to 387 with -ffast-math for size optimized
5266 else if (fast_math_flags_set_p (&global_options)
5267 && TARGET_SSE2_P (opts->x_ix86_isa_flags))
5268 opts->x_ix86_fpmath = FPMATH_SSE;
5270 opts->x_ix86_fpmath = TARGET_FPMATH_DEFAULT_P (opts->x_ix86_isa_flags);
5272 /* Use external vectorized library in vectorizing intrinsics. */
5273 if (opts_set->x_ix86_veclibabi_type)
5274 switch (opts->x_ix86_veclibabi_type)
5276 case ix86_veclibabi_type_svml:
5277 ix86_veclib_handler = ix86_veclibabi_svml;
5280 case ix86_veclibabi_type_acml:
5281 ix86_veclib_handler = ix86_veclibabi_acml;
5288 if (ix86_tune_features [X86_TUNE_ACCUMULATE_OUTGOING_ARGS]
5289 && !(opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
5290 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
5292 /* If stack probes are required, the space used for large function
5293 arguments on the stack must also be probed, so enable
5294 -maccumulate-outgoing-args so this happens in the prologue. */
5295 if (TARGET_STACK_PROBE_P (opts->x_target_flags)
5296 && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
5298 if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
5299 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
5300 "for correctness", prefix, suffix);
5301 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
5304 /* Stack realignment without -maccumulate-outgoing-args requires %ebp,
5305 so enable -maccumulate-outgoing-args when %ebp is fixed. */
5306 if (fixed_regs[BP_REG]
5307 && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
5309 if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
5310 warning (0, "fixed ebp register requires %saccumulate-outgoing-args%s",
5312 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
5315 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
5318 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
5319 p = strchr (internal_label_prefix, 'X');
5320 internal_label_prefix_len = p - internal_label_prefix;
5324 /* When scheduling description is not available, disable scheduler pass
5325 so it won't slow down the compilation and make x87 code slower. */
5326 if (!TARGET_SCHEDULE)
5327 opts->x_flag_schedule_insns_after_reload = opts->x_flag_schedule_insns = 0;
5329 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
5330 ix86_tune_cost->simultaneous_prefetches,
5331 opts->x_param_values,
5332 opts_set->x_param_values);
5333 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
5334 ix86_tune_cost->prefetch_block,
5335 opts->x_param_values,
5336 opts_set->x_param_values);
5337 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
5338 ix86_tune_cost->l1_cache_size,
5339 opts->x_param_values,
5340 opts_set->x_param_values);
5341 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
5342 ix86_tune_cost->l2_cache_size,
5343 opts->x_param_values,
5344 opts_set->x_param_values);
5346 /* Enable software prefetching at -O3 for CPUs where prefetching is helpful. */
5347 if (opts->x_flag_prefetch_loop_arrays < 0
5349 && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
5350 && !opts->x_optimize_size
5351 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
5352 opts->x_flag_prefetch_loop_arrays = 1;
5354 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
5355 can be optimized to ap = __builtin_next_arg (0). */
5356 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && !opts->x_flag_split_stack)
5357 targetm.expand_builtin_va_start = NULL;
5359 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
5361 ix86_gen_leave = gen_leave_rex64;
5362 if (Pmode == DImode)
5364 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
5365 ix86_gen_tls_local_dynamic_base_64
5366 = gen_tls_local_dynamic_base_64_di;
5370 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
5371 ix86_gen_tls_local_dynamic_base_64
5372 = gen_tls_local_dynamic_base_64_si;
5376 ix86_gen_leave = gen_leave;
5378 if (Pmode == DImode)
5380 ix86_gen_add3 = gen_adddi3;
5381 ix86_gen_sub3 = gen_subdi3;
5382 ix86_gen_sub3_carry = gen_subdi3_carry;
5383 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
5384 ix86_gen_andsp = gen_anddi3;
5385 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
5386 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
5387 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
5388 ix86_gen_monitor = gen_sse3_monitor_di;
5389 ix86_gen_monitorx = gen_monitorx_di;
5390 ix86_gen_clzero = gen_clzero_di;
5394 ix86_gen_add3 = gen_addsi3;
5395 ix86_gen_sub3 = gen_subsi3;
5396 ix86_gen_sub3_carry = gen_subsi3_carry;
5397 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
5398 ix86_gen_andsp = gen_andsi3;
5399 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
5400 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
5401 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
5402 ix86_gen_monitor = gen_sse3_monitor_si;
5403 ix86_gen_monitorx = gen_monitorx_si;
5404 ix86_gen_clzero = gen_clzero_si;
5408 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
5409 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
5410 opts->x_target_flags |= MASK_CLD & ~opts_set->x_target_flags;
5413 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && opts->x_flag_pic)
5415 if (opts->x_flag_fentry > 0)
5416 sorry ("-mfentry isn%'t supported for 32-bit in combination "
5418 opts->x_flag_fentry = 0;
5420 else if (TARGET_SEH)
5422 if (opts->x_flag_fentry == 0)
5423 sorry ("-mno-fentry isn%'t compatible with SEH");
5424 opts->x_flag_fentry = 1;
5426 else if (opts->x_flag_fentry < 0)
5428 #if defined(PROFILE_BEFORE_PROLOGUE)
5429 opts->x_flag_fentry = 1;
5431 opts->x_flag_fentry = 0;
5435 if (!(opts_set->x_target_flags & MASK_VZEROUPPER))
5436 opts->x_target_flags |= MASK_VZEROUPPER;
5437 if (!(opts_set->x_target_flags & MASK_STV))
5438 opts->x_target_flags |= MASK_STV;
5439 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL]
5440 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
5441 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
5442 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL]
5443 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE))
5444 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
5445 /* Enable 128-bit AVX instruction generation
5446 for the auto-vectorizer. */
5447 if (TARGET_AVX128_OPTIMAL
5448 && !(opts_set->x_target_flags & MASK_PREFER_AVX128))
5449 opts->x_target_flags |= MASK_PREFER_AVX128;
5451 if (opts->x_ix86_recip_name)
5453 char *p = ASTRDUP (opts->x_ix86_recip_name);
5455 unsigned int mask, i;
5458 while ((q = strtok (p, ",")) != NULL)
5469 if (!strcmp (q, "default"))
5470 mask = RECIP_MASK_ALL;
5473 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
5474 if (!strcmp (q, recip_options[i].string))
5476 mask = recip_options[i].mask;
5480 if (i == ARRAY_SIZE (recip_options))
5482 error ("unknown option for -mrecip=%s", q);
5484 mask = RECIP_MASK_NONE;
5488 opts->x_recip_mask_explicit |= mask;
5490 opts->x_recip_mask &= ~mask;
5492 opts->x_recip_mask |= mask;
5496 if (TARGET_RECIP_P (opts->x_target_flags))
5497 opts->x_recip_mask |= RECIP_MASK_ALL & ~opts->x_recip_mask_explicit;
5498 else if (opts_set->x_target_flags & MASK_RECIP)
5499 opts->x_recip_mask &= ~(RECIP_MASK_ALL & ~opts->x_recip_mask_explicit);
5501 /* Default long double to 64-bit for 32-bit Bionic and to __float128
5502 for 64-bit Bionic. Also default long double to 64-bit for Intel
5504 if ((TARGET_HAS_BIONIC || TARGET_IAMCU)
5505 && !(opts_set->x_target_flags
5506 & (MASK_LONG_DOUBLE_64 | MASK_LONG_DOUBLE_128)))
5507 opts->x_target_flags |= (TARGET_64BIT
5508 ? MASK_LONG_DOUBLE_128
5509 : MASK_LONG_DOUBLE_64);
5511 /* Only one of them can be active. */
5512 gcc_assert ((opts->x_target_flags & MASK_LONG_DOUBLE_64) == 0
5513 || (opts->x_target_flags & MASK_LONG_DOUBLE_128) == 0);
5515 /* Save the initial options in case the user does function specific
5518 target_option_default_node = target_option_current_node
5519 = build_target_option_node (opts);
5521 /* Handle stack protector */
5522 if (!opts_set->x_ix86_stack_protector_guard)
5523 opts->x_ix86_stack_protector_guard
5524 = TARGET_HAS_BIONIC ? SSP_GLOBAL : SSP_TLS;
5526 /* Handle -mmemcpy-strategy= and -mmemset-strategy= */
5527 if (opts->x_ix86_tune_memcpy_strategy)
5529 char *str = xstrdup (opts->x_ix86_tune_memcpy_strategy);
5530 ix86_parse_stringop_strategy_string (str, false);
5534 if (opts->x_ix86_tune_memset_strategy)
5536 char *str = xstrdup (opts->x_ix86_tune_memset_strategy);
5537 ix86_parse_stringop_strategy_string (str, true);
5542 /* Implement the TARGET_OPTION_OVERRIDE hook. */
5545 ix86_option_override (void)
/* Build the vzeroupper-insertion pass and ask the pass manager to run
   instance 1 of it immediately after the "reload" pass. */
5547 opt_pass *pass_insert_vzeroupper = make_pass_insert_vzeroupper (g);
5548 struct register_pass_info insert_vzeroupper_info
5549 = { pass_insert_vzeroupper, "reload",
5550 1, PASS_POS_INSERT_AFTER
/* Likewise build the scalar-to-vector (STV) pass, inserted after "combine". */
5552 opt_pass *pass_stv = make_pass_stv (g);
5553 struct register_pass_info stv_info
5554 = { pass_stv, "combine",
5555 1, PASS_POS_INSERT_AFTER
/* Do the actual option processing on the global option structures.
   NOTE(review): the first argument is true here — presumably it marks a
   command-line (as opposed to target-attribute) override; the parameter
   name is not visible in this view, confirm against the callee. */
5558 ix86_option_override_internal (true, &global_options, &global_options_set);
5561 /* This needs to be done at start up. It's convenient to do it here. */
5562 register_pass (&insert_vzeroupper_info);
5563 register_pass (&stv_info);
5566 /* Implement the TARGET_OFFLOAD_OPTIONS hook: return a freshly allocated
   option string describing the host ABI for the offload compiler. The
   caller owns (and must free) the xstrdup'd string. */
5568 ix86_offload_options (void)
/* NOTE(review): the condition choosing between the two returns (presumably
   a TARGET_LP64-style test) is not visible in this view — confirm. */
5571 return xstrdup ("-foffload-abi=lp64");
5572 return xstrdup ("-foffload-abi=ilp32");
5575 /* Update register usage after having seen the compiler flags.
   Marks registers unavailable (fixed, call-used, and unnamed) when the
   ISA feature that provides them is disabled, and recomputes the
   CLOBBERED_REGS class from the effective call_used_regs[]. */
5578 ix86_conditional_register_usage (void)
5582 /* For 32-bit targets, squash the REX registers. */
/* NOTE(review): the guarding 64-bit test for this squash is elided in
   this view — confirm it only runs for !TARGET_64BIT. */
5585 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
5586 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
5587 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
5588 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
5589 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
5590 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
5593 /* See the definition of CALL_USED_REGISTERS in i386.h. */
/* c_mask selects which conditional CALL_USED_REGISTERS entries apply
   for the current ABI (MS vs. SysV 64-bit). */
5594 c_mask = CALL_USED_REGISTERS_MASK (TARGET_64BIT_MS_ABI);
5596 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
5598 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5600 /* Set/reset conditionally defined registers from
5601 CALL_USED_REGISTERS initializer. */
/* Entries > 1 encode an ABI-conditional mask rather than a plain
   0/1 flag; resolve them against c_mask here. */
5602 if (call_used_regs[i] > 1)
5603 call_used_regs[i] = !!(call_used_regs[i] & c_mask);
5605 /* Calculate registers of CLOBBERED_REGS register set
5606 as call used registers from GENERAL_REGS register set. */
5607 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
5608 && call_used_regs[i])
5609 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
5612 /* If MMX is disabled, squash the registers. */
5614 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5615 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
5616 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
5618 /* If SSE is disabled, squash the registers. */
5620 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5621 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
5622 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
5624 /* If the FPU is disabled, squash the registers. */
5625 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
5626 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5627 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
5628 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
5630 /* If AVX512F is disabled, squash the registers. */
/* Both the extended XMM16-31 range and the mask registers exist only
   with AVX-512F. */
5631 if (! TARGET_AVX512F)
5633 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
5634 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
5636 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
5637 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
5640 /* If MPX is disabled, squash the registers. */
5642 for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
5643 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
5647 /* Save the current options.
   Copies the target-specific option state from the global backend
   variables and OPTS into PTR (a struct cl_target_option), so it can be
   restored later by ix86_function_specific_restore — used for
   target attribute/pragma support. */
5650 ix86_function_specific_save (struct cl_target_option *ptr,
5651 struct gcc_options *opts)
/* Backend-global state not carried inside struct gcc_options. */
5653 ptr->arch = ix86_arch;
5654 ptr->schedule = ix86_schedule;
5655 ptr->prefetch_sse = x86_prefetch_sse;
5656 ptr->tune = ix86_tune;
5657 ptr->branch_cost = ix86_branch_cost;
5658 ptr->tune_defaulted = ix86_tune_defaulted;
5659 ptr->arch_specified = ix86_arch_specified;
/* Straight field-by-field copies out of OPTS. */
5660 ptr->x_ix86_isa_flags_explicit = opts->x_ix86_isa_flags_explicit;
5661 ptr->x_ix86_target_flags_explicit = opts->x_ix86_target_flags_explicit;
5662 ptr->x_recip_mask_explicit = opts->x_recip_mask_explicit;
5663 ptr->x_ix86_arch_string = opts->x_ix86_arch_string;
5664 ptr->x_ix86_tune_string = opts->x_ix86_tune_string;
5665 ptr->x_ix86_cmodel = opts->x_ix86_cmodel;
5666 ptr->x_ix86_abi = opts->x_ix86_abi;
5667 ptr->x_ix86_asm_dialect = opts->x_ix86_asm_dialect;
5668 ptr->x_ix86_branch_cost = opts->x_ix86_branch_cost;
5669 ptr->x_ix86_dump_tunes = opts->x_ix86_dump_tunes;
5670 ptr->x_ix86_force_align_arg_pointer = opts->x_ix86_force_align_arg_pointer;
5671 ptr->x_ix86_force_drap = opts->x_ix86_force_drap;
5672 ptr->x_ix86_incoming_stack_boundary_arg = opts->x_ix86_incoming_stack_boundary_arg;
5673 ptr->x_ix86_pmode = opts->x_ix86_pmode;
5674 ptr->x_ix86_preferred_stack_boundary_arg = opts->x_ix86_preferred_stack_boundary_arg;
5675 ptr->x_ix86_recip_name = opts->x_ix86_recip_name;
5676 ptr->x_ix86_regparm = opts->x_ix86_regparm;
5677 ptr->x_ix86_section_threshold = opts->x_ix86_section_threshold;
5678 ptr->x_ix86_sse2avx = opts->x_ix86_sse2avx;
5679 ptr->x_ix86_stack_protector_guard = opts->x_ix86_stack_protector_guard;
5680 ptr->x_ix86_stringop_alg = opts->x_ix86_stringop_alg;
5681 ptr->x_ix86_tls_dialect = opts->x_ix86_tls_dialect;
5682 ptr->x_ix86_tune_ctrl_string = opts->x_ix86_tune_ctrl_string;
5683 ptr->x_ix86_tune_memcpy_strategy = opts->x_ix86_tune_memcpy_strategy;
5684 ptr->x_ix86_tune_memset_strategy = opts->x_ix86_tune_memset_strategy;
5685 ptr->x_ix86_tune_no_default = opts->x_ix86_tune_no_default;
5686 ptr->x_ix86_veclibabi_type = opts->x_ix86_veclibabi_type;
5688 /* The fields are char but the variables are not; make sure the
5689 values fit in the fields. */
/* A failing assert here means a new processor/tuning enum value no
   longer fits in the narrow cl_target_option field. */
5690 gcc_assert (ptr->arch == ix86_arch);
5691 gcc_assert (ptr->schedule == ix86_schedule);
5692 gcc_assert (ptr->tune == ix86_tune);
5693 gcc_assert (ptr->branch_cost == ix86_branch_cost);
5696 /* Restore the current options */
/* Inverse of ix86_function_specific_save: copy the option state recorded
   in PTR back into OPTS and the cached ix86_* globals, then refresh any
   state derived from arch/tune.  NOTE(review): the return-type line,
   braces, `else` keywords and the loop-index declaration are elided in
   this excerpt.  */
5699 ix86_function_specific_restore (struct gcc_options *opts,
5700 struct cl_target_option *ptr)
/* Remember the pre-restore arch/tune so derived tables are only
   recomputed when they actually change.  */
5702 enum processor_type old_tune = ix86_tune;
5703 enum processor_type old_arch = ix86_arch;
5704 unsigned int ix86_arch_mask;
5707 /* We don't change -fPIC. */
5708 opts->x_flag_pic = flag_pic;
5710 ix86_arch = (enum processor_type) ptr->arch;
5711 ix86_schedule = (enum attr_cpu) ptr->schedule;
5712 ix86_tune = (enum processor_type) ptr->tune;
5713 x86_prefetch_sse = ptr->prefetch_sse;
5714 opts->x_ix86_branch_cost = ptr->branch_cost;
5715 ix86_tune_defaulted = ptr->tune_defaulted;
5716 ix86_arch_specified = ptr->arch_specified;
5717 opts->x_ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
5718 opts->x_ix86_target_flags_explicit = ptr->x_ix86_target_flags_explicit;
5719 opts->x_recip_mask_explicit = ptr->x_recip_mask_explicit;
5720 opts->x_ix86_arch_string = ptr->x_ix86_arch_string;
5721 opts->x_ix86_tune_string = ptr->x_ix86_tune_string;
5722 opts->x_ix86_cmodel = ptr->x_ix86_cmodel;
5723 opts->x_ix86_abi = ptr->x_ix86_abi;
5724 opts->x_ix86_asm_dialect = ptr->x_ix86_asm_dialect;
5725 opts->x_ix86_branch_cost = ptr->x_ix86_branch_cost;
5726 opts->x_ix86_dump_tunes = ptr->x_ix86_dump_tunes;
5727 opts->x_ix86_force_align_arg_pointer = ptr->x_ix86_force_align_arg_pointer;
5728 opts->x_ix86_force_drap = ptr->x_ix86_force_drap;
5729 opts->x_ix86_incoming_stack_boundary_arg = ptr->x_ix86_incoming_stack_boundary_arg;
5730 opts->x_ix86_pmode = ptr->x_ix86_pmode;
5731 opts->x_ix86_preferred_stack_boundary_arg = ptr->x_ix86_preferred_stack_boundary_arg;
5732 opts->x_ix86_recip_name = ptr->x_ix86_recip_name;
5733 opts->x_ix86_regparm = ptr->x_ix86_regparm;
5734 opts->x_ix86_section_threshold = ptr->x_ix86_section_threshold;
5735 opts->x_ix86_sse2avx = ptr->x_ix86_sse2avx;
5736 opts->x_ix86_stack_protector_guard = ptr->x_ix86_stack_protector_guard;
5737 opts->x_ix86_stringop_alg = ptr->x_ix86_stringop_alg;
5738 opts->x_ix86_tls_dialect = ptr->x_ix86_tls_dialect;
5739 opts->x_ix86_tune_ctrl_string = ptr->x_ix86_tune_ctrl_string;
5740 opts->x_ix86_tune_memcpy_strategy = ptr->x_ix86_tune_memcpy_strategy;
5741 opts->x_ix86_tune_memset_strategy = ptr->x_ix86_tune_memset_strategy;
5742 opts->x_ix86_tune_no_default = ptr->x_ix86_tune_no_default;
5743 opts->x_ix86_veclibabi_type = ptr->x_ix86_veclibabi_type;
/* Re-derive the cost tables from the (possibly changed) tune setting;
   -Os overrides them with the size-oriented table.  */
5744 ix86_tune_cost = processor_target_table[ix86_tune].cost;
5745 /* TODO: ix86_cost should be chosen at instruction or function granuality
5746 so for cold code we use size_cost even in !optimize_size compilation. */
5747 if (opts->x_optimize_size)
5748 ix86_cost = &ix86_size_cost;
5750 ix86_cost = ix86_tune_cost;
5752 /* Recreate the arch feature tests if the arch changed */
5753 if (old_arch != ix86_arch)
/* Each ix86_arch_features entry is a per-arch bitmask test.  */
5755 ix86_arch_mask = 1u << ix86_arch;
5756 for (i = 0; i < X86_ARCH_LAST; ++i)
5757 ix86_arch_features[i]
5758 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
5761 /* Recreate the tune optimization tests */
5762 if (old_tune != ix86_tune)
5763 set_ix86_tune_features (ix86_tune, false);
5766 /* Adjust target options after streaming them in. This is mainly about
5767 reconciling them with global options. */
/* LTO hook: PTR was streamed in from another compilation, so its code
   model may disagree with the global flag_pic of this compile.  Map
   non-PIC models to their PIC counterparts (or vice versa — the second
   switch appears to handle the flag_pic-off direction).
   NOTE(review): the return-type line, braces, `case` labels, `break`s
   and the flag_pic conditions are elided in this excerpt.  */
5770 ix86_function_specific_post_stream_in (struct cl_target_option *ptr)
5772 /* flag_pic is a global option, but ix86_cmodel is target saved option
5773 partly computed from flag_pic. If flag_pic is on, adjust x_ix86_cmodel
5774 for PIC, or error out. */
5776 switch (ptr->x_ix86_cmodel)
5779 ptr->x_ix86_cmodel = CM_SMALL_PIC;
5783 ptr->x_ix86_cmodel = CM_MEDIUM_PIC;
5787 ptr->x_ix86_cmodel = CM_LARGE_PIC;
/* The kernel code model has no PIC variant at all.  */
5791 error ("code model %s does not support PIC mode", "kernel");
5798 switch (ptr->x_ix86_cmodel)
5801 ptr->x_ix86_cmodel = CM_SMALL;
5805 ptr->x_ix86_cmodel = CM_MEDIUM;
5809 ptr->x_ix86_cmodel = CM_LARGE;
5817 /* Print the current options */
/* Debug hook: dump the option state in PTR to FILE, indented by INDENT
   columns — the numeric and symbolic arch/tune, the branch cost, and the
   human-readable target string built by ix86_target_string.
   NOTE(review): the return-type line, braces and the declaration of the
   target_string variable are elided in this excerpt; ix86_target_string
   presumably returns heap memory, hence the free() below.  */
5820 ix86_function_specific_print (FILE *file, int indent,
5821 struct cl_target_option *ptr)
5824 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
5825 NULL, NULL, ptr->x_ix86_fpmath, false);
/* Guard the table lookups below against corrupted enum values.  */
5827 gcc_assert (ptr->arch < PROCESSOR_max);
5828 fprintf (file, "%*sarch = %d (%s)\n",
5830 ptr->arch, processor_target_table[ptr->arch].name);
5832 gcc_assert (ptr->tune < PROCESSOR_max);
5833 fprintf (file, "%*stune = %d (%s)\n",
5835 ptr->tune, processor_target_table[ptr->tune].name);
5837 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
5841 fprintf (file, "%*s%s\n", indent, "", target_string);
5842 free (target_string);
5847 /* Inner function to process the attribute((target(...))), take an argument and
5848 set the current options from the argument. If we have a list, recursively go
/* Parses one attribute((target("..."))) argument (or a TREE_LIST of
   them) and applies each comma-separated option to OPTS/OPTS_SET.
   Returns a truth value (success); exact return statements are elided
   in this excerpt.  P_STRINGS collects the arch=/tune= string options
   for the caller to apply and free.  */
5852 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
5853 struct gcc_options *opts,
5854 struct gcc_options *opts_set,
5855 struct gcc_options *enum_opts_set)
/* Table-entry constructors: option name S, its length, how to apply it
   (ISA switch, string, enum, or a MASK bit set/cleared), the option
   enumerator O and mask M.  */
5860 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
5861 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
5862 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
5863 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
5864 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
5880 enum ix86_opt_type type;
/* The table of every option name accepted inside attribute((target)).  */
5885 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
5886 IX86_ATTR_ISA ("abm", OPT_mabm),
5887 IX86_ATTR_ISA ("bmi", OPT_mbmi),
5888 IX86_ATTR_ISA ("bmi2", OPT_mbmi2),
5889 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
5890 IX86_ATTR_ISA ("tbm", OPT_mtbm),
5891 IX86_ATTR_ISA ("aes", OPT_maes),
5892 IX86_ATTR_ISA ("sha", OPT_msha),
5893 IX86_ATTR_ISA ("avx", OPT_mavx),
5894 IX86_ATTR_ISA ("avx2", OPT_mavx2),
5895 IX86_ATTR_ISA ("avx512f", OPT_mavx512f),
5896 IX86_ATTR_ISA ("avx512pf", OPT_mavx512pf),
5897 IX86_ATTR_ISA ("avx512er", OPT_mavx512er),
5898 IX86_ATTR_ISA ("avx512cd", OPT_mavx512cd),
5899 IX86_ATTR_ISA ("avx512dq", OPT_mavx512dq),
5900 IX86_ATTR_ISA ("avx512bw", OPT_mavx512bw),
5901 IX86_ATTR_ISA ("avx512vl", OPT_mavx512vl),
5902 IX86_ATTR_ISA ("mmx", OPT_mmmx),
5903 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
5904 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
5905 IX86_ATTR_ISA ("sse", OPT_msse),
5906 IX86_ATTR_ISA ("sse2", OPT_msse2),
5907 IX86_ATTR_ISA ("sse3", OPT_msse3),
5908 IX86_ATTR_ISA ("sse4", OPT_msse4),
5909 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
5910 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
5911 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
5912 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
5913 IX86_ATTR_ISA ("fma4", OPT_mfma4),
5914 IX86_ATTR_ISA ("fma", OPT_mfma),
5915 IX86_ATTR_ISA ("xop", OPT_mxop),
5916 IX86_ATTR_ISA ("lwp", OPT_mlwp),
5917 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
5918 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
5919 IX86_ATTR_ISA ("f16c", OPT_mf16c),
5920 IX86_ATTR_ISA ("rtm", OPT_mrtm),
5921 IX86_ATTR_ISA ("hle", OPT_mhle),
5922 IX86_ATTR_ISA ("prfchw", OPT_mprfchw),
5923 IX86_ATTR_ISA ("rdseed", OPT_mrdseed),
5924 IX86_ATTR_ISA ("adx", OPT_madx),
5925 IX86_ATTR_ISA ("fxsr", OPT_mfxsr),
5926 IX86_ATTR_ISA ("xsave", OPT_mxsave),
5927 IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt),
5928 IX86_ATTR_ISA ("prefetchwt1", OPT_mprefetchwt1),
5929 IX86_ATTR_ISA ("clflushopt", OPT_mclflushopt),
5930 IX86_ATTR_ISA ("xsavec", OPT_mxsavec),
5931 IX86_ATTR_ISA ("xsaves", OPT_mxsaves),
5932 IX86_ATTR_ISA ("avx512vbmi", OPT_mavx512vbmi),
5933 IX86_ATTR_ISA ("avx512ifma", OPT_mavx512ifma),
5934 IX86_ATTR_ISA ("clwb", OPT_mclwb),
5935 IX86_ATTR_ISA ("pcommit", OPT_mpcommit),
5936 IX86_ATTR_ISA ("mwaitx", OPT_mmwaitx),
5937 IX86_ATTR_ISA ("clzero", OPT_mclzero),
5938 IX86_ATTR_ISA ("pku", OPT_mpku),
5941 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
5943 /* string options */
5944 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
5945 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
/* Flag (yes/no) options that toggle bits in x_target_flags.  */
5948 IX86_ATTR_YES ("cld",
5952 IX86_ATTR_NO ("fancy-math-387",
5953 OPT_mfancy_math_387,
5954 MASK_NO_FANCY_MATH_387),
5956 IX86_ATTR_YES ("ieee-fp",
5960 IX86_ATTR_YES ("inline-all-stringops",
5961 OPT_minline_all_stringops,
5962 MASK_INLINE_ALL_STRINGOPS),
5964 IX86_ATTR_YES ("inline-stringops-dynamically",
5965 OPT_minline_stringops_dynamically,
5966 MASK_INLINE_STRINGOPS_DYNAMICALLY),
5968 IX86_ATTR_NO ("align-stringops",
5969 OPT_mno_align_stringops,
5970 MASK_NO_ALIGN_STRINGOPS),
5972 IX86_ATTR_YES ("recip",
5978 /* If this is a list, recurse to get the options. */
5979 if (TREE_CODE (args) == TREE_LIST)
5983 for (; args; args = TREE_CHAIN (args))
5984 if (TREE_VALUE (args)
5985 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
5986 p_strings, opts, opts_set,
5993 else if (TREE_CODE (args) != STRING_CST)
5995 error ("attribute %<target%> argument not a string");
5999 /* Handle multiple arguments separated by commas. */
/* ASTRDUP makes a scratch copy on the obstack so the loop can carve the
   string into comma-separated pieces in place.  */
6000 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
6002 while (next_optstr && *next_optstr != '\0')
6004 char *p = next_optstr;
6006 char *comma = strchr (next_optstr, ',');
6007 const char *opt_string;
6008 size_t len, opt_len;
6013 enum ix86_opt_type type = ix86_opt_unknown;
/* If a comma was found, limit this iteration to the text before it and
   continue after it next time.  */
6019 len = comma - next_optstr;
6020 next_optstr = comma + 1;
6028 /* Recognize no-xxx. */
6029 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
6038 /* Find the option. */
/* Linear scan of the table; string/enum options match by prefix (they
   carry a trailing '=' plus an argument), others must match exactly.
   NOTE(review): part of the match condition is elided here.  */
6041 for (i = 0; i < ARRAY_SIZE (attrs); i++)
6043 type = attrs[i].type;
6044 opt_len = attrs[i].len;
6045 if (ch == attrs[i].string[0]
6046 && ((type != ix86_opt_str && type != ix86_opt_enum)
6049 && memcmp (p, attrs[i].string, opt_len) == 0)
6052 mask = attrs[i].mask;
6053 opt_string = attrs[i].string;
6058 /* Process the option. */
6061 error ("attribute(target(\"%s\")) is unknown", orig_p);
/* ISA options are routed through the real option machinery so all the
   usual flag dependencies are applied.  */
6065 else if (type == ix86_opt_isa)
6067 struct cl_decoded_option decoded;
6069 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
6070 ix86_handle_option (opts, opts_set,
6071 &decoded, input_location);
6074 else if (type == ix86_opt_yes || type == ix86_opt_no)
/* IX86_ATTR_NO entries invert the sense of the "no-" prefix.  */
6076 if (type == ix86_opt_no)
6077 opt_set_p = !opt_set_p;
6080 opts->x_target_flags |= mask;
6082 opts->x_target_flags &= ~mask;
6085 else if (type == ix86_opt_str)
6089 error ("option(\"%s\") was already specified", opt_string);
/* Caller owns and frees this xstrdup'd copy.  */
6093 p_strings[opt] = xstrdup (p + opt_len);
6096 else if (type == ix86_opt_enum)
6101 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
6103 set_option (opts, enum_opts_set, opt, value,
6104 p + opt_len, DK_UNSPECIFIED, input_location,
6108 error ("attribute(target(\"%s\")) is unknown", orig_p);
6120 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
/* Parses ARGS via ix86_valid_target_attribute_inner_p, re-runs the
   option override machinery if anything differs from the defaults, and
   packages the result as a target-option tree node.  Returns
   error_mark_node on parse failure.  The original x_ix86_arch_string /
   x_ix86_tune_string / fpmath-set values are saved on entry and put
   back before returning, so OPTS is only transiently modified.
   NOTE(review): the return-type line, braces and the declarations of
   t / i are elided in this excerpt.  */
6123 ix86_valid_target_attribute_tree (tree args,
6124 struct gcc_options *opts,
6125 struct gcc_options *opts_set)
6127 const char *orig_arch_string = opts->x_ix86_arch_string;
6128 const char *orig_tune_string = opts->x_ix86_tune_string;
6129 enum fpmath_unit orig_fpmath_set = opts_set->x_ix86_fpmath;
6130 int orig_tune_defaulted = ix86_tune_defaulted;
6131 int orig_arch_specified = ix86_arch_specified;
6132 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
6135 struct cl_target_option *def
6136 = TREE_TARGET_OPTION (target_option_default_node);
/* Tracks which enum-valued options (fpmath=) the attribute set.  */
6137 struct gcc_options enum_opts_set;
6139 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
6141 /* Process each of the options on the chain. */
6142 if (! ix86_valid_target_attribute_inner_p (args, option_strings, opts,
6143 opts_set, &enum_opts_set))
6144 return error_mark_node;
6146 /* If the changed options are different from the default, rerun
6147 ix86_option_override_internal, and then save the options away.
6148 The string options are attribute options, and will be undone
6149 when we copy the save structure. */
6150 if (opts->x_ix86_isa_flags != def->x_ix86_isa_flags
6151 || opts->x_target_flags != def->x_target_flags
6152 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
6153 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
6154 || enum_opts_set.x_ix86_fpmath)
6156 /* If we are using the default tune= or arch=, undo the string assigned,
6157 and use the default. */
6158 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
6160 opts->x_ix86_arch_string
6161 = ggc_strdup (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
6163 /* If arch= is set, clear all bits in x_ix86_isa_flags,
6164 except for ISA_64BIT, ABI_64, ABI_X32, and CODE16. */
6165 opts->x_ix86_isa_flags &= (OPTION_MASK_ISA_64BIT
6166 | OPTION_MASK_ABI_64
6167 | OPTION_MASK_ABI_X32
6168 | OPTION_MASK_CODE16);
6171 else if (!orig_arch_specified)
6172 opts->x_ix86_arch_string = NULL;
6174 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
6175 opts->x_ix86_tune_string
6176 = ggc_strdup (option_strings[IX86_FUNCTION_SPECIFIC_TUNE]);
6177 else if (orig_tune_defaulted)
6178 opts->x_ix86_tune_string = NULL;
6180 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
6181 if (enum_opts_set.x_ix86_fpmath)
6182 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
6183 else if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
6184 && TARGET_SSE_P (opts->x_ix86_isa_flags))
6186 if (TARGET_80387_P (opts->x_target_flags))
6187 opts->x_ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE
6190 opts->x_ix86_fpmath = (enum fpmath_unit) FPMATH_SSE;
6191 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
6194 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
6195 ix86_option_override_internal (false, opts, opts_set);
6197 /* Add any builtin functions with the new isa if any. */
6198 ix86_add_new_builtins (opts->x_ix86_isa_flags);
6200 /* Save the current options unless we are validating options for
6202 t = build_target_option_node (opts);
/* Put back the caller's strings/flags saved on entry.  */
6204 opts->x_ix86_arch_string = orig_arch_string;
6205 opts->x_ix86_tune_string = orig_tune_string;
6206 opts_set->x_ix86_fpmath = orig_fpmath_set;
6208 /* Free up memory allocated to hold the strings */
6209 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
6210 free (option_strings[i]);
6216 /* Hook to validate attribute((target("string"))). */
/* TARGET_OPTION_VALID_ATTRIBUTE_P implementation: builds a scratch
   gcc_options initialized from the default target options plus FNDECL's
   own optimization options, parses ARGS into it, and attaches the
   resulting target/optimize nodes to FNDECL.  Returns a truth value;
   the exact return statements and braces are elided in this excerpt.  */
6219 ix86_valid_target_attribute_p (tree fndecl,
6220 tree ARG_UNUSED (name),
6222 int ARG_UNUSED (flags))
6224 struct gcc_options func_options;
6225 tree new_target, new_optimize;
6228 /* attribute((target("default"))) does nothing, beyond
6229 affecting multi-versioning. */
6230 if (TREE_VALUE (args)
6231 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
6232 && TREE_CHAIN (args) == NULL_TREE
6233 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
6236 tree old_optimize = build_optimization_node (&global_options);
6238 /* Get the optimization options of the current function. */
6239 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
/* Fall back to the global optimization options if the function has
   none of its own.  */
6242 func_optimize = old_optimize;
6244 /* Init func_options. */
6245 memset (&func_options, 0, sizeof (func_options));
6246 init_options_struct (&func_options, NULL);
6247 lang_hooks.init_options_struct (&func_options);
6249 cl_optimization_restore (&func_options,
6250 TREE_OPTIMIZATION (func_optimize));
6252 /* Initialize func_options to the default before its target options can
6254 cl_target_option_restore (&func_options,
6255 TREE_TARGET_OPTION (target_option_default_node));
6257 new_target = ix86_valid_target_attribute_tree (args, &func_options,
6258 &global_options_set);
6260 new_optimize = build_optimization_node (&func_options);
6262 if (new_target == error_mark_node)
6265 else if (fndecl && new_target)
6267 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
6269 if (old_optimize != new_optimize)
6270 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
/* Release any heap storage the scratch options struct acquired.  */
6273 finalize_options_struct (&func_options);
6279 /* Hook to determine if one function can safely inline another. */
/* TARGET_CAN_INLINE_P: CALLEE may be inlined into CALLER only when the
   callee's ISA flags are a subset of the caller's and the remaining
   target options (flags, arch, tune, fpmath, branch cost) agree.
   NOTE(review): the return-type line, braces and the return statements
   themselves are elided in this excerpt.  */
6282 ix86_can_inline_p (tree caller, tree callee)
6285 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
6286 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
6288 /* If callee has no option attributes, then it is ok to inline. */
6292 /* If caller has no option attributes, but callee does then it is not ok to
6294 else if (!caller_tree)
6299 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
6300 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
6302 /* Callee's isa options should a subset of the caller's, i.e. a SSE4 function
6303 can inline a SSE2 function but a SSE2 function can't inline a SSE4
6305 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
6306 != callee_opts->x_ix86_isa_flags)
6309 /* See if we have the same non-isa options. */
6310 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
6313 /* See if arch, tune, etc. are the same. */
6314 else if (caller_opts->arch != callee_opts->arch)
6317 else if (caller_opts->tune != callee_opts->tune)
6320 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
6323 else if (caller_opts->branch_cost != callee_opts->branch_cost)
6334 /* Remember the last target of ix86_set_current_function. */
/* GTY-marked so the GC keeps the cached fndecl alive across
   collections.  */
6335 static GTY(()) tree ix86_previous_fndecl;
6337 /* Set targets globals to the default (or current #pragma GCC target
6338 if active). Invalidate ix86_previous_fndecl cache. */
6341 ix86_reset_previous_fndecl (void)
/* Mirrors the restore logic in ix86_set_current_function: reload the
   global option state, then switch to the matching saved target
   globals, creating them lazily for non-default nodes.
   NOTE(review): the return type, braces and the `else` introducing the
   lazy-save branch are elided in this excerpt.  */
6343 tree new_tree = target_option_current_node;
6344 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
6345 if (TREE_TARGET_GLOBALS (new_tree))
6346 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
6347 else if (new_tree == target_option_default_node)
6348 restore_target_globals (&default_target_globals);
6350 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
/* Force the next ix86_set_current_function call to do a full switch.  */
6351 ix86_previous_fndecl = NULL_TREE;
6354 /* Establish appropriate back-end context for processing the function
6355 FNDECL. The argument might be NULL to indicate processing at top
6356 level, outside of any function scope. */
/* TARGET_SET_CURRENT_FUNCTION: switch global target state to FNDECL's
   saved target options, caching the last fndecl to avoid redundant,
   expensive re-initialization.  NOTE(review): the return-type line,
   braces, early `return`s and the old_tree declaration are elided in
   this excerpt.  */
6358 ix86_set_current_function (tree fndecl)
6360 /* Only change the context if the function changes. This hook is called
6361 several times in the course of compiling a function, and we don't want to
6362 slow things down too much or call target_reinit when it isn't safe. */
6363 if (fndecl == ix86_previous_fndecl)
/* Determine which target-option node was in effect before this call.  */
6367 if (ix86_previous_fndecl == NULL_TREE)
6368 old_tree = target_option_current_node;
6369 else if (DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl))
6370 old_tree = DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
6372 old_tree = target_option_default_node;
6374 if (fndecl == NULL_TREE)
6376 if (old_tree != target_option_current_node)
6377 ix86_reset_previous_fndecl ();
6381 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
6382 if (new_tree == NULL_TREE)
6383 new_tree = target_option_default_node;
6385 if (old_tree != new_tree)
6387 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
6388 if (TREE_TARGET_GLOBALS (new_tree))
6389 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
6390 else if (new_tree == target_option_default_node)
6391 restore_target_globals (&default_target_globals);
/* Lazily create and cache target globals for this option node.  */
6393 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
6395 ix86_previous_fndecl = fndecl;
6397 /* 64-bit MS and SYSV ABI have different set of call used registers.
6398 Avoid expensive re-initialization of init_regs each time we switch
6399 function context. */
/* SI_REG's call_used_regs entry differs between the two ABIs, so it
   serves as a cheap "already configured for this ABI" probe.  */
6401 && (call_used_regs[SI_REG]
6402 == (cfun->machine->call_abi == MS_ABI)))
6407 /* Return true if this goes in large data/bss. */
/* Only the medium code models split data into regular and large
   sections; everything else answers false immediately.
   NOTE(review): the return-type line, braces and the explicit
   `return true/false` statements are elided in this excerpt.  */
6410 ix86_in_large_data_p (tree exp)
6412 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
6415 /* Functions are never large data. */
6416 if (TREE_CODE (exp) == FUNCTION_DECL)
6419 /* Automatic variables are never large data. */
6420 if (TREE_CODE (exp) == VAR_DECL && !is_global_var (exp))
/* An explicit .ldata/.lbss section placement forces the answer.  */
6423 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
6425 const char *section = DECL_SECTION_NAME (exp);
6426 if (strcmp (section, ".ldata") == 0
6427 || strcmp (section, ".lbss") == 0)
6433 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
6435 /* If this is an incomplete type with size 0, then we can't put it
6436 in data because it might be too big when completed. Also,
6437 int_size_in_bytes returns -1 if size can vary or is larger than
6438 an integer in which case also it is safer to assume that it goes in
6440 if (size <= 0 || size > ix86_section_threshold)
6447 /* Switch to the appropriate section for output of DECL.
6448 DECL is either a `VAR_DECL' node or a constant of some sort.
6449 RELOC indicates whether forming the initial value of DECL requires
6450 link-time relocations. */
/* Medium-model large objects get ".l"-prefixed sections chosen from
   their categorization; everything else defers to the generic ELF
   selector.  NOTE(review): braces, `break` statements and some `case`
   labels of the switch are elided in this excerpt.  */
6452 ATTRIBUTE_UNUSED static section *
6453 x86_64_elf_select_section (tree decl, int reloc,
6454 unsigned HOST_WIDE_INT align)
6456 if (ix86_in_large_data_p (decl))
6458 const char *sname = NULL;
6459 unsigned int flags = SECTION_WRITE;
6460 switch (categorize_decl_for_section (decl, reloc))
6465 case SECCAT_DATA_REL:
6466 sname = ".ldata.rel";
6468 case SECCAT_DATA_REL_LOCAL:
6469 sname = ".ldata.rel.local";
6471 case SECCAT_DATA_REL_RO:
6472 sname = ".ldata.rel.ro";
6474 case SECCAT_DATA_REL_RO_LOCAL:
6475 sname = ".ldata.rel.ro.local";
6479 flags |= SECTION_BSS;
6482 case SECCAT_RODATA_MERGE_STR:
6483 case SECCAT_RODATA_MERGE_STR_INIT:
6484 case SECCAT_RODATA_MERGE_CONST:
6488 case SECCAT_SRODATA:
6495 /* We don't split these for medium model. Place them into
6496 default sections and hope for best. */
6501 /* We might get called with string constants, but get_named_section
6502 doesn't like them as they are not DECLs. Also, we need to set
6503 flags in that case. */
6505 return get_section (sname, flags, NULL);
6506 return get_named_section (decl, sname, reloc);
6509 return default_elf_select_section (decl, reloc, align);
6512 /* Select a set of attributes for section NAME based on the properties
6513 of DECL and whether or not RELOC indicates that DECL's initializer
6514 might contain runtime relocations. */
6516 static unsigned int ATTRIBUTE_UNUSED
6517 x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
6519 unsigned int flags = default_section_type_flags (decl, name, reloc);
6521 if (decl == NULL_TREE
6522 && (strcmp (name, ".ldata.rel.ro") == 0
6523 || strcmp (name, ".ldata.rel.ro.local") == 0))
6524 flags |= SECTION_RELRO;
6526 if (strcmp (name, ".lbss") == 0
6527 || strncmp (name, ".lbss.", 5) == 0
6528 || strncmp (name, ".gnu.linkonce.lb.", 16) == 0)
6529 flags |= SECTION_BSS;
6534 /* Build up a unique section name, expressed as a
6535 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
6536 RELOC indicates whether the initial value of EXP requires
6537 link-time relocations. */
/* Large (medium-model) objects get a unique ".l*"-prefixed section
   name, optionally under ".gnu.linkonce" when COMDAT groups are not
   available; other decls fall through to default_unique_section.
   NOTE(review): braces, `break` statements, some `case` labels and the
   `string` variable declaration are elided in this excerpt.  */
6539 static void ATTRIBUTE_UNUSED
6540 x86_64_elf_unique_section (tree decl, int reloc)
6542 if (ix86_in_large_data_p (decl))
6544 const char *prefix = NULL;
6545 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
6546 bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;
6548 switch (categorize_decl_for_section (decl, reloc))
6551 case SECCAT_DATA_REL:
6552 case SECCAT_DATA_REL_LOCAL:
6553 case SECCAT_DATA_REL_RO:
6554 case SECCAT_DATA_REL_RO_LOCAL:
6555 prefix = one_only ? ".ld" : ".ldata";
6558 prefix = one_only ? ".lb" : ".lbss";
6561 case SECCAT_RODATA_MERGE_STR:
6562 case SECCAT_RODATA_MERGE_STR_INIT:
6563 case SECCAT_RODATA_MERGE_CONST:
6564 prefix = one_only ? ".lr" : ".lrodata";
6566 case SECCAT_SRODATA:
6573 /* We don't split these for medium model. Place them into
6574 default sections and hope for best. */
6579 const char *name, *linkonce;
6582 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
6583 name = targetm.strip_name_encoding (name);
6585 /* If we're using one_only, then there needs to be a .gnu.linkonce
6586 prefix to the section name. */
6587 linkonce = one_only ? ".gnu.linkonce" : "";
/* ACONCAT builds the final name on the obstack: [linkonce]prefix.name  */
6589 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
6591 set_decl_section_name (decl, string);
6595 default_unique_section (decl, reloc);
#ifdef COMMON_ASM_OP
6599 /* This says how to output assembler code to declare an
6600 uninitialized external linkage data object.
6602 For medium model x86-64 we need to use .largecomm opcode for
/* Emit either ".largecomm" (medium model, object larger than the
   -mlarge-data-threshold) or the regular COMMON_ASM_OP, followed by
   "name,size,align-in-bytes".  NOTE(review): the return-type line,
   braces, the `else` and the align parameter declaration are elided in
   this excerpt.  */
6605 x86_elf_aligned_common (FILE *file,
6606 const char *name, unsigned HOST_WIDE_INT size,
6609 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
6610 && size > (unsigned int)ix86_section_threshold)
6611 fputs ("\t.largecomm\t", file);
6613 fputs (COMMON_ASM_OP, file);
6614 assemble_name (file, name);
6615 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
6616 size, align / BITS_PER_UNIT);
6620 /* Utility function for targets to use in implementing
6621 ASM_OUTPUT_ALIGNED_BSS. */
/* Place the object in .lbss when it exceeds the large-data threshold
   under a medium code model, otherwise in the normal bss section; then
   emit alignment, the object's label, and SIZE bytes of space.
   NOTE(review): the return-type line, braces and the `else` before the
   bss_section switch are elided in this excerpt.  */
6624 x86_output_aligned_bss (FILE *file, tree decl, const char *name,
6625 unsigned HOST_WIDE_INT size, int align)
6627 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
6628 && size > (unsigned int)ix86_section_threshold)
6629 switch_to_section (get_named_section (decl, ".lbss", 0));
6631 switch_to_section (bss_section);
6632 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
6633 #ifdef ASM_DECLARE_OBJECT_NAME
6634 last_assemble_variable_decl = decl;
6635 ASM_DECLARE_OBJECT_NAME (file, name, decl);
6637 /* Standard thing is just output label for the object. */
6638 ASM_OUTPUT_LABEL (file, name);
6639 #endif /* ASM_DECLARE_OBJECT_NAME */
/* .skip of zero is invalid in some assemblers; always reserve >= 1.  */
6640 ASM_OUTPUT_SKIP (file, size ? size : 1);
6643 /* Decide whether we must probe the stack before any space allocation
6644 on this target. It's essentially TARGET_STACK_PROBE except when
6645 -fstack-check causes the stack to be already probed differently. */
/* NOTE(review): the return-type line, braces and the `return false`
   under the static-check condition are elided in this excerpt.  */
6648 ix86_target_stack_probe (void)
6650 /* Do not probe the stack twice if static stack checking is enabled. */
6651 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
6654 return TARGET_STACK_PROBE;
6657 /* Decide whether we can make a sibling call to a function. DECL is the
6658 declaration of the function being targeted by the call and EXP is the
6659 CALL_EXPR representing the call. */
/* TARGET_FUNCTION_OK_FOR_SIBCALL: reject sibcalls that would clobber
   state the callee or the epilogue still needs (PLT register, stack
   alignment, x87 return stack, ABI-specific clobbered registers, or a
   call-clobbered register needed for an indirect target address).
   NOTE(review): the return-type line, braces, several conditions and
   the declarations of a/b are elided in this excerpt.  */
6662 ix86_function_ok_for_sibcall (tree decl, tree exp)
6664 tree type, decl_or_type;
6666 bool bind_global = decl && !targetm.binds_local_p (decl);
6668 /* If we are generating position-independent code, we cannot sibcall
6669 optimize direct calls to global functions, as the PLT requires
6670 %ebx be live. (Darwin does not have a PLT.) */
6678 /* If we need to align the outgoing stack, then sibcalling would
6679 unalign the stack, which may break the called function. */
6680 if (ix86_minimum_incoming_stack_boundary (true)
6681 < PREFERRED_STACK_BOUNDARY)
/* With a DECL we can query attributes directly; otherwise fall back to
   the function type recovered from the call expression below.  */
6686 decl_or_type = decl;
6687 type = TREE_TYPE (decl);
6691 /* We're looking at the CALL_EXPR, we need the type of the function. */
6692 type = CALL_EXPR_FN (exp); /* pointer expression */
6693 type = TREE_TYPE (type); /* pointer type */
6694 type = TREE_TYPE (type); /* function type */
6695 decl_or_type = type;
6698 /* Check that the return value locations are the same. Like
6699 if we are returning floats on the 80387 register stack, we cannot
6700 make a sibcall from a function that doesn't return a float to a
6701 function that does or, conversely, from a function that does return
6702 a float to a function that doesn't; the necessary stack adjustment
6703 would not be executed. This is also the place we notice
6704 differences in the return value ABI. Note that it is ok for one
6705 of the functions to have void return type as long as the return
6706 value of the other is passed in a register. */
6707 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
6708 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
6710 if (STACK_REG_P (a) || STACK_REG_P (b))
6712 if (!rtx_equal_p (a, b))
6715 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
6717 else if (!rtx_equal_p (a, b))
6722 /* The SYSV ABI has more call-clobbered registers;
6723 disallow sibcalls from MS to SYSV. */
6724 if (cfun->machine->call_abi == MS_ABI
6725 && ix86_function_type_abi (type) == SYSV_ABI)
6730 /* If this call is indirect, we'll need to be able to use a
6731 call-clobbered register for the address of the target function.
6732 Make sure that all such registers are not used for passing
6733 parameters. Note that DLLIMPORT functions and call to global
6734 function via GOT slot are indirect. */
6736 || (bind_global && flag_pic && !flag_plt)
6737 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
6739 /* Check if regparm >= 3 since arg_reg_available is set to
6740 false if regparm == 0. If regparm is 1 or 2, there is
6741 always a call-clobbered register available.
6743 ??? The symbol indirect call doesn't need a call-clobbered
6744 register. But we don't know if this is a symbol indirect
6745 call or not here. */
6746 if (ix86_function_regparm (type, NULL) >= 3
6747 && !cfun->machine->arg_reg_available)
6752 /* Otherwise okay. That also includes certain types of indirect calls. */
6756 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
6757 and "sseregparm" calling convention attributes;
6758 arguments as in struct attribute_spec.handler. */
/* Validates placement of a calling-convention attribute and rejects
   mutually exclusive combinations, setting *NO_ADD_ATTRS when the
   attribute should be dropped.  */
6761 ix86_handle_cconv_attribute (tree *node, tree name,
/* Only function types/decls (and FIELD_DECLs, i.e. function-pointer
   members) may carry a calling-convention attribute.  */
6766 if (TREE_CODE (*node) != FUNCTION_TYPE
6767 && TREE_CODE (*node) != METHOD_TYPE
6768 && TREE_CODE (*node) != FIELD_DECL
6769 && TREE_CODE (*node) != TYPE_DECL)
6771 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6773 *no_add_attrs = true;
6777 /* Can combine regparm with all attributes but fastcall, and thiscall. */
6778 if (is_attribute_p ("regparm", name))
6782 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
6784 error ("fastcall and regparm attributes are not compatible");
6787 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
/* NOTE(review): the diagnostic spells "regparam" although the
   attribute is named "regparm" -- looks like a typo in the message
   text; confirm against upstream before changing.  */
6789 error ("regparam and thiscall attributes are not compatible");
/* Validate the regparm argument: must be an INTEGER_CST no larger
   than REGPARM_MAX.  */
6792 cst = TREE_VALUE (args);
6793 if (TREE_CODE (cst) != INTEGER_CST)
6795 warning (OPT_Wattributes,
6796 "%qE attribute requires an integer constant argument",
6798 *no_add_attrs = true;
6800 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
6802 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
6804 *no_add_attrs = true;
6812 /* Do not warn when emulating the MS ABI. */
6813 if ((TREE_CODE (*node) != FUNCTION_TYPE
6814 && TREE_CODE (*node) != METHOD_TYPE)
6815 || ix86_function_type_abi (*node) != MS_ABI)
6816 warning (OPT_Wattributes, "%qE attribute ignored",
6818 *no_add_attrs = true;
/* Pairwise exclusion checks: each base convention below rejects the
   conventions it cannot be combined with.  */
6822 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
6823 if (is_attribute_p ("fastcall", name))
6825 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
6827 error ("fastcall and cdecl attributes are not compatible");
6829 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
6831 error ("fastcall and stdcall attributes are not compatible");
6833 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
6835 error ("fastcall and regparm attributes are not compatible");
6837 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
6839 error ("fastcall and thiscall attributes are not compatible");
6843 /* Can combine stdcall with fastcall (redundant), regparm and
6845 else if (is_attribute_p ("stdcall", name))
6847 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
6849 error ("stdcall and cdecl attributes are not compatible");
6851 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
6853 error ("stdcall and fastcall attributes are not compatible");
6855 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
6857 error ("stdcall and thiscall attributes are not compatible");
6861 /* Can combine cdecl with regparm and sseregparm. */
6862 else if (is_attribute_p ("cdecl", name))
6864 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
6866 error ("stdcall and cdecl attributes are not compatible");
6868 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
6870 error ("fastcall and cdecl attributes are not compatible");
6872 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
6874 error ("cdecl and thiscall attributes are not compatible");
6877 else if (is_attribute_p ("thiscall", name))
/* thiscall is intended for C++ non-static member functions; warn
   (under -pedantic) when applied to anything else.  */
6879 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
6880 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
6882 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
6884 error ("stdcall and thiscall attributes are not compatible");
6886 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
6888 error ("fastcall and thiscall attributes are not compatible");
6890 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
6892 error ("cdecl and thiscall attributes are not compatible");
6896 /* Can combine sseregparm with all attributes. */
6901 /* The transactional memory builtins are implicitly regparm or fastcall
6902 depending on the ABI. Override the generic do-nothing attribute that
6903 these builtins were declared with, and replace it with one of the two
6904 attributes that we expect elsewhere. */
6907 ix86_handle_tm_regparm_attribute (tree *node, tree, tree,
6908 int flags, bool *no_add_attrs)
6912 /* In no case do we want to add the placeholder attribute. */
6913 *no_add_attrs = true;
6915 /* The 64-bit ABI is unchanged for transactional memory. */
6919 /* ??? Is there a better way to validate 32-bit windows? We have
6920 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
/* CHECK_STACK_LIMIT > 0 is used here as a proxy for 32-bit Windows:
   on that target the TM builtins become fastcall...  */
6921 if (CHECK_STACK_LIMIT > 0)
6922 alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
/* ...otherwise they become regparm(2).  */
6925 alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
6926 alt = tree_cons (get_identifier ("regparm"), alt, NULL);
/* Re-run attribute processing so the substituted attribute takes
   effect on *NODE.  */
6928 decl_attributes (node, alt, flags);
6933 /* This function determines from TYPE the calling-convention. */
/* Returns a mask of IX86_CALLCVT_* bits: exactly one base convention
   (cdecl/stdcall/fastcall/thiscall) plus optional regparm/sseregparm
   modifier bits.  */
6936 ix86_get_callcvt (const_tree type)
6938 unsigned int ret = 0;
6943 return IX86_CALLCVT_CDECL;
/* Explicit attributes take precedence; the base conventions are
   mutually exclusive (if/else chain below).  */
6945 attrs = TYPE_ATTRIBUTES (type);
6946 if (attrs != NULL_TREE)
6948 if (lookup_attribute ("cdecl", attrs))
6949 ret |= IX86_CALLCVT_CDECL;
6950 else if (lookup_attribute ("stdcall", attrs))
6951 ret |= IX86_CALLCVT_STDCALL;
6952 else if (lookup_attribute ("fastcall", attrs))
6953 ret |= IX86_CALLCVT_FASTCALL;
6954 else if (lookup_attribute ("thiscall", attrs))
6955 ret |= IX86_CALLCVT_THISCALL;
6957 /* Regparam isn't allowed for thiscall and fastcall. */
6958 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
6960 if (lookup_attribute ("regparm", attrs))
6961 ret |= IX86_CALLCVT_REGPARM;
6962 if (lookup_attribute ("sseregparm", attrs))
6963 ret |= IX86_CALLCVT_SSEREGPARM;
/* An explicit base convention was found -- done (modulo the modifier
   bits OR-ed in above).  */
6966 if (IX86_BASE_CALLCVT(ret) != 0)
/* No explicit attribute: -mrtd makes non-variadic functions stdcall
   by default.  */
6970 is_stdarg = stdarg_p (type);
6971 if (TARGET_RTD && !is_stdarg)
6972 return IX86_CALLCVT_STDCALL | ret;
/* Default is cdecl, except MS-ABI methods which default to thiscall.  */
6976 || TREE_CODE (type) != METHOD_TYPE
6977 || ix86_function_type_abi (type) != MS_ABI)
6978 return IX86_CALLCVT_CDECL | ret;
6980 return IX86_CALLCVT_THISCALL;
6983 /* Return 0 if the attributes for two types are incompatible, 1 if they
6984 are compatible, and 2 if they are nearly compatible (which causes a
6985 warning to be generated). */
6988 ix86_comp_type_attributes (const_tree type1, const_tree type2)
6990 unsigned int ccvt1, ccvt2;
/* Non-function types carry no calling-convention attributes, so there
   is nothing to compare.  */
6992 if (TREE_CODE (type1) != FUNCTION_TYPE
6993 && TREE_CODE (type1) != METHOD_TYPE)
/* Both the calling-convention masks and the regparm counts must match
   for the two types to be compatible.  */
6996 ccvt1 = ix86_get_callcvt (type1);
6997 ccvt2 = ix86_get_callcvt (type2);
7000 if (ix86_function_regparm (type1, NULL)
7001 != ix86_function_regparm (type2, NULL))
7007 /* Return the regparm value for a function with the indicated TYPE and DECL.
7008 DECL may be NULL when calling function indirectly
7009 or considering a libcall. */
7012 ix86_function_regparm (const_tree type, const_tree decl)
/* 64-bit: the register count is fixed by the ABI, not by attributes.  */
7019 return (ix86_function_type_abi (type) == SYSV_ABI
7020 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
7021 ccvt = ix86_get_callcvt (type);
7022 regparm = ix86_regparm;
/* An explicit regparm(N) attribute overrides the -mregparm default.  */
7024 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
7026 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
7029 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
7033 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
7035 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
7038 /* Use register calling convention for local functions when possible. */
7040 && TREE_CODE (decl) == FUNCTION_DECL)
7042 cgraph_node *target = cgraph_node::get (decl);
/* Resolve aliases/thunks to the actual function body.  */
7044 target = target->function_symbol ();
7046 /* Caller and callee must agree on the calling convention, so
7047 checking here just optimize means that with
7048 __attribute__((optimize (...))) caller could use regparm convention
7049 and callee not, or vice versa. Instead look at whether the callee
7050 is optimized or not. */
7051 if (target && opt_for_fn (target->decl, optimize)
7052 && !(profile_flag && !flag_fentry))
7054 cgraph_local_info *i = &target->local;
/* Only safe when the callee is local (all callers visible) and its
   signature may be changed.  */
7055 if (i && i->local && i->can_change_signature)
7057 int local_regparm, globals = 0, regno;
7059 /* Make sure no regparm register is taken by a
7060 fixed register variable. */
7061 for (local_regparm = 0; local_regparm < REGPARM_MAX;
7063 if (fixed_regs[local_regparm])
7066 /* We don't want to use regparm(3) for nested functions as
7067 these use a static chain pointer in the third argument. */
7068 if (local_regparm == 3 && DECL_STATIC_CHAIN (target->decl))
7071 /* Save a register for the split stack. */
7072 if (local_regparm == 3 && flag_split_stack)
7075 /* Each fixed register usage increases register pressure,
7076 so less registers should be used for argument passing.
7077 This functionality can be overriden by an explicit
7079 for (regno = AX_REG; regno <= DI_REG; regno++)
7080 if (fixed_regs[regno])
/* Shrink the local regparm count by the number of globally fixed
   registers counted above, never going below zero.  */
7084 = globals < local_regparm ? local_regparm - globals : 0;
7086 if (local_regparm > regparm)
7087 regparm = local_regparm;
7095 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
7096 DFmode (2) arguments in SSE registers for a function with the
7097 indicated TYPE and DECL. DECL may be NULL when calling function
7098 indirectly or considering a libcall. Return -1 if any FP parameter
7099 should be rejected by error. This is used in siutation we imply SSE
7100 calling convetion but the function is called from another function with
7101 SSE disabled. Otherwise return 0. */
7104 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
/* This convention only exists for 32-bit code.  */
7106 gcc_assert (!TARGET_64BIT);
7108 /* Use SSE registers to pass SFmode and DFmode arguments if requested
7109 by the sseregparm attribute. */
7110 if (TARGET_SSEREGPARM
7111 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
/* With sseregparm requested but SSE unavailable, emit an error (when
   WARN) naming the decl if we have one, else the type.  */
7118 error ("calling %qD with attribute sseregparm without "
7119 "SSE/SSE2 enabled", decl);
7121 error ("calling %qT with attribute sseregparm without "
7122 "SSE/SSE2 enabled", type);
/* Local functions compiled with SSE math may get SSE argument passing
   even without the attribute.  */
7133 cgraph_node *target = cgraph_node::get (decl);
7135 target = target->function_symbol ();
7137 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
7138 (and DFmode for SSE2) arguments in SSE registers. */
7140 /* TARGET_SSE_MATH */
7141 && (target_opts_for_fn (target->decl)->x_ix86_fpmath & FPMATH_SSE)
7142 && opt_for_fn (target->decl, optimize)
7143 && !(profile_flag && !flag_fentry))
7145 cgraph_local_info *i = &target->local;
7146 if (i && i->local && i->can_change_signature)
7148 /* Refuse to produce wrong code when local function with SSE enabled
7149 is called from SSE disabled function.
7150 FIXME: We need a way to detect these cases cross-ltrans partition
7151 and avoid using SSE calling conventions on local functions called
7152 from function with SSE disabled. For now at least delay the
7153 warning until we know we are going to produce wrong code.
7155 if (!TARGET_SSE && warn)
/* 2 when the callee has SSE2 (SFmode and DFmode in SSE registers),
   else 1 (SFmode only).  */
7157 return TARGET_SSE2_P (target_opts_for_fn (target->decl)
7158 ->x_ix86_isa_flags) ? 2 : 1;
7165 /* Return true if EAX is live at the start of the function. Used by
7166 ix86_expand_prologue to determine if we need special help before
7167 calling allocate_stack_worker. */
7170 ix86_eax_live_at_start_p (void)
7172 /* Cheat. Don't bother working forward from ix86_function_regparm
7173 to the function type to whether an actual argument is located in
7174 eax. Instead just look at cfg info, which is still close enough
7175 to correct at this point. This gives false positives for broken
7176 functions that might use uninitialized data that happens to be
7177 allocated in eax, but who cares? */
7178 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
/* Return true when the caller (not the callee) is responsible for
   popping the hidden aggregate-return pointer for FNTYPE.  */
7182 ix86_keep_aggregate_return_pointer (tree fntype)
/* An explicit callee_pop_aggregate_return(N) attribute decides:
   N == 0 means keep (caller pops).  */
7188 attr = lookup_attribute ("callee_pop_aggregate_return",
7189 TYPE_ATTRIBUTES (fntype));
7191 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
7193 /* For 32-bit MS-ABI the default is to keep aggregate
7195 if (ix86_function_type_abi (fntype) == MS_ABI)
/* Otherwise fall back to the target macro default.  */
7198 return KEEP_AGGREGATE_RETURN_POINTER != 0;
7201 /* Value is the number of bytes of arguments automatically
7202 popped when returning from a subroutine call.
7203 FUNDECL is the declaration node of the function (as a tree),
7204 FUNTYPE is the data type of the function (as a tree),
7205 or for a library call it is an identifier node for the subroutine name.
7206 SIZE is the number of bytes of arguments passed on the stack.
7208 On the 80386, the RTD insn may be used to pop them if the number
7209 of args is fixed, but if the number is variable then the caller
7210 must pop them all. RTD can't be used for library calls now
7211 because the library is compiled with the Unix compiler.
7212 Use of RTD is a selectable option, since it is incompatible with
7213 standard Unix calling sequences. If the option is not selected,
7214 the caller must always pop the args.
7216 The attribute stdcall is equivalent to RTD on a per module basis. */
7219 ix86_return_pops_args (tree fundecl, tree funtype, int size)
7223 /* None of the 64-bit ABIs pop arguments. */
7227 ccvt = ix86_get_callcvt (funtype);
/* Callee-pop conventions (stdcall/fastcall/thiscall) pop SIZE bytes,
   but only for non-variadic functions.  */
7229 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
7230 | IX86_CALLCVT_THISCALL)) != 0
7231 && ! stdarg_p (funtype))
7234 /* Lose any fake structure return argument if it is passed on the stack. */
7235 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
7236 && !ix86_keep_aggregate_return_pointer (funtype))
/* The hidden return pointer is on the stack only when no registers are
   used for argument passing; it is pointer-sized.  */
7238 int nregs = ix86_function_regparm (funtype, fundecl);
7240 return GET_MODE_SIZE (Pmode);
7246 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
7249 ix86_legitimate_combined_insn (rtx_insn *insn)
7251 /* Check operand constraints in case hard registers were propagated
7252 into insn pattern. This check prevents combine pass from
7253 generating insn patterns with invalid hard register operands.
7254 These invalid insns can eventually confuse reload to error out
7255 with a spill failure. See also PRs 46829 and 46843. */
/* Deliberate assignment inside the condition: re-recognize the
   combined pattern and cache the insn code in one step.  */
7256 if ((INSN_CODE (insn) = recog (PATTERN (insn), insn, 0)) >= 0)
7260 extract_insn (insn);
7261 preprocess_constraints (insn);
7263 int n_operands = recog_data.n_operands;
7264 int n_alternatives = recog_data.n_alternatives;
7265 for (i = 0; i < n_operands; i++)
7267 rtx op = recog_data.operand[i];
7268 machine_mode mode = GET_MODE (op);
7269 const operand_alternative *op_alt;
7274 /* For pre-AVX disallow unaligned loads/stores where the
7275 instructions don't support it. */
7277 && VECTOR_MODE_P (mode)
7278 && misaligned_operand (op, mode))
/* Reject the combination when the memory operand is less aligned
   than the insn's ssememalign attribute requires.  */
7280 unsigned int min_align = get_attr_ssememalign (insn);
7282 || MEM_ALIGN (op) < min_align)
7286 /* A unary operator may be accepted by the predicate, but it
7287 is irrelevant for matching constraints. */
/* Strip a SUBREG of a hard register, remembering the register
   offset so constraint checking sees the correct subword.  */
7293 if (REG_P (SUBREG_REG (op))
7294 && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
7295 offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
7296 GET_MODE (SUBREG_REG (op)),
7299 op = SUBREG_REG (op);
/* Only hard-register operands can invalidate an alternative.  */
7302 if (!(REG_P (op) && HARD_REGISTER_P (op)))
7305 op_alt = recog_op_alt;
7307 /* Operand has no constraints, anything is OK. */
7308 win = !n_alternatives;
/* The hard register must satisfy at least one preferred
   alternative's constraint for this operand.  */
7310 alternative_mask preferred = get_preferred_alternatives (insn);
7311 for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
7313 if (!TEST_BIT (preferred, j))
7315 if (op_alt[i].anything_ok
7316 || (op_alt[i].matches != -1
7318 (recog_data.operand[i],
7319 recog_data.operand[op_alt[i].matches]))
7320 || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
7335 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
7337 static unsigned HOST_WIDE_INT
7338 ix86_asan_shadow_offset (void)
7340 return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44)
7341 : HOST_WIDE_INT_C (0x7fff8000))
7342 : (HOST_WIDE_INT_1 << 29);
7345 /* Argument support functions. */
7347 /* Return true when register may be used to pass function parameters. */
7349 ix86_function_arg_regno_p (int regno)
7352 enum calling_abi call_abi;
7353 const int *parm_regs;
/* MPX bound registers can carry pointer-bounds arguments.  */
7355 if (TARGET_MPX && BND_REGNO_P (regno))
/* 32-bit cases: the integer regparm registers, plus SSE/MMX argument
   registers depending on target flags.  */
7361 return (regno < REGPARM_MAX
7362 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
7364 return (regno < REGPARM_MAX
7365 || (TARGET_MMX && MMX_REGNO_P (regno)
7366 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
7367 || (TARGET_SSE && SSE_REGNO_P (regno)
7368 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
/* 64-bit: the first SSE_REGPARM_MAX SSE registers pass arguments.  */
7371 if (TARGET_SSE && SSE_REGNO_P (regno)
7372 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
7375 /* TODO: The function should depend on current function ABI but
7376 builtins.c would need updating then. Therefore we use the
7378 call_abi = ix86_cfun_abi ();
7380 /* RAX is used as hidden argument to va_arg functions. */
7381 if (call_abi == SYSV_ABI && regno == AX_REG)
/* Scan the per-ABI integer parameter register table for REGNO.  */
7384 if (call_abi == MS_ABI)
7385 parm_regs = x86_64_ms_abi_int_parameter_registers;
7387 parm_regs = x86_64_int_parameter_registers;
7389 for (i = 0; i < (call_abi == MS_ABI
7390 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
7391 if (regno == parm_regs[i])
7396 /* Return if we do not know how to pass TYPE solely in registers. */
7399 ix86_must_pass_in_stack (machine_mode mode, const_tree type)
7401 if (must_pass_in_stack_var_size_or_pad (mode, type))
7404 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
7405 The layout_type routine is crafty and tries to trick us into passing
7406 currently unsupported vector types on the stack by using TImode. */
7407 return (!TARGET_64BIT && mode == TImode
7408 && type && TREE_CODE (type) != VECTOR_TYPE);
7411 /* It returns the size, in bytes, of the area reserved for arguments passed
7412 in registers for the function represented by fndecl dependent to the used
7415 ix86_reg_parm_stack_space (const_tree fndecl)
7417 enum calling_abi call_abi = SYSV_ABI;
/* FNDECL may be a FUNCTION_DECL or, for indirect calls, a function
   type; pick the matching ABI query.  */
7418 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
7419 call_abi = ix86_function_abi (fndecl);
7421 call_abi = ix86_function_type_abi (fndecl);
/* Only the 64-bit MS ABI reserves register-parameter stack space (the
   "home area"); everything else reserves none.  */
7422 if (TARGET_64BIT && call_abi == MS_ABI)
7427 /* We add this as a workaround in order to use libc_has_function
/* Thin forwarding wrapper around the libc_has_function target hook.  */
7430 ix86_libc_has_function (enum function_class fn_class)
7432 return targetm.libc_has_function (fn_class);
7435 /* Returns value SYSV_ABI, MS_ABI dependent on fntype,
7436 specifying the call abi used. */
7438 ix86_function_type_abi (const_tree fntype)
7440 enum calling_abi abi = ix86_abi;
/* Without attributes, the configured default ABI applies.  */
7442 if (fntype == NULL_TREE || TYPE_ATTRIBUTES (fntype) == NULL_TREE)
/* ms_abi / sysv_abi attributes flip the ABI away from the default.  */
7446 && lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
/* ms_abi is rejected on x32 (ILP32 on a 64-bit target).  */
7449 error ("X32 does not support ms_abi attribute");
7453 else if (abi == MS_ABI
7454 && lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
7460 static enum calling_abi
7461 ix86_function_abi (const_tree fndecl)
7463 return fndecl ? ix86_function_type_abi (TREE_TYPE (fndecl)) : ix86_abi;
7466 /* Returns value SYSV_ABI, MS_ABI dependent on cfun,
7467 specifying the call abi used. */
7469 ix86_cfun_abi (void)
7471 return cfun ? cfun->machine->call_abi : ix86_abi;
/* Return true iff FN carries the ms_hook_prologue attribute.  The
   attribute is rejected on nested functions.  */
7475 ix86_function_ms_hook_prologue (const_tree fn)
7477 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
7479 if (decl_function_context (fn) != NULL_TREE)
7480 error_at (DECL_SOURCE_LOCATION (fn),
7481 "ms_hook_prologue is not compatible with nested function");
7488 /* Write the extra assembler code needed to declare a function properly. */
7491 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
7494 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
/* Hot-patchable functions are preceded by 0xCC filler bytes so a
   patcher has room before the entry point: 32 bytes on 64-bit,
   16 on 32-bit.  */
7498 int i, filler_count = (TARGET_64BIT ? 32 : 16);
7499 unsigned int filler_cc = 0xcccccccc;
7501 for (i = 0; i < filler_count; i += 4)
7502 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
7505 #ifdef SUBTARGET_ASM_UNWIND_INIT
7506 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
7509 ASM_OUTPUT_LABEL (asm_out_file, fname);
7511 /* Output magic byte marker, if hot-patch attribute is set. */
7516 /* leaq [%rsp + 0], %rsp */
/* 64-bit entry: an 8-byte no-op encoding that a patcher can later
   overwrite in place.  */
7517 asm_fprintf (asm_out_file, ASM_BYTE
7518 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n")
7522 /* movl.s %edi, %edi
7524 movl.s %esp, %ebp */
/* 32-bit entry: hot-patch no-op prologue bytes matching the comment
   above.  */
7525 asm_fprintf (asm_out_file, ASM_BYTE
7526 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
7532 extern void init_regs (void);
7534 /* Implementation of call abi switching target hook. Specific to FNDECL
7535 the specific call register sets are set. See also
7536 ix86_conditional_register_usage for more details. */
7538 ix86_call_abi_override (const_tree fndecl)
/* Record FNDECL's ABI on the function currently being compiled.  */
7540 cfun->machine->call_abi = ix86_function_abi (fndecl);
7543 /* Return 1 if pseudo register should be created and used to hold
7544 GOT address for PIC code. */
7546 ix86_use_pseudo_pic_reg (void)
/* Among other conditions (note the leading &&), the small PIC code
   model is one case that requires the pseudo.  */
7549 && (ix86_cmodel == CM_SMALL_PIC
7556 /* Initialize large model PIC register. */
7559 ix86_init_large_pic_reg (unsigned int tmp_regno)
7561 rtx_code_label *label;
/* The large PIC model is 64-bit only.  */
7564 gcc_assert (Pmode == DImode);
7565 label = gen_label_rtx ();
/* Keep the label even if it looks unused -- it anchors the
   RIP-relative GOT computation below.  */
7567 LABEL_PRESERVE_P (label) = 1;
7568 tmp_reg = gen_rtx_REG (Pmode, tmp_regno);
/* TMP_REGNO must not alias the PIC register we are initializing.  */
7569 gcc_assert (REGNO (pic_offset_table_rtx) != tmp_regno);
/* Load the label address (via RIP) into the PIC register, materialize
   the label-to-GOT offset in TMP_REG, and add the two.  */
7570 emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
7572 emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
7573 emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
7574 pic_offset_table_rtx, tmp_reg));
7577 /* Create and initialize PIC register if required. */
7579 ix86_init_pic_reg (void)
/* Nothing to do unless a pseudo PIC register is in use.  */
7584 if (!ix86_use_pseudo_pic_reg ())
7591 if (ix86_cmodel == CM_LARGE_PIC)
7592 ix86_init_large_pic_reg (R11_REG);
7594 emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
7598 /* If there is future mcount call in the function it is more profitable
7599 to emit SET_GOT into ABI defined REAL_PIC_OFFSET_TABLE_REGNUM. */
7600 rtx reg = crtl->profile
7601 ? gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM)
7602 : pic_offset_table_rtx;
7603 rtx_insn *insn = emit_insn (gen_set_got (reg));
7604 RTX_FRAME_RELATED_P (insn) = 1;
/* When the GOT was set up in the hard register for profiling, copy it
   into the pseudo PIC register afterwards.  */
7606 emit_move_insn (pic_offset_table_rtx, reg);
7607 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
/* Insert the whole sequence on the entry edge so it executes exactly
   once, before any use of the PIC register.  */
7613 entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
7614 insert_insn_on_edge (seq, entry_edge);
7615 commit_one_edge_insertion (entry_edge);
7618 /* Initialize a variable CUM of type CUMULATIVE_ARGS
7619 for a call to a function whose data type is FNTYPE.
7620 For a library call, FNTYPE is 0. */
7623 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
7624 tree fntype, /* tree ptr for function decl */
7625 rtx libname, /* SYMBOL_REF of library name or 0 */
7629 struct cgraph_local_info *i = NULL;
7630 struct cgraph_node *target = NULL;
7632 memset (cum, 0, sizeof (*cum));
/* Determine the callee's ABI, preferring the resolved cgraph node when
   available (sees through aliases), then the decl, then the type.  */
7636 target = cgraph_node::get (fndecl);
7639 target = target->function_symbol ();
7640 i = cgraph_node::local_info (target->decl);
7641 cum->call_abi = ix86_function_abi (target->decl);
7644 cum->call_abi = ix86_function_abi (fndecl);
7647 cum->call_abi = ix86_function_type_abi (fntype);
7649 cum->caller = caller;
7651 /* Set up the number of registers to use for passing arguments. */
7652 cum->nregs = ix86_regparm;
7655 cum->nregs = (cum->call_abi == SYSV_ABI
7656 ? X86_64_REGPARM_MAX
7657 : X86_64_MS_REGPARM_MAX);
7661 cum->sse_nregs = SSE_REGPARM_MAX;
7664 cum->sse_nregs = (cum->call_abi == SYSV_ABI
7665 ? X86_64_SSE_REGPARM_MAX
7666 : X86_64_MS_SSE_REGPARM_MAX);
7670 cum->mmx_nregs = MMX_REGPARM_MAX;
/* Enable the psABI mismatch warnings by default; they are suppressed
   below for calls we know cannot be affected.  */
7671 cum->warn_avx512f = true;
7672 cum->warn_avx = true;
7673 cum->warn_sse = true;
7674 cum->warn_mmx = true;
7676 /* Because type might mismatch in between caller and callee, we need to
7677 use actual type of function for local calls.
7678 FIXME: cgraph_analyze can be told to actually record if function uses
7679 va_start so for local functions maybe_vaarg can be made aggressive
7681 FIXME: once typesytem is fixed, we won't need this code anymore. */
7682 if (i && i->local && i->can_change_signature)
7683 fntype = TREE_TYPE (target->decl);
7684 cum->stdarg = stdarg_p (fntype);
/* Unprototyped or variadic functions may consume varargs.  */
7685 cum->maybe_vaarg = (fntype
7686 ? (!prototype_p (fntype) || stdarg_p (fntype))
/* MPX pointer-bounds bookkeeping starts from the first bound reg.  */
7689 cum->bnd_regno = FIRST_BND_REG;
7690 cum->bnds_in_bt = 0;
7691 cum->force_bnd_pass = 0;
7696 /* If there are variable arguments, then we won't pass anything
7697 in registers in 32-bit mode. */
7698 if (stdarg_p (fntype))
7701 /* Since in 32-bit, variable arguments are always passed on
7702 stack, there is scratch register available for indirect
7704 cfun->machine->arg_reg_available = true;
7707 cum->warn_avx512f = false;
7708 cum->warn_avx = false;
7709 cum->warn_sse = false;
7710 cum->warn_mmx = false;
7714 /* Use ecx and edx registers if function has fastcall attribute,
7715 else look for regparm information. */
7718 unsigned int ccvt = ix86_get_callcvt (fntype);
7719 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
7722 cum->fastcall = 1; /* Same first register as in fastcall. */
7724 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
7730 cum->nregs = ix86_function_regparm (fntype, fndecl);
7733 /* Set up the number of SSE registers used for passing SFmode
7734 and DFmode arguments. Warn for mismatching ABI. */
7735 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
/* Remember whether any argument register remains free for use as a
   scratch in indirect sibcalls.  */
7738 cfun->machine->arg_reg_available = (cum->nregs > 0);
7741 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
7742 But in the case of vector types, it is some vector mode.
7744 When we have only some of our vector isa extensions enabled, then there
7745 are some modes for which vector_mode_supported_p is false. For these
7746 modes, the generic vector support in gcc will choose some non-vector mode
7747 in order to implement the type. By computing the natural mode, we'll
7748 select the proper ABI location for the operand and not depend on whatever
7749 the middle-end decides to do with these vector types.
7751 The midde-end can't deal with the vector types > 16 bytes. In this
7752 case, we return the original mode and warn ABI change if CUM isn't
7755 If INT_RETURN is true, warn ABI change if the vector mode isn't
7756 available for function return value. */
7759 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
7762 machine_mode mode = TYPE_MODE (type);
/* Only vector types whose TYPE_MODE is not already a vector mode need
   special handling.  */
7764 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
7766 HOST_WIDE_INT size = int_size_in_bytes (type);
7767 if ((size == 8 || size == 16 || size == 32 || size == 64)
7768 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
7769 && TYPE_VECTOR_SUBPARTS (type) > 1)
7771 machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
/* Pick the right mode class to search based on the element type.  */
7773 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
7774 mode = MIN_MODE_VECTOR_FLOAT;
7776 mode = MIN_MODE_VECTOR_INT;
7778 /* Get the mode which has this inner mode and number of units. */
7779 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
7780 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
7781 && GET_MODE_INNER (mode) == innermode)
/* The static "warned*" flags below ensure each psABI diagnostic is
   emitted at most once per compilation, separately for argument
   passing and for return values.  */
7783 if (size == 64 && !TARGET_AVX512F && !TARGET_IAMCU)
7785 static bool warnedavx512f;
7786 static bool warnedavx512f_ret;
7788 if (cum && cum->warn_avx512f && !warnedavx512f)
7790 if (warning (OPT_Wpsabi, "AVX512F vector argument "
7791 "without AVX512F enabled changes the ABI"))
7792 warnedavx512f = true;
7794 else if (in_return && !warnedavx512f_ret)
7796 if (warning (OPT_Wpsabi, "AVX512F vector return "
7797 "without AVX512F enabled changes the ABI"))
7798 warnedavx512f_ret = true;
7801 return TYPE_MODE (type);
7803 else if (size == 32 && !TARGET_AVX && !TARGET_IAMCU)
7805 static bool warnedavx;
7806 static bool warnedavx_ret;
7808 if (cum && cum->warn_avx && !warnedavx)
7810 if (warning (OPT_Wpsabi, "AVX vector argument "
7811 "without AVX enabled changes the ABI"))
7814 else if (in_return && !warnedavx_ret)
7816 if (warning (OPT_Wpsabi, "AVX vector return "
7817 "without AVX enabled changes the ABI"))
7818 warnedavx_ret = true;
7821 return TYPE_MODE (type);
7823 else if (((size == 8 && TARGET_64BIT) || size == 16)
7827 static bool warnedsse;
7828 static bool warnedsse_ret;
7830 if (cum && cum->warn_sse && !warnedsse)
7832 if (warning (OPT_Wpsabi, "SSE vector argument "
7833 "without SSE enabled changes the ABI"))
7836 else if (!TARGET_64BIT && in_return && !warnedsse_ret)
7838 if (warning (OPT_Wpsabi, "SSE vector return "
7839 "without SSE enabled changes the ABI"))
7840 warnedsse_ret = true;
7843 else if ((size == 8 && !TARGET_64BIT)
7847 static bool warnedmmx;
7848 static bool warnedmmx_ret;
7850 if (cum && cum->warn_mmx && !warnedmmx)
7852 if (warning (OPT_Wpsabi, "MMX vector argument "
7853 "without MMX enabled changes the ABI"))
7856 else if (in_return && !warnedmmx_ret)
7858 if (warning (OPT_Wpsabi, "MMX vector return "
7859 "without MMX enabled changes the ABI"))
7860 warnedmmx_ret = true;
7873 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
7874 this may not agree with the mode that the type system has chosen for the
7875 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
7876 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
7879 gen_reg_or_parallel (machine_mode mode, machine_mode orig_mode,
/* Common case: the type system already picked a usable mode.  */
7884 if (orig_mode != BLKmode)
7885 tmp = gen_rtx_REG (orig_mode, regno)
/* BLKmode: wrap the natural-mode register in a one-element PARALLEL
   at byte offset 0.  */
7888 tmp = gen_rtx_REG (mode, regno);
7889 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
7890 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
7896 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
7897 of this code is to classify each 8bytes of incoming argument by the register
7898 class and assign registers accordingly. */
7900 /* Return the union class of CLASS1 and CLASS2.
7901 See the x86-64 PS ABI for details. */
7903 static enum x86_64_reg_class
7904 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
7906 /* Rule #1: If both classes are equal, this is the resulting class. */
7907 if (class1 == class2)
7910 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
7912 if (class1 == X86_64_NO_CLASS)
7914 if (class2 == X86_64_NO_CLASS)
7917 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
7918 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
7919 return X86_64_MEMORY_CLASS;
7921 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
7922 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
7923 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
7924 return X86_64_INTEGERSI_CLASS;
7925 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
7926 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
7927 return X86_64_INTEGER_CLASS;
7929 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
7931 if (class1 == X86_64_X87_CLASS
7932 || class1 == X86_64_X87UP_CLASS
7933 || class1 == X86_64_COMPLEX_X87_CLASS
7934 || class2 == X86_64_X87_CLASS
7935 || class2 == X86_64_X87UP_CLASS
7936 || class2 == X86_64_COMPLEX_X87_CLASS)
7937 return X86_64_MEMORY_CLASS;
7939 /* Rule #6: Otherwise class SSE is used. */
7940 return X86_64_SSE_CLASS;
7943 /* Classify the argument of type TYPE and mode MODE.
7944 CLASSES will be filled by the register class used to pass each word
7945 of the operand. The number of words is returned. In case the parameter
7946 should be passed in memory, 0 is returned. As a special case for zero
7947 sized containers, classes[0] will be NO_CLASS and 1 is returned.
7949 BIT_OFFSET is used internally for handling records and specifies offset
7950 of the offset in bits modulo 512 to avoid overflow cases.
7952 See the x86-64 PS ABI for details.
7956 classify_argument (machine_mode mode, const_tree type,
7957 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
7959 HOST_WIDE_INT bytes =
7960 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
7961 int words = CEIL (bytes + (bit_offset % 64) / 8, UNITS_PER_WORD);
7963 /* Variable sized entities are always passed/returned in memory. */
7967 if (mode != VOIDmode
7968 && targetm.calls.must_pass_in_stack (mode, type))
7971 if (type && AGGREGATE_TYPE_P (type))
7975 enum x86_64_reg_class subclasses[MAX_CLASSES];
7977 /* On x86-64 we pass structures larger than 64 bytes on the stack. */
7981 for (i = 0; i < words; i++)
7982 classes[i] = X86_64_NO_CLASS;
7984 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
7985 signalize memory class, so handle it as special case. */
7988 classes[0] = X86_64_NO_CLASS;
7992 /* Classify each field of record and merge classes. */
7993 switch (TREE_CODE (type))
7996 /* And now merge the fields of structure. */
7997 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7999 if (TREE_CODE (field) == FIELD_DECL)
8003 if (TREE_TYPE (field) == error_mark_node)
8006 /* Bitfields are always classified as integer. Handle them
8007 early, since later code would consider them to be
8008 misaligned integers. */
8009 if (DECL_BIT_FIELD (field))
8011 for (i = (int_bit_position (field)
8012 + (bit_offset % 64)) / 8 / 8;
8013 i < ((int_bit_position (field) + (bit_offset % 64))
8014 + tree_to_shwi (DECL_SIZE (field))
8017 merge_classes (X86_64_INTEGER_CLASS,
8024 type = TREE_TYPE (field);
8026 /* Flexible array member is ignored. */
8027 if (TYPE_MODE (type) == BLKmode
8028 && TREE_CODE (type) == ARRAY_TYPE
8029 && TYPE_SIZE (type) == NULL_TREE
8030 && TYPE_DOMAIN (type) != NULL_TREE
8031 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
8036 if (!warned && warn_psabi)
8039 inform (input_location,
8040 "the ABI of passing struct with"
8041 " a flexible array member has"
8042 " changed in GCC 4.4");
8046 num = classify_argument (TYPE_MODE (type), type,
8048 (int_bit_position (field)
8049 + bit_offset) % 512);
8052 pos = (int_bit_position (field)
8053 + (bit_offset % 64)) / 8 / 8;
8054 for (i = 0; i < num && (i + pos) < words; i++)
8056 merge_classes (subclasses[i], classes[i + pos]);
8063 /* Arrays are handled as small records. */
8066 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
8067 TREE_TYPE (type), subclasses, bit_offset);
8071 /* The partial classes are now full classes. */
8072 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
8073 subclasses[0] = X86_64_SSE_CLASS;
8074 if (subclasses[0] == X86_64_INTEGERSI_CLASS
8075 && !((bit_offset % 64) == 0 && bytes == 4))
8076 subclasses[0] = X86_64_INTEGER_CLASS;
8078 for (i = 0; i < words; i++)
8079 classes[i] = subclasses[i % num];
8084 case QUAL_UNION_TYPE:
8085 /* Unions are similar to RECORD_TYPE but offset is always 0.
8087 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
8089 if (TREE_CODE (field) == FIELD_DECL)
8093 if (TREE_TYPE (field) == error_mark_node)
8096 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
8097 TREE_TYPE (field), subclasses,
8101 for (i = 0; i < num && i < words; i++)
8102 classes[i] = merge_classes (subclasses[i], classes[i]);
8113 /* When size > 16 bytes, if the first one isn't
8114 X86_64_SSE_CLASS or any other ones aren't
8115 X86_64_SSEUP_CLASS, everything should be passed in
8117 if (classes[0] != X86_64_SSE_CLASS)
8120 for (i = 1; i < words; i++)
8121 if (classes[i] != X86_64_SSEUP_CLASS)
8125 /* Final merger cleanup. */
8126 for (i = 0; i < words; i++)
8128 /* If one class is MEMORY, everything should be passed in
8130 if (classes[i] == X86_64_MEMORY_CLASS)
8133 /* The X86_64_SSEUP_CLASS should be always preceded by
8134 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
8135 if (classes[i] == X86_64_SSEUP_CLASS
8136 && classes[i - 1] != X86_64_SSE_CLASS
8137 && classes[i - 1] != X86_64_SSEUP_CLASS)
8139 /* The first one should never be X86_64_SSEUP_CLASS. */
8140 gcc_assert (i != 0);
8141 classes[i] = X86_64_SSE_CLASS;
8144 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
8145 everything should be passed in memory. */
8146 if (classes[i] == X86_64_X87UP_CLASS
8147 && (classes[i - 1] != X86_64_X87_CLASS))
8151 /* The first one should never be X86_64_X87UP_CLASS. */
8152 gcc_assert (i != 0);
8153 if (!warned && warn_psabi)
8156 inform (input_location,
8157 "the ABI of passing union with long double"
8158 " has changed in GCC 4.4");
8166 /* Compute alignment needed. We align all types to natural boundaries with
8167 exception of XFmode that is aligned to 64bits. */
8168 if (mode != VOIDmode && mode != BLKmode)
8170 int mode_alignment = GET_MODE_BITSIZE (mode);
8173 mode_alignment = 128;
8174 else if (mode == XCmode)
8175 mode_alignment = 256;
8176 if (COMPLEX_MODE_P (mode))
8177 mode_alignment /= 2;
8178 /* Misaligned fields are always returned in memory. */
8179 if (bit_offset % mode_alignment)
8183 /* for V1xx modes, just use the base mode */
8184 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
8185 && GET_MODE_UNIT_SIZE (mode) == bytes)
8186 mode = GET_MODE_INNER (mode);
8188 /* Classification of atomic types. */
8193 classes[0] = X86_64_SSE_CLASS;
8196 classes[0] = X86_64_SSE_CLASS;
8197 classes[1] = X86_64_SSEUP_CLASS;
8207 int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
8209 /* Analyze last 128 bits only. */
8210 size = (size - 1) & 0x7f;
8214 classes[0] = X86_64_INTEGERSI_CLASS;
8219 classes[0] = X86_64_INTEGER_CLASS;
8222 else if (size < 64+32)
8224 classes[0] = X86_64_INTEGER_CLASS;
8225 classes[1] = X86_64_INTEGERSI_CLASS;
8228 else if (size < 64+64)
8230 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
8238 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
8242 /* OImode shouldn't be used directly. */
8247 if (!(bit_offset % 64))
8248 classes[0] = X86_64_SSESF_CLASS;
8250 classes[0] = X86_64_SSE_CLASS;
8253 classes[0] = X86_64_SSEDF_CLASS;
8256 classes[0] = X86_64_X87_CLASS;
8257 classes[1] = X86_64_X87UP_CLASS;
8260 classes[0] = X86_64_SSE_CLASS;
8261 classes[1] = X86_64_SSEUP_CLASS;
8264 classes[0] = X86_64_SSE_CLASS;
8265 if (!(bit_offset % 64))
8271 if (!warned && warn_psabi)
8274 inform (input_location,
8275 "the ABI of passing structure with complex float"
8276 " member has changed in GCC 4.4");
8278 classes[1] = X86_64_SSESF_CLASS;
8282 classes[0] = X86_64_SSEDF_CLASS;
8283 classes[1] = X86_64_SSEDF_CLASS;
8286 classes[0] = X86_64_COMPLEX_X87_CLASS;
8289 /* This modes is larger than 16 bytes. */
8297 classes[0] = X86_64_SSE_CLASS;
8298 classes[1] = X86_64_SSEUP_CLASS;
8299 classes[2] = X86_64_SSEUP_CLASS;
8300 classes[3] = X86_64_SSEUP_CLASS;
8308 classes[0] = X86_64_SSE_CLASS;
8309 classes[1] = X86_64_SSEUP_CLASS;
8310 classes[2] = X86_64_SSEUP_CLASS;
8311 classes[3] = X86_64_SSEUP_CLASS;
8312 classes[4] = X86_64_SSEUP_CLASS;
8313 classes[5] = X86_64_SSEUP_CLASS;
8314 classes[6] = X86_64_SSEUP_CLASS;
8315 classes[7] = X86_64_SSEUP_CLASS;
8323 classes[0] = X86_64_SSE_CLASS;
8324 classes[1] = X86_64_SSEUP_CLASS;
8332 classes[0] = X86_64_SSE_CLASS;
8338 gcc_assert (VECTOR_MODE_P (mode));
8343 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
8345 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
8346 classes[0] = X86_64_INTEGERSI_CLASS;
8348 classes[0] = X86_64_INTEGER_CLASS;
8349 classes[1] = X86_64_INTEGER_CLASS;
8350 return 1 + (bytes > 8);
8354 /* Examine the argument and return set number of register required in each
8355 class. Return true iff parameter should be passed in memory. */
/* NOTE(review): this is an elided excerpt; the per-case counter updates and
   the return statements between the numbered lines are not visible here.  */
8358 examine_argument (machine_mode mode, const_tree type, int in_return,
8359 int *int_nregs, int *sse_nregs)
8361 enum x86_64_reg_class regclass[MAX_CLASSES];
/* Classify the argument into up to MAX_CLASSES 8-byte chunks.  */
8362 int n = classify_argument (mode, type, regclass, 0);
/* Walk the classes backwards, tallying how many GPRs vs. SSE regs the
   argument needs; MEMORY_CLASS forces a pass-in-memory result.  */
8369 for (n--; n >= 0; n--)
8370 switch (regclass[n])
8372 case X86_64_INTEGER_CLASS:
8373 case X86_64_INTEGERSI_CLASS:
8376 case X86_64_SSE_CLASS:
8377 case X86_64_SSESF_CLASS:
8378 case X86_64_SSEDF_CLASS:
8381 case X86_64_NO_CLASS:
8382 case X86_64_SSEUP_CLASS:
8384 case X86_64_X87_CLASS:
8385 case X86_64_X87UP_CLASS:
8386 case X86_64_COMPLEX_X87_CLASS:
8390 case X86_64_MEMORY_CLASS:
8397 /* Construct container for the argument used by GCC interface. See
8398 FUNCTION_ARG for the detailed description. */
/* NOTE(review): elided excerpt -- several body lines (declarations of n, i,
   nexps, ret; some case labels and breaks) are missing between the numbered
   lines.  Returns an rtx (REG or PARALLEL) describing where the value lives,
   or signals pass-in-memory for cases not shown here.  */
8401 construct_container (machine_mode mode, machine_mode orig_mode,
8402 const_tree type, int in_return, int nintregs, int nsseregs,
8403 const int *intreg, int sse_regno)
8405 /* The following variables hold the static issued_error state. */
/* These statics make each diagnostic fire at most once per compilation.  */
8406 static bool issued_sse_arg_error;
8407 static bool issued_sse_ret_error;
8408 static bool issued_x87_ret_error;
8410 machine_mode tmpmode;
8412 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
8413 enum x86_64_reg_class regclass[MAX_CLASSES];
8417 int needed_sseregs, needed_intregs;
8418 rtx exp[MAX_CLASSES];
8421 n = classify_argument (mode, type, regclass, 0);
8424 if (examine_argument (mode, type, in_return, &needed_intregs,
/* Not enough free registers of the required kind: pass in memory.  */
8427 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
8430 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
8431 some less clueful developer tries to use floating-point anyway. */
8432 if (needed_sseregs && !TARGET_SSE)
8436 if (!issued_sse_ret_error)
8438 error ("SSE register return with SSE disabled");
8439 issued_sse_ret_error = true;
8442 else if (!issued_sse_arg_error)
8444 error ("SSE register argument with SSE disabled");
8445 issued_sse_arg_error = true;
8450 /* Likewise, error if the ABI requires us to return values in the
8451 x87 registers and the user specified -mno-80387. */
8452 if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
8453 for (i = 0; i < n; i++)
8454 if (regclass[i] == X86_64_X87_CLASS
8455 || regclass[i] == X86_64_X87UP_CLASS
8456 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
8458 if (!issued_x87_ret_error)
8460 error ("x87 register return with x87 disabled");
8461 issued_x87_ret_error = true;
8466 /* First construct simple cases. Avoid SCmode, since we want to use
8467 single register to pass this type. */
8468 if (n == 1 && mode != SCmode)
8469 switch (regclass[0])
8471 case X86_64_INTEGER_CLASS:
8472 case X86_64_INTEGERSI_CLASS:
8473 return gen_rtx_REG (mode, intreg[0]);
8474 case X86_64_SSE_CLASS:
8475 case X86_64_SSESF_CLASS:
8476 case X86_64_SSEDF_CLASS:
8477 if (mode != BLKmode)
8478 return gen_reg_or_parallel (mode, orig_mode,
8479 SSE_REGNO (sse_regno))
8481 case X86_64_X87_CLASS:
8482 case X86_64_COMPLEX_X87_CLASS:
8483 return gen_rtx_REG (mode, FIRST_STACK_REG);
8484 case X86_64_NO_CLASS:
8485 /* Zero sized array, struct or class. */
/* Two-eightbyte SSE value (e.g. a 16-byte vector) in a single XMM reg.  */
8491 && regclass[0] == X86_64_SSE_CLASS
8492 && regclass[1] == X86_64_SSEUP_CLASS
8494 return gen_reg_or_parallel (mode, orig_mode,
8495 SSE_REGNO (sse_regno));
/* Four-eightbyte SSE value (32-byte vector) in a single YMM reg.  */
8497 && regclass[0] == X86_64_SSE_CLASS
8498 && regclass[1] == X86_64_SSEUP_CLASS
8499 && regclass[2] == X86_64_SSEUP_CLASS
8500 && regclass[3] == X86_64_SSEUP_CLASS
8502 return gen_reg_or_parallel (mode, orig_mode,
8503 SSE_REGNO (sse_regno));
/* Eight-eightbyte SSE value (64-byte vector) in a single ZMM reg.  */
8505 && regclass[0] == X86_64_SSE_CLASS
8506 && regclass[1] == X86_64_SSEUP_CLASS
8507 && regclass[2] == X86_64_SSEUP_CLASS
8508 && regclass[3] == X86_64_SSEUP_CLASS
8509 && regclass[4] == X86_64_SSEUP_CLASS
8510 && regclass[5] == X86_64_SSEUP_CLASS
8511 && regclass[6] == X86_64_SSEUP_CLASS
8512 && regclass[7] == X86_64_SSEUP_CLASS
8514 return gen_reg_or_parallel (mode, orig_mode,
8515 SSE_REGNO (sse_regno));
/* 80-bit long double occupies one x87 stack register.  */
8517 && regclass[0] == X86_64_X87_CLASS
8518 && regclass[1] == X86_64_X87UP_CLASS)
8519 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
/* 128-bit integer in a consecutive GPR pair.  */
8522 && regclass[0] == X86_64_INTEGER_CLASS
8523 && regclass[1] == X86_64_INTEGER_CLASS
8524 && (mode == CDImode || mode == TImode)
8525 && intreg[0] + 1 == intreg[1])
8526 return gen_rtx_REG (mode, intreg[0]);
8528 /* Otherwise figure out the entries of the PARALLEL. */
8529 for (i = 0; i < n; i++)
8533 switch (regclass[i])
8535 case X86_64_NO_CLASS:
8537 case X86_64_INTEGER_CLASS:
8538 case X86_64_INTEGERSI_CLASS:
8539 /* Merge TImodes on aligned occasions here too. */
8540 if (i * 8 + 8 > bytes)
8542 = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
8543 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
8547 /* We've requested 24 bytes we
8548 don't have mode for. Use DImode. */
8549 if (tmpmode == BLKmode)
8552 = gen_rtx_EXPR_LIST (VOIDmode,
8553 gen_rtx_REG (tmpmode, *intreg),
8557 case X86_64_SSESF_CLASS:
8559 = gen_rtx_EXPR_LIST (VOIDmode,
8560 gen_rtx_REG (SFmode,
8561 SSE_REGNO (sse_regno)),
8565 case X86_64_SSEDF_CLASS:
8567 = gen_rtx_EXPR_LIST (VOIDmode,
8568 gen_rtx_REG (DFmode,
8569 SSE_REGNO (sse_regno)),
8573 case X86_64_SSE_CLASS:
/* Leading SSE chunk: widen tmpmode to cover the following SSEUP
   chunks so one wide register entry is emitted.  */
8581 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
8591 && regclass[1] == X86_64_SSEUP_CLASS
8592 && regclass[2] == X86_64_SSEUP_CLASS
8593 && regclass[3] == X86_64_SSEUP_CLASS);
8599 && regclass[1] == X86_64_SSEUP_CLASS
8600 && regclass[2] == X86_64_SSEUP_CLASS
8601 && regclass[3] == X86_64_SSEUP_CLASS
8602 && regclass[4] == X86_64_SSEUP_CLASS
8603 && regclass[5] == X86_64_SSEUP_CLASS
8604 && regclass[6] == X86_64_SSEUP_CLASS
8605 && regclass[7] == X86_64_SSEUP_CLASS);
8613 = gen_rtx_EXPR_LIST (VOIDmode,
8614 gen_rtx_REG (tmpmode,
8615 SSE_REGNO (sse_regno)),
8624 /* Empty aligned struct, union or class. */
/* Assemble the collected EXPR_LIST entries into the final PARALLEL.  */
8628 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
8629 for (i = 0; i < nexps; i++)
8630 XVECEXP (ret, 0, i) = exp [i];
8634 /* Update the data in CUM to advance over an argument of mode MODE
8635 and data type TYPE. (TYPE is null for libcalls where that information
8636 may not be available.)
8638 Return a number of integer registers advanced over. */
/* NOTE(review): elided excerpt -- the mode switch labels and several returns
   between the numbered lines are not visible here.  */
8641 function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
8642 const_tree type, HOST_WIDE_INT bytes,
8643 HOST_WIDE_INT words)
/* NOTE(review): NULL used to initialize a bool; should be `false'
   (behaves the same, but is misleading).  */
8646 bool error_p = NULL;
8650 /* Intel MCU psABI passes scalars and aggregates no larger than 8
8651 bytes in registers. */
8652 if (!VECTOR_MODE_P (mode) && bytes <= 8)
/* Consume WORDS integer registers for this argument.  */
8672 cum->words += words;
8673 cum->nregs -= words;
8674 cum->regno += words;
8675 if (cum->nregs >= 0)
8677 if (cum->nregs <= 0)
/* Ran out of GPRs: remaining args go on the stack.  */
8680 cfun->machine->arg_reg_available = false;
8686 /* OImode shouldn't be used directly. */
/* float_in_sse == -1 flags a required-but-unavailable SSE convention;
   the thresholds (< 2 for DFmode, < 1 for SFmode) select the error path.  */
8690 if (cum->float_in_sse == -1)
8692 if (cum->float_in_sse < 2)
8695 if (cum->float_in_sse == -1)
8697 if (cum->float_in_sse < 1)
/* Scalar (non-aggregate) vector args consume one SSE register.  */
8720 if (!type || !AGGREGATE_TYPE_P (type))
8722 cum->sse_words += words;
8723 cum->sse_nregs -= 1;
8724 cum->sse_regno += 1;
8725 if (cum->sse_nregs <= 0)
/* Likewise for MMX-register candidates.  */
8739 if (!type || !AGGREGATE_TYPE_P (type))
8741 cum->mmx_words += words;
8742 cum->mmx_nregs -= 1;
8743 cum->mmx_regno += 1;
8744 if (cum->mmx_nregs <= 0)
/* Error path: reset float_in_sse so the diagnostic is emitted once.  */
8754 cum->float_in_sse = 0;
8755 error ("calling %qD with SSE calling convention without "
8756 "SSE/SSE2 enabled", cum->decl);
8757 sorry ("this is a GCC bug that can be worked around by adding "
8758 "attribute used to function called");
/* Advance CUM over one x86-64 SysV-ABI argument.  Registers are consumed
   only when the whole argument fits in the remaining GPR/SSE registers;
   otherwise the argument goes on the stack and CUM->words advances.  */
8765 function_arg_advance_64 (CUMULATIVE_ARGS *cum, machine_mode mode,
8766 const_tree type, HOST_WIDE_INT words, bool named)
8768 int int_nregs, sse_nregs;
8770 /* Unnamed 512 and 256bit vector mode parameters are passed on stack. */
8771 if (!named && (VALID_AVX512F_REG_MODE (mode)
8772 || VALID_AVX256_REG_MODE (mode)))
/* In registers only if classification succeeds and both register
   kinds have enough capacity left.  */
8775 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
8776 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
8778 cum->nregs -= int_nregs;
8779 cum->sse_nregs -= sse_nregs;
8780 cum->regno += int_nregs;
8781 cum->sse_regno += sse_nregs;
/* Stack case: align the slot to the argument's boundary first.  */
8786 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
8787 cum->words = ROUND_UP (cum->words, align);
8788 cum->words += words;
/* Advance CUM over one MS-ABI (Windows x64) argument.  Every argument
   occupies exactly one slot; larger objects are passed by reference.  */
8794 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
8795 HOST_WIDE_INT words)
8797 /* Otherwise, this should be passed indirect. */
8798 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
8800 cum->words += words;
8810 /* Update the data in CUM to advance over an argument of mode MODE and
8811 data type TYPE. (TYPE is null for libcalls where that information
8812 may not be available.) */
/* Target hook: dispatches to the 32-bit, SysV-64 or MS-64 helper and
   maintains MPX (pointer-bounds) bookkeeping around the call.
   NOTE(review): elided excerpt; some early returns and TARGET_64BIT
   dispatch lines are missing between the numbered lines.  */
8815 ix86_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
8816 const_tree type, bool named)
8818 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8819 HOST_WIDE_INT bytes, words;
8822 if (mode == BLKmode)
8823 bytes = int_size_in_bytes (type);
8825 bytes = GET_MODE_SIZE (mode);
8826 words = CEIL (bytes, UNITS_PER_WORD);
8829 mode = type_natural_mode (type, NULL, false);
/* Pointer-bounds (MPX) arguments are bookkept separately from
   ordinary registers.  */
8831 if ((type && POINTER_BOUNDS_TYPE_P (type))
8832 || POINTER_BOUNDS_MODE_P (mode))
8834 /* If we pass bounds in BT then just update remained bounds count. */
8835 if (cum->bnds_in_bt)
8841 /* Update remained number of bounds to force. */
8842 if (cum->force_bnd_pass)
8843 cum->force_bnd_pass--;
8850 /* The first arg not going to Bounds Tables resets this counter. */
8851 cum->bnds_in_bt = 0;
8852 /* For unnamed args we always pass bounds to avoid bounds mess when
8853 passed and received types do not match. If bounds do not follow
8854 unnamed arg, still pretend required number of bounds were passed. */
8855 if (cum->force_bnd_pass)
8857 cum->bnd_regno += cum->force_bnd_pass;
8858 cum->force_bnd_pass = 0;
8863 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
8865 if (call_abi == MS_ABI)
8866 nregs = function_arg_advance_ms_64 (cum, bytes, words);
8868 nregs = function_arg_advance_64 (cum, mode, type, words, named);
8871 nregs = function_arg_advance_32 (cum, mode, type, bytes, words);
8873 /* For stdarg we expect bounds to be passed for each value passed
8876 cum->force_bnd_pass = nregs;
8877 /* For pointers passed in memory we expect bounds passed in Bounds
8880 cum->bnds_in_bt = chkp_type_bounds_count (type);
8883 /* Define where to put the arguments to a function.
8884 Value is zero to push the argument on the stack,
8885 or a hard register in which to store the argument.
8887 MODE is the argument's machine mode.
8888 TYPE is the data type of the argument (as a tree).
8889 This is null for libcalls where that information may
8891 CUM is a variable of type CUMULATIVE_ARGS which gives info about
8892 the preceding args and about the function being called.
8893 NAMED is nonzero if this argument is a named parameter
8894 (otherwise it is an extra parameter matching an ellipsis). */
/* NOTE(review): elided excerpt -- the mode switch labels, several returns
   and the final NULL-return fallthrough are not visible here.  */
8897 function_arg_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
8898 machine_mode orig_mode, const_tree type,
8899 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
8901 bool error_p = false;
8902 /* Avoid the AL settings for the Unix64 ABI. */
8903 if (mode == VOIDmode)
8908 /* Intel MCU psABI passes scalars and aggregates no larger than 8
8909 bytes in registers. */
8910 if (!VECTOR_MODE_P (mode) && bytes <= 8)
/* Integer argument fits in the remaining regparm registers.  */
8929 if (words <= cum->nregs)
8931 int regno = cum->regno;
8933 /* Fastcall allocates the first two DWORD (SImode) or
8934 smaller arguments to ECX and EDX if it isn't an
8940 || (type && AGGREGATE_TYPE_P (type)))
8943 /* ECX not EAX is the first allocated register. */
8944 if (regno == AX_REG)
8947 return gen_rtx_REG (mode, regno);
/* As in function_arg_advance_32: float_in_sse == -1 selects the
   missing-SSE error path; thresholds pick SF vs. DF handling.  */
8952 if (cum->float_in_sse == -1)
8954 if (cum->float_in_sse < 2)
8957 if (cum->float_in_sse == -1)
8959 if (cum->float_in_sse < 1)
8963 /* In 32bit, we pass TImode in xmm registers. */
8970 if (!type || !AGGREGATE_TYPE_P (type))
8973 return gen_reg_or_parallel (mode, orig_mode,
8974 cum->sse_regno + FIRST_SSE_REG);
8980 /* OImode and XImode shouldn't be used directly. */
8995 if (!type || !AGGREGATE_TYPE_P (type))
8998 return gen_reg_or_parallel (mode, orig_mode,
8999 cum->sse_regno + FIRST_SSE_REG);
/* 8-byte vectors may go in MMX registers.  */
9009 if (!type || !AGGREGATE_TYPE_P (type))
9012 return gen_reg_or_parallel (mode, orig_mode,
9013 cum->mmx_regno + FIRST_MMX_REG);
9019 cum->float_in_sse = 0;
9020 error ("calling %qD with SSE calling convention without "
9021 "SSE/SSE2 enabled", cum->decl);
9022 sorry ("this is a GCC bug that can be worked around by adding "
9023 "attribute used to function called");
/* Return the register(s) for one x86-64 SysV-ABI argument, or let the
   caller fall back to the stack.  NOTE(review): elided excerpt; the
   VOIDmode ternary's false arm and the early-exit paths are missing.  */
9030 function_arg_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
9031 machine_mode orig_mode, const_tree type, bool named)
9033 /* Handle a hidden AL argument containing number of registers
9034 for varargs x86-64 functions. */
9035 if (mode == VOIDmode)
9036 return GEN_INT (cum->maybe_vaarg
9037 ? (cum->sse_nregs < 0
9038 ? X86_64_SSE_REGPARM_MAX
9059 /* Unnamed 256 and 512bit vector mode parameters are passed on stack. */
/* Delegate register assignment to the generic container builder,
   starting from the next free GPR and SSE register.  */
9065 return construct_container (mode, orig_mode, type, 0, cum->nregs,
9067 &x86_64_int_parameter_registers [cum->regno],
/* Return the register for one MS-ABI (Windows x64) argument.  Each of the
   first four arguments has a fixed slot shared between the integer and
   SSE register files.  */
9072 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
9073 machine_mode orig_mode, bool named,
9074 HOST_WIDE_INT bytes)
9078 /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call.
9079 We use value of -2 to specify that current function call is MSABI. */
9080 if (mode == VOIDmode)
9081 return GEN_INT (-2);
9083 /* If we've run out of registers, it goes on the stack. */
9084 if (cum->nregs == 0)
9087 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
9089 /* Only floating point modes are passed in anything but integer regs. */
9090 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
9093 regno = cum->regno + FIRST_SSE_REG;
9098 /* Unnamed floating parameters are passed in both the
9099 SSE and integer registers. */
/* Varargs float: build a PARALLEL naming both the SSE and the GPR slot
   so the caller stores the value in both.  */
9100 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
9101 t2 = gen_rtx_REG (mode, regno);
9102 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
9103 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
9104 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
9107 /* Handle aggregated types passed in register. */
9108 if (orig_mode == BLKmode)
9110 if (bytes > 0 && bytes <= 8)
9111 mode = (bytes > 4 ? DImode : SImode);
9112 if (mode == BLKmode)
9116 return gen_reg_or_parallel (mode, orig_mode, regno);
9119 /* Return where to put the arguments to a function.
9120 Return zero to push the argument on the stack, or a hard register in which to store the argument.
9122 MODE is the argument's machine mode. TYPE is the data type of the
9123 argument. It is null for libcalls where that information may not be
9124 available. CUM gives information about the preceding args and about
9125 the function being called. NAMED is nonzero if this argument is a
9126 named parameter (otherwise it is an extra parameter matching an
/* Target hook: handles pointer-bounds args itself, otherwise dispatches
   to the 32-bit, SysV-64 or MS-64 helper.  NOTE(review): elided excerpt;
   early returns and the final `return arg;' are not visible here.  */
9130 ix86_function_arg (cumulative_args_t cum_v, machine_mode omode,
9131 const_tree type, bool named)
9133 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9134 machine_mode mode = omode;
9135 HOST_WIDE_INT bytes, words;
9138 /* All pointer bounds arguments are handled separately here. */
9139 if ((type && POINTER_BOUNDS_TYPE_P (type))
9140 || POINTER_BOUNDS_MODE_P (mode))
9142 /* Return NULL if bounds are forced to go in Bounds Table. */
9143 if (cum->bnds_in_bt)
9145 /* Return the next available bound reg if any. */
9146 else if (cum->bnd_regno <= LAST_BND_REG)
9147 arg = gen_rtx_REG (BNDmode, cum->bnd_regno)
9148 /* Return the next special slot number otherwise. */
9150 arg = GEN_INT (cum->bnd_regno - LAST_BND_REG - 1);
9155 if (mode == BLKmode)
9156 bytes = int_size_in_bytes (type);
9158 bytes = GET_MODE_SIZE (mode);
9159 words = CEIL (bytes, UNITS_PER_WORD);
9161 /* To simplify the code below, represent vector types with a vector mode
9162 even if MMX/SSE are not active. */
9163 if (type && TREE_CODE (type) == VECTOR_TYPE)
9164 mode = type_natural_mode (type, cum, false);
9168 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
9170 if (call_abi == MS_ABI)
9171 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
9173 arg = function_arg_64 (cum, mode, omode, type, named);
9176 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
9181 /* A C expression that indicates when an argument must be passed by
9182 reference. If nonzero for an argument, a copy of that argument is
9183 made in memory and a pointer to the argument is passed instead of
9184 the argument itself. The pointer is passed in whatever way is
9185 appropriate for passing a pointer to that type. */
/* NOTE(review): elided excerpt; some returns between the numbered lines
   are missing.  */
9188 ix86_pass_by_reference (cumulative_args_t cum_v, machine_mode mode,
9189 const_tree type, bool)
9191 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9193 /* Bounds are never passed by reference. */
9194 if ((type && POINTER_BOUNDS_TYPE_P (type))
9195 || POINTER_BOUNDS_MODE_P (mode))
9200 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
9202 /* See Windows x64 Software Convention. */
9203 if (call_abi == MS_ABI)
9205 HOST_WIDE_INT msize = GET_MODE_SIZE (mode);
9209 /* Arrays are passed by reference. */
9210 if (TREE_CODE (type) == ARRAY_TYPE)
9213 if (RECORD_OR_UNION_TYPE_P (type))
9215 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
9216 are passed by reference. */
9217 msize = int_size_in_bytes (type);
9221 /* __m128 is passed by reference. */
/* MS ABI: anything whose size is not exactly 1, 2, 4 or 8 bytes goes
   by reference.  */
9222 return msize != 1 && msize != 2 && msize != 4 && msize != 8;
/* SysV: variable-sized types (int_size_in_bytes == -1) go by reference.  */
9224 else if (type && int_size_in_bytes (type) == -1)
9231 /* Return true when TYPE should be 128bit aligned for 32bit argument
9232 passing ABI. XXX: This function is obsolete and is only used for
9233 checking psABI compatibility with previous versions of GCC. */
9236 ix86_compat_aligned_value_p (const_tree type)
9238 machine_mode mode = TYPE_MODE (type);
/* Scalar case: SSE modes (when SSE enabled) force 128-bit alignment,
   unless user alignment lowered it below 128.  */
9239 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
9243 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
9245 if (TYPE_ALIGN (type) < 128)
9248 if (AGGREGATE_TYPE_P (type))
9250 /* Walk the aggregates recursively. */
9251 switch (TREE_CODE (type))
9255 case QUAL_UNION_TYPE:
9259 /* Walk all the structure fields. */
9260 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
9262 if (TREE_CODE (field) == FIELD_DECL
9263 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
9270 /* Just for use if some languages passes arrays by value. */
9271 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
9282 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
9283 XXX: This function is obsolete and is only used for checking psABI
9284 compatibility with previous versions of GCC. */
9287 ix86_compat_function_arg_boundary (machine_mode mode,
9288 const_tree type, unsigned int align)
9290 /* In 32bit, only _Decimal128 and __float128 are aligned to their
9291 natural boundaries. */
9292 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
9294 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
9295 make an exception for SSE modes since these require 128bit
9298 The handling here differs from field_alignment. ICC aligns MMX
9299 arguments to 4 byte boundaries, while structure fields are aligned
9300 to 8 byte boundaries. */
9303 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
9304 align = PARM_BOUNDARY;
9308 if (!ix86_compat_aligned_value_p (type))
9309 align = PARM_BOUNDARY;
/* Never report more than the platform's maximum alignment.  */
9312 if (align > BIGGEST_ALIGNMENT)
9313 align = BIGGEST_ALIGNMENT;
9317 /* Return true when TYPE should be 128bit aligned for 32bit argument
/* Recursive check used by ix86_function_arg_boundary: does TYPE (or any
   field/element of it) carry 128-bit alignment?  XFmode/XCmode are
   explicitly excluded.  */
9321 ix86_contains_aligned_value_p (const_tree type)
9323 machine_mode mode = TYPE_MODE (type);
9325 if (mode == XFmode || mode == XCmode)
9328 if (TYPE_ALIGN (type) < 128)
9331 if (AGGREGATE_TYPE_P (type))
9333 /* Walk the aggregates recursively. */
9334 switch (TREE_CODE (type))
9338 case QUAL_UNION_TYPE:
9342 /* Walk all the structure fields. */
9343 for (field = TYPE_FIELDS (type);
9345 field = DECL_CHAIN (field))
9347 if (TREE_CODE (field) == FIELD_DECL
9348 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
9355 /* Just for use if some languages passes arrays by value. */
9356 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
/* Non-aggregate fallthrough: alignment of the scalar itself decides.  */
9365 return TYPE_ALIGN (type) >= 128;
9370 /* Gives the alignment boundary, in bits, of an argument with the
9371 specified mode and type. */
9374 ix86_function_arg_boundary (machine_mode mode, const_tree type)
9379 /* Since the main variant type is used for call, we convert it to
9380 the main variant type. */
9381 type = TYPE_MAIN_VARIANT (type);
9382 align = TYPE_ALIGN (type);
9385 align = GET_MODE_ALIGNMENT (mode);
/* Arguments are never aligned below PARM_BOUNDARY.  */
9386 if (align < PARM_BOUNDARY)
9387 align = PARM_BOUNDARY;
9391 unsigned int saved_align = align;
9395 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
9398 if (mode == XFmode || mode == XCmode)
9399 align = PARM_BOUNDARY;
9401 else if (!ix86_contains_aligned_value_p (type))
9402 align = PARM_BOUNDARY;
9405 align = PARM_BOUNDARY;
/* -Wpsabi: warn when the result differs from what GCC < 4.6 computed.  */
9410 && align != ix86_compat_function_arg_boundary (mode, type,
9414 inform (input_location,
9415 "The ABI for passing parameters with %d-byte"
9416 " alignment has changed in GCC 4.6",
9417 align / BITS_PER_UNIT);
9424 /* Return true if N is a possible register number of function value. */
/* NOTE(review): elided excerpt; the switch skeleton over REGNO is missing,
   leaving only the per-case return expressions visible.  */
9427 ix86_function_value_regno_p (const unsigned int regno)
9434 return (!TARGET_64BIT || ix86_cfun_abi () != MS_ABI);
9437 return TARGET_64BIT && ix86_cfun_abi () != MS_ABI;
/* Bound registers carry return values only in instrumented (MPX) code.  */
9441 return chkp_function_instrumented_p (current_function_decl);
9443 /* Complex values are returned in %st(0)/%st(1) pair. */
9446 /* TODO: The function should depend on current function ABI but
9447 builtins.c would need updating then. Therefore we use the
9449 if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
9451 return TARGET_FLOAT_RETURNS_IN_80387;
9453 /* Complex values are returned in %xmm0/%xmm1 pair. */
9459 if (TARGET_MACHO || TARGET_64BIT)
9467 /* Define how to find the value returned by a function.
9468 VALTYPE is the data type of the value (as a tree).
9469 If the precise function being called is known, FUNC is its FUNCTION_DECL;
9470 otherwise, FUNC is 0. */
/* 32-bit return-value register selection: picks MMX/SSE/x87/EAX based on
   the mode, with an SSE override for sseregparm/local-SSE-math functions.
   Fix: the error message said "SSE caling convention"; corrected to
   "calling" to match the identical diagnostic emitted elsewhere in this
   file (function_arg_32 / function_arg_advance_32).  */
9473 function_value_32 (machine_mode orig_mode, machine_mode mode,
9474 const_tree fntype, const_tree fn)
9478 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
9479 we normally prevent this case when mmx is not available. However
9480 some ABIs may require the result to be returned like DImode. */
9481 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
9482 regno = FIRST_MMX_REG;
9484 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
9485 we prevent this case when sse is not available. However some ABIs
9486 may require the result to be returned like integer TImode. */
9487 else if (mode == TImode
9488 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
9489 regno = FIRST_SSE_REG;
9491 /* 32-byte vector modes in %ymm0. */
9492 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
9493 regno = FIRST_SSE_REG;
9495 /* 64-byte vector modes in %zmm0. */
9496 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
9497 regno = FIRST_SSE_REG;
9499 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
9500 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
9501 regno = FIRST_FLOAT_REG;
9503 /* Most things go in %eax. */
9506 /* Override FP return register with %xmm0 for local functions when
9507 SSE math is enabled or for functions with sseregparm attribute. */
9508 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
9510 int sse_level = ix86_function_sseregparm (fntype, fn, false);
9511 if (sse_level == -1)
9513 error ("calling %qD with SSE calling convention without "
9514 "SSE/SSE2 enabled", fn);
9515 sorry ("this is a GCC bug that can be worked around by adding "
9516 "attribute used to function called");
9518 else if ((sse_level >= 1 && mode == SFmode)
9519 || (sse_level == 2 && mode == DFmode))
9520 regno = FIRST_SSE_REG;
9523 /* OImode shouldn't be used directly. */
9524 gcc_assert (mode != OImode);
9526 return gen_rtx_REG (orig_mode, regno);
/* x86-64 SysV-ABI return-value selection.  Libcalls (no VALTYPE) pick a
   fixed register by mode; typed values go through construct_container.
   NOTE(review): elided excerpt; the libcall mode switch and several
   returns are missing between the numbered lines.  */
9530 function_value_64 (machine_mode orig_mode, machine_mode mode,
9535 /* Handle libcalls, which don't provide a type node. */
9536 if (valtype == NULL)
9550 regno = FIRST_SSE_REG;
9554 regno = FIRST_FLOAT_REG;
9562 return gen_rtx_REG (mode, regno);
9564 else if (POINTER_TYPE_P (valtype))
9566 /* Pointers are always returned in word_mode. */
/* Return-value container: up to X86_64_REGPARM_MAX GPRs starting at
   the return-register table, plus SSE registers from %xmm0.  */
9570 ret = construct_container (mode, orig_mode, valtype, 1,
9571 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
9572 x86_64_int_return_registers, 0);
9574 /* For zero sized structures, construct_container returns NULL, but we
9575 need to keep rest of compiler happy by returning meaningful value. */
9577 ret = gen_rtx_REG (orig_mode, AX_REG);
/* MS-ABI (Windows x64) return-value selection: %rax by default, %xmm0 for
   16-byte vectors and for SF/DF scalars when SSE is available.
   Fix: the 16-byte case tested !VECTOR_INTEGER_TYPE_P (valtype) twice in
   the same condition; the duplicate is removed (no behavior change --
   the predicate is pure, so repeating it was merely redundant).  */
9583 function_value_ms_64 (machine_mode orig_mode, machine_mode mode,
9586 unsigned int regno = AX_REG;
9590 switch (GET_MODE_SIZE (mode))
/* 16-byte case: only plain vector/integral values qualify for %xmm0.  */
9593 if (valtype != NULL_TREE
9594 && !VECTOR_INTEGER_TYPE_P (valtype)
9596 && !INTEGRAL_TYPE_P (valtype)
9597 && !VECTOR_FLOAT_TYPE_P (valtype))
9599 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
9600 && !COMPLEX_MODE_P (mode))
9601 regno = FIRST_SSE_REG;
/* Scalar float case.  */
9605 if (mode == SFmode || mode == DFmode)
9606 regno = FIRST_SSE_REG;
9612 return gen_rtx_REG (orig_mode, regno);
/* Common return-value dispatcher: resolves FN/FNTYPE from
   FNTYPE_OR_DECL, short-circuits pointer-bounds values to %bnd0, then
   routes to the MS-64, SysV-64 or 32-bit helper.  */
9616 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
9617 machine_mode orig_mode, machine_mode mode)
9619 const_tree fn, fntype;
9622 if (fntype_or_decl && DECL_P (fntype_or_decl))
9623 fn = fntype_or_decl;
9624 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
9626 if ((valtype && POINTER_BOUNDS_TYPE_P (valtype))
9627 || POINTER_BOUNDS_MODE_P (mode))
9628 return gen_rtx_REG (BNDmode, FIRST_BND_REG);
9629 else if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
9630 return function_value_ms_64 (orig_mode, mode, valtype);
9631 else if (TARGET_64BIT)
9632 return function_value_64 (orig_mode, mode, valtype);
9634 return function_value_32 (orig_mode, mode, fntype, fn);
/* Target hook TARGET_FUNCTION_VALUE: normalize VALTYPE's mode via
   type_natural_mode, then delegate to ix86_function_value_1.  */
9638 ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool)
9640 machine_mode mode, orig_mode;
9642 orig_mode = TYPE_MODE (valtype);
9643 mode = type_natural_mode (valtype, NULL, true);
9644 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
9647 /* Return an RTX representing a place where a function returns
9648 or receives pointer bounds or NULL if no bounds are returned.
9650 VALTYPE is a data type of a value returned by the function.
9652 FN_DECL_OR_TYPE is a tree node representing FUNCTION_DECL
9653 or FUNCTION_TYPE of the function.
9655 If OUTGOING is false, return a place in which the caller will
9656 see the return value. Otherwise, return a place where a
9657 function returns a value. */
9660 ix86_function_value_bounds (const_tree valtype,
9661 const_tree fntype_or_decl ATTRIBUTE_UNUSED,
9662 bool outgoing ATTRIBUTE_UNUSED)
/* A directly bounded type uses %bnd0; an aggregate containing pointers
   gets a PARALLEL with one bound register per pointer slot (max 2).  */
9666 if (BOUNDED_TYPE_P (valtype))
9667 res = gen_rtx_REG (BNDmode, FIRST_BND_REG);
9668 else if (chkp_type_has_pointer (valtype))
9673 unsigned i, bnd_no = 0;
9675 bitmap_obstack_initialize (NULL);
9676 slots = BITMAP_ALLOC (NULL);
/* Collect bit positions of pointer slots inside VALTYPE.  */
9677 chkp_find_bound_slots (valtype, slots);
9679 EXECUTE_IF_SET_IN_BITMAP (slots, 0, i, bi)
9681 rtx reg = gen_rtx_REG (BNDmode, FIRST_BND_REG + bnd_no);
9682 rtx offs = GEN_INT (i * POINTER_SIZE / BITS_PER_UNIT);
9683 gcc_assert (bnd_no < 2);
9684 bounds[bnd_no++] = gen_rtx_EXPR_LIST (VOIDmode, reg, offs);
9687 res = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (bnd_no, bounds));
9689 BITMAP_FREE (slots);
9690 bitmap_obstack_release (NULL);
9698 /* Pointer function arguments and return values are promoted to
/* Implement TARGET_PROMOTE_FUNCTION_MODE.  Pointers are extended
   unsigned; everything else uses the default promotion.  (The tail of
   the original comment and the pointer-mode return are elided here.)  */
9702 ix86_promote_function_mode (const_tree type, machine_mode mode,
9703 int *punsignedp, const_tree fntype,
9706 if (type != NULL_TREE && POINTER_TYPE_P (type))
9708 *punsignedp = POINTERS_EXTEND_UNSIGNED;
9711 return default_promote_function_mode (type, mode, punsignedp, fntype,
9715 /* Return true if a structure, union or array with MODE containing FIELD
9716 should be accessed using BLKmode. */
9719 ix86_member_type_forces_blk (const_tree field, machine_mode mode)
9721 /* Union with XFmode must be in BLKmode. */
/* XFmode is the x87 extended-precision float mode; a union whose mode
   collapsed to XFmode cannot be moved as a scalar safely.  */
9722 return (mode == XFmode
9723 && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
9724 || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
/* Implement TARGET_LIBCALL_VALUE.  Library calls have no tree type,
   so pass NULL for both VALTYPE and FNTYPE_OR_DECL and use MODE as
   both the original and the natural mode.  */
9728 ix86_libcall_value (machine_mode mode)
9730 return ix86_function_value_1 (NULL, NULL, mode, mode);
9733 /* Return true iff type is returned in memory. */
/* Implement TARGET_RETURN_IN_MEMORY.  NOTE(review): several branch
   guards (#else, TARGET_64BIT / TARGET_IAMCU tests, size checks) are
   elided from this listing; the comments below cover only what is
   visible.  */
9736 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
9738 #ifdef SUBTARGET_RETURN_IN_MEMORY
9739 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
9741 const machine_mode mode = type_natural_mode (type, NULL, true);
/* MPX bounds are returned in bound registers, never in memory.  */
9744 if (POINTER_BOUNDS_TYPE_P (type))
/* MS 64-bit ABI branch.  */
9749 if (ix86_function_type_abi (fntype) == MS_ABI)
9751 size = int_size_in_bytes (type);
9753 /* __m128 is returned in xmm0. */
9754 if ((!type || VECTOR_INTEGER_TYPE_P (type)
9755 || INTEGRAL_TYPE_P (type)
9756 || VECTOR_FLOAT_TYPE_P (type))
9757 && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
9758 && !COMPLEX_MODE_P (mode)
9759 && (GET_MODE_SIZE (mode) == 16 || size == 16))
9762 /* Otherwise, the size must be exactly in [1248]. */
9763 return size != 1 && size != 2 && size != 4 && size != 8;
/* SysV 64-bit: in memory iff the value does not fit in registers
   per the psABI classification (examine_argument).  */
9767 int needed_intregs, needed_sseregs;
9769 return examine_argument (mode, type, 1,
9770 &needed_intregs, &needed_sseregs);
9775 size = int_size_in_bytes (type);
9777 /* Intel MCU psABI returns scalars and aggregates no larger than 8
9778 bytes in registers. */
9780 return VECTOR_MODE_P (mode) || size < 0 || size > 8;
9782 if (mode == BLKmode)
9785 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
9788 if (VECTOR_MODE_P (mode) || mode == TImode)
9790 /* User-created vectors small enough to fit in EAX. */
9794 /* Unless ABI prescribes otherwise,
9795 MMX/3dNow values are returned in MM0 if available. */
9798 return TARGET_VECT8_RETURNS || !TARGET_MMX;
9800 /* SSE values are returned in XMM0 if available. */
9804 /* AVX values are returned in YMM0 if available. */
9808 /* AVX512F values are returned in ZMM0 if available. */
9810 return !TARGET_AVX512F;
9819 /* OImode shouldn't be used directly. */
9820 gcc_assert (mode != OImode);
9828 /* Create the va_list data type. */
/* Build the SysV 64-bit __va_list_tag record:
     { unsigned gp_offset; unsigned fp_offset;
       void *overflow_arg_area; void *reg_save_area; }
   and return an array-of-one of it (so va_list decays like an array,
   as the psABI requires).  */
9831 ix86_build_builtin_va_list_64 (void)
9833 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
9835 record = lang_hooks.types.make_type (RECORD_TYPE);
9836 type_decl = build_decl (BUILTINS_LOCATION,
9837 TYPE_DECL, get_identifier ("__va_list_tag"), record);
9839 f_gpr = build_decl (BUILTINS_LOCATION,
9840 FIELD_DECL, get_identifier ("gp_offset"),
9841 unsigned_type_node);
9842 f_fpr = build_decl (BUILTINS_LOCATION,
9843 FIELD_DECL, get_identifier ("fp_offset"),
9844 unsigned_type_node);
9845 f_ovf = build_decl (BUILTINS_LOCATION,
9846 FIELD_DECL, get_identifier ("overflow_arg_area"),
9848 f_sav = build_decl (BUILTINS_LOCATION,
9849 FIELD_DECL, get_identifier ("reg_save_area"),
/* Remember the counter fields so other code can find them quickly.  */
9852 va_list_gpr_counter_field = f_gpr;
9853 va_list_fpr_counter_field = f_fpr;
9855 DECL_FIELD_CONTEXT (f_gpr) = record;
9856 DECL_FIELD_CONTEXT (f_fpr) = record;
9857 DECL_FIELD_CONTEXT (f_ovf) = record;
9858 DECL_FIELD_CONTEXT (f_sav) = record;
9860 TYPE_STUB_DECL (record) = type_decl;
9861 TYPE_NAME (record) = type_decl;
/* Chain the four fields in declaration order.  */
9862 TYPE_FIELDS (record) = f_gpr;
9863 DECL_CHAIN (f_gpr) = f_fpr;
9864 DECL_CHAIN (f_fpr) = f_ovf;
9865 DECL_CHAIN (f_ovf) = f_sav;
9867 layout_type (record);
9869 /* The correct type is an array type of one element. */
9870 return build_array_type (record, build_index_type (size_zero_node));
9873 /* Setup the builtin va_list data type and for 64-bit the additional
9874 calling convention specific va_list data types. */
/* Implement TARGET_BUILD_BUILTIN_VA_LIST.  On 64-bit targets both the
   SysV record va_list and the MS char* va_list are created (each ABI
   can be selected per function); the default depends on ix86_abi.
   NOTE(review): the enclosing TARGET_64BIT test is elided from this
   listing.  */
9877 ix86_build_builtin_va_list (void)
9881 /* Initialize ABI specific va_list builtin types. */
9882 tree sysv_va_list, ms_va_list;
9884 sysv_va_list = ix86_build_builtin_va_list_64 ();
9885 sysv_va_list_type_node = build_variant_type_copy (sysv_va_list);
9887 /* For MS_ABI we use plain pointer to argument area. */
9888 ms_va_list = build_pointer_type (char_type_node);
9889 ms_va_list_type_node = build_variant_type_copy (ms_va_list);
9891 return (ix86_abi == MS_ABI) ? ms_va_list : sysv_va_list;
9895 /* For i386 we use plain pointer to argument area. */
9896 return build_pointer_type (char_type_node);
9900 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
/* Emit prologue code that dumps the unnamed-argument registers into
   the register save area for the SysV 64-bit ABI.  NOTE(review):
   local declarations (save_area, set, mem, i, max, test, smode) and
   some control lines are elided from this listing.  */
9903 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
9909 /* GPR size of varargs save area. */
9910 if (cfun->va_list_gpr_size)
9911 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
9913 ix86_varargs_gpr_size = 0;
9915 /* FPR size of varargs save area. We don't need it if we don't pass
9916 anything in SSE registers. */
9917 if (TARGET_SSE && cfun->va_list_fpr_size)
9918 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
9920 ix86_varargs_fpr_size = 0;
/* Nothing to save — no va_list ever reads the save area.  */
9922 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
9925 save_area = frame_pointer_rtx;
9926 set = get_varargs_alias_set ();
/* Save only the integer registers that might still be consumed by
   va_arg: from the first unused register up to the GPR limit.  */
9928 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
9929 if (max > X86_64_REGPARM_MAX)
9930 max = X86_64_REGPARM_MAX;
9932 for (i = cum->regno; i < max; i++)
9934 mem = gen_rtx_MEM (word_mode,
9935 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
9936 MEM_NOTRAP_P (mem) = 1;
9937 set_mem_alias_set (mem, set);
9938 emit_move_insn (mem,
9939 gen_rtx_REG (word_mode,
9940 x86_64_int_parameter_registers[i]));
9943 if (ix86_varargs_fpr_size)
9946 rtx_code_label *label;
9949 /* Now emit code to save SSE registers. The AX parameter contains number
9950 of SSE parameter registers used to call this function, though all we
9951 actually check here is the zero/non-zero status. */
/* Skip the SSE saves entirely when AL is zero at the call site.  */
9953 label = gen_label_rtx ();
9954 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
9955 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
9958 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
9959 we used movdqa (i.e. TImode) instead? Perhaps even better would
9960 be if we could determine the real mode of the data, via a hook
9961 into pass_stdarg. Ignore all that for now. */
/* Stores of SMODE need the frame aligned at least that much.  */
9963 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
9964 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
9966 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
9967 if (max > X86_64_SSE_REGPARM_MAX)
9968 max = X86_64_SSE_REGPARM_MAX;
/* SSE slots follow the GPR save area; each is 16 bytes.  */
9970 for (i = cum->sse_regno; i < max; ++i)
9972 mem = plus_constant (Pmode, save_area,
9973 i * 16 + ix86_varargs_gpr_size);
9974 mem = gen_rtx_MEM (smode, mem);
9975 MEM_NOTRAP_P (mem) = 1;
9976 set_mem_alias_set (mem, set);
9977 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
9979 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
/* Varargs setup for the MS 64-bit ABI: spill the remaining named
   integer-parameter registers into their caller-allocated home slots
   so va_arg can walk the argument area linearly.  */
9987 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
9989 alias_set_type set = get_varargs_alias_set ();
9992 /* Reset to zero, as there might be a sysv vaarg used
/* (comment truncated in this listing — tail elided)  */
9994 ix86_varargs_gpr_size = 0;
9995 ix86_varargs_fpr_size = 0;
9997 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
/* Each register's home slot lives at a fixed word offset from the
   incoming argument pointer.  */
10001 mem = gen_rtx_MEM (Pmode,
10002 plus_constant (Pmode, virtual_incoming_args_rtx,
10003 i * UNITS_PER_WORD));
10004 MEM_NOTRAP_P (mem) = 1;
10005 set_mem_alias_set (mem, set);
10007 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
10008 emit_move_insn (mem, reg);
/* Implement TARGET_SETUP_INCOMING_VARARGS: advance past the last named
   argument, then dispatch to the ABI-specific worker.  NOTE(review):
   the TARGET_64BIT early-return and `next_cum = *cum;` copy are elided
   from this listing.  */
10013 ix86_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
10014 tree type, int *, int no_rtl)
10016 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
10017 CUMULATIVE_ARGS next_cum;
10020 /* This argument doesn't appear to be used anymore. Which is good,
10021 because the old code here didn't suppress rtl generation. */
10022 gcc_assert (!no_rtl);
10027 fntype = TREE_TYPE (current_function_decl);
10029 /* For varargs, we do not want to skip the dummy va_dcl argument.
10030 For stdargs, we do want to skip the last named argument. */
10032 if (stdarg_p (fntype))
10033 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
10036 if (cum->call_abi == MS_ABI)
10037 setup_incoming_varargs_ms_64 (&next_cum);
10039 setup_incoming_varargs_64 (&next_cum);
/* MPX companion to ix86_setup_incoming_varargs: store pointer bounds
   for the unnamed integer-register arguments next to their spilled
   values in the register save area.  NOTE(review): some declarations
   (save_area, fntype, bounds, ldx_addr) and control lines are elided
   from this listing.  */
10043 ix86_setup_incoming_vararg_bounds (cumulative_args_t cum_v,
10044 enum machine_mode mode,
10046 int *pretend_size ATTRIBUTE_UNUSED,
10049 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
10050 CUMULATIVE_ARGS next_cum;
10053 int bnd_reg, i, max;
10055 gcc_assert (!no_rtl);
10057 /* Do nothing if we use plain pointer to argument area. */
10058 if (!TARGET_64BIT || cum->call_abi == MS_ABI)
10061 fntype = TREE_TYPE (current_function_decl);
10063 /* For varargs, we do not want to skip the dummy va_dcl argument.
10064 For stdargs, we do want to skip the last named argument. */
10066 if (stdarg_p (fntype))
10067 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
10069 save_area = frame_pointer_rtx;
10071 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
10072 if (max > X86_64_REGPARM_MAX)
10073 max = X86_64_REGPARM_MAX;
10075 bnd_reg = cum->bnd_regno + cum->force_bnd_pass;
10076 if (chkp_function_instrumented_p (current_function_decl))
10077 for (i = cum->regno; i < max; i++)
10079 rtx addr = plus_constant (Pmode, save_area, i * UNITS_PER_WORD);
10080 rtx ptr = gen_rtx_REG (Pmode,
10081 x86_64_int_parameter_registers[i]);
/* Bounds either still sit in a bound register, or have spilled to
   the Bounds Table and must be reloaded with BNDLDX.  */
10084 if (bnd_reg <= LAST_BND_REG)
10085 bounds = gen_rtx_REG (BNDmode, bnd_reg);
10089 plus_constant (Pmode, arg_pointer_rtx,
10090 (LAST_BND_REG - bnd_reg) * GET_MODE_SIZE (Pmode));
10091 bounds = gen_reg_rtx (BNDmode);
10092 emit_insn (BNDmode == BND64mode
10093 ? gen_bnd64_ldx (bounds, ldx_addr, ptr)
10094 : gen_bnd32_ldx (bounds, ldx_addr, ptr));
/* Store the bounds for the saved pointer with BNDSTX.  */
10097 emit_insn (BNDmode == BND64mode
10098 ? gen_bnd64_stx (addr, ptr, bounds)
10099 : gen_bnd32_stx (addr, ptr, bounds));
10106 /* Checks if TYPE is of kind va_list char *. */
/* True when TYPE's canonical va_list is the plain char* form (32-bit
   targets, or the MS 64-bit ABI), i.e. when the generic pointer-bump
   va_arg handling applies instead of the SysV record form.  */
10109 is_va_list_char_pointer (tree type)
10113 /* For 32-bit it is always true. */
10116 canonic = ix86_canonical_va_list_type (type);
10117 return (canonic == ms_va_list_type_node
10118 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
10121 /* Implement va_start. */
/* Expand __builtin_va_start: initialize the va_list record's
   gp_offset / fp_offset / overflow_arg_area / reg_save_area fields
   (or just the pointer, for char* va_lists).  NOTE(review): several
   local declarations (reg, seq, va_r, next, type, ovf_rtx) and some
   control lines are elided from this listing.  */
10124 ix86_va_start (tree valist, rtx nextarg)
10126 HOST_WIDE_INT words, n_gpr, n_fpr;
10127 tree f_gpr, f_fpr, f_ovf, f_sav;
10128 tree gpr, fpr, ovf, sav, t;
10132 if (flag_split_stack
10133 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
10135 unsigned int scratch_regno;
10137 /* When we are splitting the stack, we can't refer to the stack
10138 arguments using internal_arg_pointer, because they may be on
10139 the old stack. The split stack prologue will arrange to
10140 leave a pointer to the old stack arguments in a scratch
10141 register, which we here copy to a pseudo-register. The split
10142 stack prologue can't set the pseudo-register directly because
10143 it (the prologue) runs before any registers have been saved. */
10145 scratch_regno = split_stack_prologue_scratch_regno ();
10146 if (scratch_regno != INVALID_REGNUM)
10151 reg = gen_reg_rtx (Pmode);
10152 cfun->machine->split_stack_varargs_pointer = reg;
10155 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
10156 seq = get_insns ();
/* Insert the copy at the function entry, after the prologue.  */
10159 push_topmost_sequence ();
10160 emit_insn_after (seq, entry_of_function ());
10161 pop_topmost_sequence ();
10165 /* Only 64bit target needs something special. */
10166 if (is_va_list_char_pointer (TREE_TYPE (valist)))
10168 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
10169 std_expand_builtin_va_start (valist, nextarg);
/* Split-stack case: the argument area pointer was stashed in a
   pseudo; add the argument offset and store into the va_list.  */
10174 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
10175 next = expand_binop (ptr_mode, add_optab,
10176 cfun->machine->split_stack_varargs_pointer,
10177 crtl->args.arg_offset_rtx,
10178 NULL_RTX, 0, OPTAB_LIB_WIDEN);
10179 convert_move (va_r, next, 0);
10181 /* Store zero bounds for va_list. */
10182 if (chkp_function_instrumented_p (current_function_decl))
10183 chkp_expand_bounds_reset_for_mem (valist,
10184 make_tree (TREE_TYPE (valist),
/* SysV 64-bit record va_list: fetch the four fields in order.  */
10191 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
10192 f_fpr = DECL_CHAIN (f_gpr);
10193 f_ovf = DECL_CHAIN (f_fpr);
10194 f_sav = DECL_CHAIN (f_ovf);
10196 valist = build_simple_mem_ref (valist);
10197 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
10198 /* The following should be folded into the MEM_REF offset. */
10199 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
10201 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
10203 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
10205 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
10208 /* Count number of gp and fp argument registers used. */
10209 words = crtl->args.info.words;
10210 n_gpr = crtl->args.info.regno;
10211 n_fpr = crtl->args.info.sse_regno;
10213 if (cfun->va_list_gpr_size)
/* gp_offset = bytes of GPR save area already consumed by named args.  */
10215 type = TREE_TYPE (gpr);
10216 t = build2 (MODIFY_EXPR, type,
10217 gpr, build_int_cst (type, n_gpr * 8));
10218 TREE_SIDE_EFFECTS (t) = 1;
10219 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
10222 if (TARGET_SSE && cfun->va_list_fpr_size)
/* fp_offset starts past the whole GPR area (8 * REGPARM_MAX).  */
10224 type = TREE_TYPE (fpr);
10225 t = build2 (MODIFY_EXPR, type, fpr,
10226 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
10227 TREE_SIDE_EFFECTS (t) = 1;
10228 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
10231 /* Find the overflow area. */
10232 type = TREE_TYPE (ovf);
10233 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
10234 ovf_rtx = crtl->args.internal_arg_pointer;
10236 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
10237 t = make_tree (type, ovf_rtx);
10239 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
10241 /* Store zero bounds for overflow area pointer. */
10242 if (chkp_function_instrumented_p (current_function_decl))
10243 chkp_expand_bounds_reset_for_mem (ovf, t);
10245 t = build2 (MODIFY_EXPR, type, ovf, t);
10246 TREE_SIDE_EFFECTS (t) = 1;
10247 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
10249 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
10251 /* Find the register save area.
10252 Prologue of the function saves it right above stack frame. */
10253 type = TREE_TYPE (sav);
10254 t = make_tree (type, frame_pointer_rtx);
10255 if (!ix86_varargs_gpr_size)
10256 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
10258 /* Store zero bounds for save area pointer. */
10259 if (chkp_function_instrumented_p (current_function_decl))
10260 chkp_expand_bounds_reset_for_mem (sav, t);
10262 t = build2 (MODIFY_EXPR, type, sav, t);
10263 TREE_SIDE_EFFECTS (t) = 1;
10264 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
10268 /* Implement va_arg. */
/* Gimplify __builtin_va_arg for the SysV 64-bit record va_list:
   try to pull the value from the register save area (guarded by
   gp_offset/fp_offset limit checks) and otherwise take it from the
   overflow area, updating the va_list as it goes.  NOTE(review):
   numerous declarations and control lines are elided from this
   listing; comments cover only visible statements.  */
10271 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
10272 gimple_seq *post_p)
10274 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
10275 tree f_gpr, f_fpr, f_ovf, f_sav;
10276 tree gpr, fpr, ovf, sav, t;
10278 tree lab_false, lab_over = NULL_TREE;
10281 int indirect_p = 0;
10283 machine_mode nat_mode;
10284 unsigned int arg_boundary;
10286 /* Only 64bit target needs something special. */
10287 if (is_va_list_char_pointer (TREE_TYPE (valist)))
10288 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
10290 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
10291 f_fpr = DECL_CHAIN (f_gpr);
10292 f_ovf = DECL_CHAIN (f_fpr);
10293 f_sav = DECL_CHAIN (f_ovf);
10295 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
10296 valist, f_gpr, NULL_TREE);
10298 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
10299 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
10300 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
/* Arguments passed by reference are fetched as a pointer and
   dereferenced at the end.  */
10302 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
10304 type = build_pointer_type (type);
10305 size = int_size_in_bytes (type);
10306 rsize = CEIL (size, UNITS_PER_WORD);
10308 nat_mode = type_natural_mode (type, NULL, false);
10323 /* Unnamed 256 and 512bit vector mode parameters are passed on stack. */
10324 if (!TARGET_64BIT_MS_ABI)
/* Classify the argument; CONTAINER is NULL when it goes on the stack.  */
10331 container = construct_container (nat_mode, TYPE_MODE (type),
10332 type, 0, X86_64_REGPARM_MAX,
10333 X86_64_SSE_REGPARM_MAX, intreg,
10338 /* Pull the value out of the saved registers. */
10340 addr = create_tmp_var (ptr_type_node, "addr");
10344 int needed_intregs, needed_sseregs;
10346 tree int_addr, sse_addr;
10348 lab_false = create_artificial_label (UNKNOWN_LOCATION);
10349 lab_over = create_artificial_label (UNKNOWN_LOCATION);
10351 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
/* A temporary is needed when alignment exceeds what the save area
   guarantees for the registers involved.  */
10353 need_temp = (!REG_P (container)
10354 && ((needed_intregs && TYPE_ALIGN (type) > 64)
10355 || TYPE_ALIGN (type) > 128));
10357 /* In case we are passing structure, verify that it is consecutive block
10358 on the register save area. If not we need to do moves. */
10359 if (!need_temp && !REG_P (container))
10361 /* Verify that all registers are strictly consecutive */
10362 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
10366 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
10368 rtx slot = XVECEXP (container, 0, i);
10369 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
10370 || INTVAL (XEXP (slot, 1)) != i * 16)
10378 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
10380 rtx slot = XVECEXP (container, 0, i);
10381 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
10382 || INTVAL (XEXP (slot, 1)) != i * 8)
10394 int_addr = create_tmp_var (ptr_type_node, "int_addr");
10395 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
10398 /* First ensure that we fit completely in registers. */
10399 if (needed_intregs)
/* If gp_offset is past the last slot that could hold the whole
   value, branch to the stack path (lab_false).  */
10401 t = build_int_cst (TREE_TYPE (gpr),
10402 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
10403 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
10404 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
10405 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
10406 gimplify_and_add (t, pre_p);
10408 if (needed_sseregs)
10410 t = build_int_cst (TREE_TYPE (fpr),
10411 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
10412 + X86_64_REGPARM_MAX * 8);
10413 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
10414 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
10415 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
10416 gimplify_and_add (t, pre_p);
10419 /* Compute index to start of area used for integer regs. */
10420 if (needed_intregs)
10422 /* int_addr = gpr + sav; */
10423 t = fold_build_pointer_plus (sav, gpr);
10424 gimplify_assign (int_addr, t, pre_p);
10426 if (needed_sseregs)
10428 /* sse_addr = fpr + sav; */
10429 t = fold_build_pointer_plus (sav, fpr);
10430 gimplify_assign (sse_addr, t, pre_p);
/* need_temp path: copy the scattered register pieces into a stack
   temporary, piece by piece.  */
10434 int i, prev_size = 0;
10435 tree temp = create_tmp_var (type, "va_arg_tmp");
10437 /* addr = &temp; */
10438 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
10439 gimplify_assign (addr, t, pre_p);
10441 for (i = 0; i < XVECLEN (container, 0); i++)
10443 rtx slot = XVECEXP (container, 0, i);
10444 rtx reg = XEXP (slot, 0);
10445 machine_mode mode = GET_MODE (reg);
10449 tree src_addr, src;
10451 tree dest_addr, dest;
10452 int cur_size = GET_MODE_SIZE (mode);
10454 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
10455 prev_size = INTVAL (XEXP (slot, 1));
/* Clamp the last piece so we never copy past the value's end.  */
10456 if (prev_size + cur_size > size)
10458 cur_size = size - prev_size;
10459 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
10460 if (mode == BLKmode)
10463 piece_type = lang_hooks.types.type_for_mode (mode, 1);
10464 if (mode == GET_MODE (reg))
10465 addr_type = build_pointer_type (piece_type);
10467 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
10469 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
/* SSE pieces come from the 16-byte-strided FP save area; integer
   pieces from the 8-byte-strided GPR save area.  */
10472 if (SSE_REGNO_P (REGNO (reg)))
10474 src_addr = sse_addr;
10475 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
10479 src_addr = int_addr;
10480 src_offset = REGNO (reg) * 8;
10482 src_addr = fold_convert (addr_type, src_addr);
10483 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
10485 dest_addr = fold_convert (daddr_type, addr);
10486 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
10487 if (cur_size == GET_MODE_SIZE (mode))
10489 src = build_va_arg_indirect_ref (src_addr);
10490 dest = build_va_arg_indirect_ref (dest_addr);
10492 gimplify_assign (dest, src, pre_p);
/* Partial piece: fall back to memcpy of the remaining bytes.  */
10497 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
10498 3, dest_addr, src_addr,
10499 size_int (cur_size));
10500 gimplify_and_add (copy, pre_p);
10502 prev_size += cur_size;
/* Consume the register slots just used.  */
10506 if (needed_intregs)
10508 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
10509 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
10510 gimplify_assign (gpr, t, pre_p);
10513 if (needed_sseregs)
10515 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
10516 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
10517 gimplify_assign (unshare_expr (fpr), t, pre_p);
10520 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
10522 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
10525 /* ... otherwise out of the overflow area. */
10527 /* When we align parameter on stack for caller, if the parameter
10528 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
10529 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. We will match callee
10530 here with caller. */
10531 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
10532 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
10533 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
10535 /* Care for on-stack alignment if needed. */
10536 if (arg_boundary <= 64 || size == 0)
/* Round the overflow pointer up to the argument's boundary.  */
10540 HOST_WIDE_INT align = arg_boundary / 8;
10541 t = fold_build_pointer_plus_hwi (ovf, align - 1);
10542 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
10543 build_int_cst (TREE_TYPE (t), -align));
10546 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
10547 gimplify_assign (addr, t, pre_p);
/* Advance the overflow pointer past the argument (word-rounded).  */
10549 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
10550 gimplify_assign (unshare_expr (ovf), t, pre_p);
10553 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
10555 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
10556 addr = fold_convert (ptrtype, addr);
/* By-reference arguments need one extra dereference.  */
10559 addr = build_va_arg_indirect_ref (addr);
10560 return build_va_arg_indirect_ref (addr);
10563 /* Return true if OPNUM's MEM should be matched
10564 in movabs* patterns. */
/* Accepts the insn unless its MEM operand is volatile while volatile
   memory references are disallowed (volatile_ok is the global recog
   flag).  */
10567 ix86_check_movabs (rtx insn, int opnum)
10571 set = PATTERN (insn);
10572 if (GET_CODE (set) == PARALLEL)
10573 set = XVECEXP (set, 0, 0);
10574 gcc_assert (GET_CODE (set) == SET);
10575 mem = XEXP (set, opnum);
/* Look through any stack of SUBREGs to the underlying MEM.  */
10576 while (SUBREG_P (mem))
10577 mem = SUBREG_REG (mem);
10578 gcc_assert (MEM_P (mem));
10579 return volatile_ok || !MEM_VOLATILE_P (mem);
10582 /* Return false if INSN contains a MEM with a non-default address space. */
/* Walks every sub-rtx of INSN's pattern; any MEM in a non-generic
   address space (%fs/%gs-relative) rejects the insn.  */
10584 ix86_check_no_addr_space (rtx insn)
10586 subrtx_var_iterator::array_type array;
10587 FOR_EACH_SUBRTX_VAR (iter, array, PATTERN (insn), ALL)
10590 if (MEM_P (x) && !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (x)))
10596 /* Initialize the table of extra 80387 mathematical constants. */
/* Fills ext_80387_constants_table with log10(2), ln(2), log2(e),
   log2(10) and pi — the constants loadable by the dedicated x87
   fldlg2/fldln2/fldl2e/fldl2t/fldpi instructions — and marks the
   table initialized.  */
10599 init_ext_80387_constants (void)
10601 static const char * cst[5] =
10603 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
10604 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
10605 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
10606 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
10607 "3.1415926535897932385128089594061862044", /* 4: fldpi */
10611 for (i = 0; i < 5; i++)
10613 real_from_string (&ext_80387_constants_table[i], cst[i]);
10614 /* Ensure each constant is rounded to XFmode precision. */
10615 real_convert (&ext_80387_constants_table[i],
10616 XFmode, &ext_80387_constants_table[i]);
10619 ext_80387_constants_init = 1;
10622 /* Return non-zero if the constant is something that
10623 can be loaded with a special instruction. */
/* Classify X for the x87 special-load instructions.  Visible returns
   cover 0.0/1.0 (fldz/fld1), the five extended constants above, and
   the -0.0/-1.0 fld+fchs split; the elided lines carry the actual
   return codes consumed by standard_80387_constant_opcode.  */
10626 standard_80387_constant_p (rtx x)
10628 machine_mode mode = GET_MODE (x);
10630 const REAL_VALUE_TYPE *r;
/* Only x87 floating constants qualify.  */
10632 if (!(CONST_DOUBLE_P (x) && X87_FLOAT_MODE_P (mode)))
10635 if (x == CONST0_RTX (mode))
10637 if (x == CONST1_RTX (mode))
10640 r = CONST_DOUBLE_REAL_VALUE (x);
10642 /* For XFmode constants, try to find a special 80387 instruction when
10643 optimizing for size or on those CPUs that benefit from them. */
10645 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
10649 if (! ext_80387_constants_init)
10650 init_ext_80387_constants ();
10652 for (i = 0; i < 5; i++)
10653 if (real_identical (r, &ext_80387_constants_table[i]))
10657 /* Load of the constant -0.0 or -1.0 will be split as
10658 fldz;fchs or fld1;fchs sequence. */
10659 if (real_isnegzero (r))
10661 if (real_identical (r, &dconstm1))
10667 /* Return the opcode of the special instruction to be used to load
/* Maps standard_80387_constant_p's classification to an x87 load
   mnemonic; the case bodies are elided from this listing.  An
   unclassified constant is a caller bug (gcc_unreachable).  */
10671 standard_80387_constant_opcode (rtx x)
10673 switch (standard_80387_constant_p (x))
10693 gcc_unreachable ();
10697 /* Return the CONST_DOUBLE representing the 80387 constant that is
10698 loaded by the specified special instruction. The argument IDX
10699 matches the return value from standard_80387_constant_p. */
10702 standard_80387_constant_rtx (int idx)
/* Lazily build the constants table on first use.  */
10706 if (! ext_80387_constants_init)
10707 init_ext_80387_constants ();
/* The IDX-to-table-index switch is elided from this listing; an
   out-of-range IDX is a caller bug.  */
10720 gcc_unreachable ();
10723 return const_double_from_real_value (ext_80387_constants_table[i],
10727 /* Return 1 if X is all 0s and 2 if x is all 1s
10728 in supported SSE/AVX vector mode. */
/* NOTE(review): the return statements and the TARGET_SSE/mode-size
   guards between the visible tests are elided from this listing.  */
10731 standard_sse_constant_p (rtx x)
10738 mode = GET_MODE (x);
/* All-zeros constant: loadable with xorps/pxor.  */
10740 if (x == const0_rtx || x == CONST0_RTX (mode))
/* All-ones constant: loadable with pcmpeqd/vpternlogd.  */
10742 if (vector_all_ones_operand (x, mode))
10761 if (TARGET_AVX512F)
10770 /* Return the opcode of the special instruction to be used to load
/* Picks the zeroing/all-ones idiom matching INSN's operand mode
   attribute: case 1 is all-zeros (xor idioms, with EVEX forms when
   512-bit or AVX512VL applies), case 2 is all-ones (pcmpeqd, or
   vpternlogd for 512-bit where pcmpeq encodings don't reach).
   Missing-case fallthrough is a bug (gcc_unreachable).  */
10774 standard_sse_constant_opcode (rtx_insn *insn, rtx x)
10776 switch (standard_sse_constant_p (x))
/* All-zeros.  */
10779 switch (get_attr_mode (insn))
10782 return "vpxord\t%g0, %g0, %g0";
10784 return TARGET_AVX512DQ ? "vxorps\t%g0, %g0, %g0"
10785 : "vpxord\t%g0, %g0, %g0";
10787 return TARGET_AVX512DQ ? "vxorpd\t%g0, %g0, %g0"
10788 : "vpxorq\t%g0, %g0, %g0";
10790 return TARGET_AVX512VL ? "vpxord\t%t0, %t0, %t0"
10791 : "%vpxor\t%0, %d0";
10793 return "%vxorpd\t%0, %d0";
10795 return "%vxorps\t%0, %d0";
10798 return TARGET_AVX512VL ? "vpxord\t%x0, %x0, %x0"
10799 : "vpxor\t%x0, %x0, %x0";
10801 return "vxorpd\t%x0, %x0, %x0";
10803 return "vxorps\t%x0, %x0, %x0";
/* All-ones.  */
10810 if (TARGET_AVX512VL
10811 || get_attr_mode (insn) == MODE_XI
10812 || get_attr_mode (insn) == MODE_V8DF
10813 || get_attr_mode (insn) == MODE_V16SF
10814 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
10816 return "vpcmpeqd\t%0, %0, %0";
10818 return "pcmpeqd\t%0, %0";
10823 gcc_unreachable ();
10826 /* Returns true if OP contains a symbol reference */
/* Recursive walk over OP's rtx structure: true iff a SYMBOL_REF or
   LABEL_REF occurs anywhere inside, visiting vector ('E') and
   expression ('e') operands.  */
10829 symbolic_reference_mentioned_p (rtx op)
10834 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
10837 fmt = GET_RTX_FORMAT (GET_CODE (op));
10838 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
10844 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
10845 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
10849 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
10856 /* Return true if it is appropriate to emit `ret' instructions in the
10857 body of a function. Do this only if the epilogue is simple, needing a
10858 couple of insns. Prior to reloading, we can't tell how many registers
10859 must be saved, so return false then. Return false if there is no frame
10860 marker to de-allocate. */
10863 ix86_can_use_return_insn_p (void)
10865 struct ix86_frame frame;
/* Before reload the frame layout is unknown; with a frame pointer
   the epilogue is never a bare ret.  */
10867 if (! reload_completed || frame_pointer_needed)
10870 /* Don't allow more than 32k pop, since that's all we can do
10871 with one instruction. */
10872 if (crtl->args.pops_args && crtl->args.size >= 32768)
10875 ix86_compute_frame_layout (&frame);
/* A plain ret works only when nothing but the return address is on
   the stack and no registers need restoring.  */
10876 return (frame.stack_pointer_offset == UNITS_PER_WORD
10877 && (frame.nregs + frame.nsseregs) == 0);
10880 /* Value should be nonzero if functions must have frame pointers.
10881 Zero means the frame pointer need not be set up (and parms may
10882 be accessed via the stack pointer) in functions that seem suitable. */
/* Implement TARGET_FRAME_POINTER_REQUIRED.  NOTE(review): the `return
   true;` bodies of each visible guard are elided from this listing.  */
10885 ix86_frame_pointer_required (void)
10887 /* If we accessed previous frames, then the generated code expects
10888 to be able to access the saved ebp value in our frame. */
10889 if (cfun->machine->accesses_prev_frame)
10892 /* Several x86 os'es need a frame pointer for other reasons,
10893 usually pertaining to setjmp. */
10894 if (SUBTARGET_FRAME_POINTER_REQUIRED)
10897 /* For older 32-bit runtimes setjmp requires valid frame-pointer. */
10898 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
10901 /* Win64 SEH, very large frames need a frame-pointer as maximum stack
10902 allocation is 4GB. */
10903 if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
10906 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
10907 turns off the frame pointer by default. Turn it back on now if
10908 we've not got a leaf function. */
10909 if (TARGET_OMIT_LEAF_FRAME_POINTER
10911 || ix86_current_function_calls_tls_descriptor))
/* Profiling without -mfentry also needs the frame pointer.  */
10914 if (crtl->profile && !flag_fentry)
10920 /* Record that the current function accesses previous call frames. */
/* Called when __builtin_frame_address/__builtin_return_address walks
   outer frames; forces ix86_frame_pointer_required to say yes.  */
10923 ix86_setup_frame_addresses (void)
10925 cfun->machine->accesses_prev_frame = 1;
10928 #ifndef USE_HIDDEN_LINKONCE
10929 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
10930 # define USE_HIDDEN_LINKONCE 1
10932 # define USE_HIDDEN_LINKONCE 0
10936 static int pic_labels_used;
10938 /* Fills in the label name that should be used for a pc thunk for
10939 the given register. */
/* NAME must have room for 32 bytes.  32-bit only — 64-bit code uses
   RIP-relative addressing and needs no get-PC thunks.  */
10942 get_pc_thunk_name (char name[32], unsigned int regno)
10944 gcc_assert (!TARGET_64BIT);
/* Hidden-linkonce thunks get a stable public name so the linker can
   merge duplicates across objects; otherwise a local label is used.  */
10946 if (USE_HIDDEN_LINKONCE)
10947 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
10949 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
10953 /* This function generates code for -fpic that loads %ebx with
10954 the return address of the caller and then returns. */
/* Implement TARGET_ASM_CODE_END: emit one __x86.get_pc_thunk.REG
   function per PIC register actually requested (pic_labels_used
   bitmask), choosing comdat/hidden sections when supported.
   NOTE(review): some declarations (name, decl, xops) and #if
   directives (Darwin vs. hidden-linkonce vs. plain) are elided
   from this listing.  */
10957 ix86_code_end (void)
10962 for (regno = AX_REG; regno <= SP_REG; regno++)
/* Skip registers for which no thunk was ever requested.  */
10967 if (!(pic_labels_used & (1 << regno)))
10970 get_pc_thunk_name (name, regno);
10972 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
10973 get_identifier (name),
10974 build_function_type_list (void_type_node, NULL_TREE));
10975 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
10976 NULL_TREE, void_type_node);
10977 TREE_PUBLIC (decl) = 1;
10978 TREE_STATIC (decl) = 1;
10979 DECL_IGNORED_P (decl) = 1;
/* Darwin: weak definition in the coalesced text section.  */
10984 switch_to_section (darwin_sections[text_coal_section]);
10985 fputs ("\t.weak_definition\t", asm_out_file);
10986 assemble_name (asm_out_file, name);
10987 fputs ("\n\t.private_extern\t", asm_out_file);
10988 assemble_name (asm_out_file, name);
10989 putc ('\n', asm_out_file);
10990 ASM_OUTPUT_LABEL (asm_out_file, name);
10991 DECL_WEAK (decl) = 1;
/* ELF with comdat support: hidden linkonce section.  */
10995 if (USE_HIDDEN_LINKONCE)
10997 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
10999 targetm.asm_out.unique_section (decl, 0);
11000 switch_to_section (get_named_section (decl, NULL, 0));
11002 targetm.asm_out.globalize_label (asm_out_file, name);
11003 fputs ("\t.hidden\t", asm_out_file);
11004 assemble_name (asm_out_file, name);
11005 putc ('\n', asm_out_file);
11006 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
/* Fallback: ordinary label in the text section.  */
11010 switch_to_section (text_section);
11011 ASM_OUTPUT_LABEL (asm_out_file, name);
11014 DECL_INITIAL (decl) = make_node (BLOCK);
11015 current_function_decl = decl;
11016 allocate_struct_function (decl, false);
11017 init_function_start (decl);
11018 first_function_block_is_cold = false;
11019 /* Make sure unwind info is emitted for the thunk if needed. */
11020 final_start_function (emit_barrier (), asm_out_file, 1);
11022 /* Pad stack IP move with 4 instructions (two NOPs count
11023 as one instruction). */
11024 if (TARGET_PAD_SHORT_FUNCTION)
11029 fputs ("\tnop\n", asm_out_file);
/* The thunk body: mov (%esp), %REG; ret.  */
11032 xops[0] = gen_rtx_REG (Pmode, regno);
11033 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
11034 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
11035 output_asm_insn ("%!ret", NULL);
11036 final_end_function ();
11037 init_insn_lengths ();
11038 free_after_compilation (cfun);
11040 current_function_decl = NULL;
11043 if (flag_split_stack)
11044 file_end_indicate_split_stack ();
11047 /* Emit code for the SET_GOT patterns.  DEST is the register to load
   with the PIC base; LABEL, if non-NULL, is the label at which the
   value is needed (used by the Mach-O path below).  Records DEST in
   pic_labels_used so ix86_code_end later emits the matching thunk.  */
11050 output_set_got (rtx dest, rtx label)
11056 if (TARGET_VXWORKS_RTP && flag_pic)
11058 /* Load (*VXWORKS_GOTT_BASE) into the PIC register.  */
11059 xops[2] = gen_rtx_MEM (Pmode,
11060 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
11061 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
11063 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
11064 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
11065 an unadorned address.  */
11066 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
11067 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
11068 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
11072 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
11077 /* We don't need a pic base, we're not producing pic.  */
11078 gcc_unreachable ();
/* Deep-branch-prediction path: materialize the PC via a mov from a
   label rather than a call.  */
11080 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
11081 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
11082 targetm.asm_out.internal_label (asm_out_file, "L",
11083 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
/* Otherwise call the per-register get-PC thunk; mark it used so the
   thunk body is emitted at end of compilation.  */
11088 get_pc_thunk_name (name, REGNO (dest));
11089 pic_labels_used |= 1 << REGNO (dest);
11091 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
11092 xops[2] = gen_rtx_MEM (QImode, xops[2]);
11093 output_asm_insn ("%!call\t%X2", xops);
11096 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
11097 This is what will be referenced by the Mach-O PIC subsystem.  */
11098 if (machopic_should_output_picbase_label () || !label)
11099 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
11101 /* When we are restoring the pic base at the site of a nonlocal label,
11102 and we decided to emit the pic base above, we will still output a
11103 local label used for calculating the correction offset (even though
11104 the offset will be 0 in that case).  */
11106 targetm.asm_out.internal_label (asm_out_file, "L",
11107 CODE_LABEL_NUMBER (label));
11112 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
11117 /* Generate a "push" pattern for input ARG.  Also updates the frame
   state (cfa_offset/sp_offset) to account for the word pushed.  */
11122 struct machine_function *m = cfun->machine;
/* Track the stack-pointer movement in the CFA bookkeeping.  */
11124 if (m->fs.cfa_reg == stack_pointer_rtx)
11125 m->fs.cfa_offset += UNITS_PER_WORD;
11126 m->fs.sp_offset += UNITS_PER_WORD;
/* Pushes always operate on full words; widen sub-word registers.  */
11128 if (REG_P (arg) && GET_MODE (arg) != word_mode)
11129 arg = gen_rtx_REG (word_mode, REGNO (arg));
11131 return gen_rtx_SET (gen_rtx_MEM (word_mode,
11132 gen_rtx_PRE_DEC (Pmode,
11133 stack_pointer_rtx)),
11137 /* Generate a "pop" pattern for input ARG.  Note: unlike gen_push,
   this does not touch cfun->machine->fs.  */
11142 if (REG_P (arg) && GET_MODE (arg) != word_mode)
11143 arg = gen_rtx_REG (word_mode, REGNO (arg));
11145 return gen_rtx_SET (arg,
11146 gen_rtx_MEM (word_mode,
11147 gen_rtx_POST_INC (Pmode,
11148 stack_pointer_rtx)));
11151 /* Return >= 0 if there is an unused call-clobbered register available
11152 for the entire function.  Returns INVALID_REGNUM if none is free or
   a pseudo PIC register is in use.  */
11154 static unsigned int
11155 ix86_select_alt_pic_regnum (void)
11157 if (ix86_use_pseudo_pic_reg ())
11158 return INVALID_REGNUM;
11162 && !ix86_current_function_calls_tls_descriptor)
11165 /* Can't use the same register for both PIC and DRAP.  */
11166 if (crtl->drap_reg)
11167 drap = REGNO (crtl->drap_reg);
/* Scan eax/ecx/edx (regnos 2..0) for one never live in this function.  */
11170 for (i = 2; i >= 0; --i)
11171 if (i != drap && !df_regs_ever_live_p (i))
11175 return INVALID_REGNUM;
11178 /* Return TRUE if we need to save REGNO in the prologue.
   MAYBE_EH_RETURN also counts the EH return data registers as needing
   a save when the function calls __builtin_eh_return.  */
11181 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
11183 if (regno == REAL_PIC_OFFSET_TABLE_REGNUM
11184 && pic_offset_table_rtx)
11186 if (ix86_use_pseudo_pic_reg ())
11188 /* REAL_PIC_OFFSET_TABLE_REGNUM used by call to
11189 _mcount in prologue.  */
11190 if (!TARGET_64BIT && flag_pic && crtl->profile)
11193 else if (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
11195 || crtl->calls_eh_return
11196 || crtl->uses_const_pool
11197 || cfun->has_nonlocal_label)
/* Only save the PIC register if no alternate call-clobbered register
   can hold the GOT pointer for the whole function.  */
11198 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
11201 if (crtl->calls_eh_return && maybe_eh_return)
11206 unsigned test = EH_RETURN_DATA_REGNO (i);
11207 if (test == INVALID_REGNUM)
11215 && regno == REGNO (crtl->drap_reg)
11216 && !cfun->machine->no_drap_save_restore)
/* Default rule: live, callee-saved, not fixed, and not the frame
   pointer when one is being set up by the prologue.  */
11219 return (df_regs_ever_live_p (regno)
11220 && !call_used_regs[regno]
11221 && !fixed_regs[regno]
11222 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
11225 /* Return number of saved general purpose registers.  */
11228 ix86_nsaved_regs (void)
11233 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11234 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true))
11239 /* Return number of saved SSE registers.  Only the 64-bit MS ABI
   saves SSE registers; everywhere else the count is zero.  */
11242 ix86_nsaved_sseregs (void)
11247 if (!TARGET_64BIT_MS_ABI)
11249 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11250 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
11255 /* Given FROM and TO register numbers, say whether this elimination is
11256 allowed.  If stack alignment is needed, we can only replace argument
11257 pointer with hard frame pointer, or replace frame pointer with stack
11258 pointer.  Otherwise, frame pointer elimination is automatically
11259 handled and all other eliminations are valid.  */
11262 ix86_can_eliminate (const int from, const int to)
11264 if (stack_realign_fp)
11265 return ((from == ARG_POINTER_REGNUM
11266 && to == HARD_FRAME_POINTER_REGNUM)
11267 || (from == FRAME_POINTER_REGNUM
11268 && to == STACK_POINTER_REGNUM))
/* Eliminating to the stack pointer is only valid when no frame
   pointer is required; everything else is always fine.  */
11270 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
11273 /* Return the offset between two registers, one to be eliminated, and the other
11274 its replacement, at the start of a routine.  Offsets are derived from
   the freshly computed frame layout.  */
11277 ix86_initial_elimination_offset (int from, int to)
11279 struct ix86_frame frame;
11280 ix86_compute_frame_layout (&frame);
11282 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
11283 return frame.hard_frame_pointer_offset;
11284 else if (from == FRAME_POINTER_REGNUM
11285 && to == HARD_FRAME_POINTER_REGNUM)
11286 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
/* Remaining cases eliminate to the stack pointer.  */
11289 gcc_assert (to == STACK_POINTER_REGNUM);
11291 if (from == ARG_POINTER_REGNUM)
11292 return frame.stack_pointer_offset;
11294 gcc_assert (from == FRAME_POINTER_REGNUM);
11295 return frame.stack_pointer_offset - frame.frame_pointer_offset;
11299 /* In a dynamically-aligned function, we can't know the offset from
11300 stack pointer to frame pointer, so we must ensure that setjmp
11301 eliminates fp against the hard fp (%ebp) rather than trying to
11302 index from %esp up to the top of the frame across a gap that is
11303 of unknown (at compile-time) size.  */
11305 ix86_builtin_setjmp_frame_value (void)
11307 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
11310 /* When using -fsplit-stack, the allocation routines set a field in
11311 the TCB to the bottom of the stack plus this much space, measured
11314 #define SPLIT_STACK_AVAILABLE 256
11316 /* Fill structure ix86_frame about frame of currently computed function.
   Computes all the offsets (register save area, SSE save area, locals,
   outgoing args, red zone) relative to the CFA, top-down.  */
11319 ix86_compute_frame_layout (struct ix86_frame *frame)
11321 unsigned HOST_WIDE_INT stack_alignment_needed;
11322 HOST_WIDE_INT offset;
11323 unsigned HOST_WIDE_INT preferred_alignment;
11324 HOST_WIDE_INT size = get_frame_size ();
11325 HOST_WIDE_INT to_allocate;
11327 frame->nregs = ix86_nsaved_regs ();
11328 frame->nsseregs = ix86_nsaved_sseregs ();
11330 /* 64-bit MS ABI seem to require stack alignment to be always 16,
11331 except for function prologues, leaf functions and when the default
11332 incoming stack boundary is overridden at command line or via
11333 force_align_arg_pointer attribute.  */
11334 if ((TARGET_64BIT_MS_ABI && crtl->preferred_stack_boundary < 128)
11335 && (!crtl->is_leaf || cfun->calls_alloca != 0
11336 || ix86_current_function_calls_tls_descriptor
11337 || ix86_incoming_stack_boundary < 128))
11339 crtl->preferred_stack_boundary = 128;
11340 crtl->stack_alignment_needed = 128;
11342 /* preferred_stack_boundary is never updated for call
11343 expanded from tls descriptor.  Update it here.  We don't update it in
11344 expand stage because according to the comments before
11345 ix86_current_function_calls_tls_descriptor, tls calls may be optimized
11347 else if (ix86_current_function_calls_tls_descriptor
11348 && crtl->preferred_stack_boundary < PREFERRED_STACK_BOUNDARY)
11350 crtl->preferred_stack_boundary = PREFERRED_STACK_BOUNDARY;
11351 if (crtl->stack_alignment_needed < PREFERRED_STACK_BOUNDARY)
11352 crtl->stack_alignment_needed = PREFERRED_STACK_BOUNDARY;
/* Convert alignments from bits to bytes for the offset arithmetic.  */
11355 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
11356 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
11358 gcc_assert (!size || stack_alignment_needed);
11359 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
11360 gcc_assert (preferred_alignment <= stack_alignment_needed);
11362 /* For SEH we have to limit the amount of code movement into the prologue.
11363 At present we do this via a BLOCKAGE, at which point there's very little
11364 scheduling that can be done, which means that there's very little point
11365 in doing anything except PUSHs.  */
11367 cfun->machine->use_fast_prologue_epilogue = false;
11369 /* During reload iteration the amount of registers saved can change.
11370 Recompute the value as needed.  Do not recompute when amount of registers
11371 didn't change as reload does multiple calls to the function and does not
11372 expect the decision to change within single iteration.  */
11373 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun))
11374 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
11376 int count = frame->nregs;
11377 struct cgraph_node *node = cgraph_node::get (current_function_decl);
11379 cfun->machine->use_fast_prologue_epilogue_nregs = count;
11381 /* The fast prologue uses move instead of push to save registers.  This
11382 is significantly longer, but also executes faster as modern hardware
11383 can execute the moves in parallel, but can't do that for push/pop.
11385 Be careful about choosing what prologue to emit: When function takes
11386 many instructions to execute we may use slow version as well as in
11387 case function is known to be outside hot spot (this is known with
11388 feedback only).  Weight the size of function by number of registers
11389 to save as it is cheap to use one or two push instructions but very
11390 slow to use many of them.  */
11392 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
11393 if (node->frequency < NODE_FREQUENCY_NORMAL
11394 || (flag_branch_probabilities
11395 && node->frequency < NODE_FREQUENCY_HOT))
11396 cfun->machine->use_fast_prologue_epilogue = false;
11398 cfun->machine->use_fast_prologue_epilogue
11399 = !expensive_function_p (count);
11402 frame->save_regs_using_mov
11403 = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
11404 /* If static stack checking is enabled and done with probes,
11405 the registers need to be saved before allocating the frame.  */
11406 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
11408 /* Skip return address.  */
11409 offset = UNITS_PER_WORD;
11411 /* Skip pushed static chain.  */
11412 if (ix86_static_chain_on_stack)
11413 offset += UNITS_PER_WORD;
11415 /* Skip saved base pointer.  */
11416 if (frame_pointer_needed)
11417 offset += UNITS_PER_WORD;
11418 frame->hfp_save_offset = offset;
11420 /* The traditional frame pointer location is at the top of the frame.  */
11421 frame->hard_frame_pointer_offset = offset;
11423 /* Register save area */
11424 offset += frame->nregs * UNITS_PER_WORD;
11425 frame->reg_save_offset = offset;
11427 /* On SEH target, registers are pushed just before the frame pointer
11430 frame->hard_frame_pointer_offset = offset;
11432 /* Align and set SSE register save area.  */
11433 if (frame->nsseregs)
11435 /* The only ABI that has saved SSE registers (Win64) also has a
11436 16-byte aligned default stack, and thus we don't need to be
11437 within the re-aligned local stack frame to save them.  In case
11438 incoming stack boundary is aligned to less than 16 bytes,
11439 unaligned move of SSE register will be emitted, so there is
11440 no point to round up the SSE register save area outside the
11441 re-aligned local stack frame to 16 bytes.  */
11442 if (ix86_incoming_stack_boundary >= 128)
11443 offset = ROUND_UP (offset, 16);
11444 offset += frame->nsseregs * 16;
11446 frame->sse_reg_save_offset = offset;
11448 /* The re-aligned stack starts here.  Values before this point are not
11449 directly comparable with values below this point.  In order to make
11450 sure that no value happens to be the same before and after, force
11451 the alignment computation below to add a non-zero value.  */
11452 if (stack_realign_fp)
11453 offset = ROUND_UP (offset, stack_alignment_needed)
11456 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
11457 offset += frame->va_arg_size;
11459 /* Align start of frame for local function.  */
11460 if (stack_realign_fp
11461 || offset != frame->sse_reg_save_offset
11464 || cfun->calls_alloca
11465 || ix86_current_function_calls_tls_descriptor)
11466 offset = ROUND_UP (offset, stack_alignment_needed);
11468 /* Frame pointer points here.  */
11469 frame->frame_pointer_offset = offset;
11473 /* Add outgoing arguments area.  Can be skipped if we eliminated
11474 all the function calls as dead code.
11475 Skipping is however impossible when function calls alloca.  Alloca
11476 expander assumes that last crtl->outgoing_args_size
11477 of stack frame are unused.  */
11478 if (ACCUMULATE_OUTGOING_ARGS
11479 && (!crtl->is_leaf || cfun->calls_alloca
11480 || ix86_current_function_calls_tls_descriptor))
11482 offset += crtl->outgoing_args_size;
11483 frame->outgoing_arguments_size = crtl->outgoing_args_size;
11486 frame->outgoing_arguments_size = 0;
11488 /* Align stack boundary.  Only needed if we're calling another function
11489 or using alloca.  */
11490 if (!crtl->is_leaf || cfun->calls_alloca
11491 || ix86_current_function_calls_tls_descriptor)
11492 offset = ROUND_UP (offset, preferred_alignment);
11494 /* We've reached end of stack frame.  */
11495 frame->stack_pointer_offset = offset;
11497 /* Size prologue needs to allocate.  */
11498 to_allocate = offset - frame->sse_reg_save_offset;
/* Moves are not profitable for tiny frames, and not encodable for
   allocations that don't fit in a 32-bit displacement.  */
11500 if ((!to_allocate && frame->nregs <= 1)
11501 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
11502 frame->save_regs_using_mov = false;
11504 if (ix86_using_red_zone ()
11505 && crtl->sp_is_unchanging
11507 && !ix86_current_function_calls_tls_descriptor)
11509 frame->red_zone_size = to_allocate;
11510 if (frame->save_regs_using_mov)
11511 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
11512 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
11513 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
11516 frame->red_zone_size = 0;
11517 frame->stack_pointer_offset -= frame->red_zone_size;
11519 /* The SEH frame pointer location is near the bottom of the frame.
11520 This is enforced by the fact that the difference between the
11521 stack pointer and the frame pointer is limited to 240 bytes in
11522 the unwind data structure.  */
11525 HOST_WIDE_INT diff;
11527 /* If we can leave the frame pointer where it is, do so.  Also, returns
11528 the establisher frame for __builtin_frame_address (0).  */
11529 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
11530 if (diff <= SEH_MAX_FRAME_SIZE
11531 && (diff > 240 || (diff & 15) != 0)
11532 && !crtl->accesses_prior_frames)
11534 /* Ideally we'd determine what portion of the local stack frame
11535 (within the constraint of the lowest 240) is most heavily used.
11536 But without that complication, simply bias the frame pointer
11537 by 128 bytes so as to maximize the amount of the local stack
11538 frame that is addressable with 8-bit offsets.  */
11539 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
11544 /* This is semi-inlined memory_address_length, but simplified
11545 since we know that we're always dealing with reg+offset, and
11546 to avoid having to create and discard all that rtl.  Returns the
   extra encoding bytes (disp/SIB) needed to address OFFSET(REGNO).  */
11549 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
11555 /* EBP and R13 cannot be encoded without an offset.  */
11556 len = (regno == BP_REG || regno == R13_REG);
11558 else if (IN_RANGE (offset, -128, 127))
11561 /* ESP and R12 must be encoded with a SIB byte.  */
11562 if (regno == SP_REG || regno == R12_REG)
11568 /* Return an RTX that points to CFA_OFFSET within the stack frame.
11569 The valid base registers are taken from CFUN->MACHINE->FS.  */
11572 choose_baseaddr (HOST_WIDE_INT cfa_offset)
11574 const struct machine_function *m = cfun->machine;
11575 rtx base_reg = NULL;
11576 HOST_WIDE_INT base_offset = 0;
11578 if (m->use_fast_prologue_epilogue)
11580 /* Choose the base register most likely to allow the most scheduling
11581 opportunities.  Generally FP is valid throughout the function,
11582 while DRAP must be reloaded within the epilogue.  But choose either
11583 over the SP due to increased encoding size.  */
11585 if (m->fs.fp_valid)
11587 base_reg = hard_frame_pointer_rtx;
11588 base_offset = m->fs.fp_offset - cfa_offset;
11590 else if (m->fs.drap_valid)
11592 base_reg = crtl->drap_reg;
11593 base_offset = 0 - cfa_offset;
11595 else if (m->fs.sp_valid)
11597 base_reg = stack_pointer_rtx;
11598 base_offset = m->fs.sp_offset - cfa_offset;
11603 HOST_WIDE_INT toffset;
11604 int len = 16, tlen;
11606 /* Choose the base register with the smallest address encoding.
11607 With a tie, choose FP > DRAP > SP.  */
11608 if (m->fs.sp_valid)
11610 base_reg = stack_pointer_rtx;
11611 base_offset = m->fs.sp_offset - cfa_offset;
11612 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
11614 if (m->fs.drap_valid)
11616 toffset = 0 - cfa_offset;
11617 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
11620 base_reg = crtl->drap_reg;
11621 base_offset = toffset;
11625 if (m->fs.fp_valid)
11627 toffset = m->fs.fp_offset - cfa_offset;
11628 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
11631 base_reg = hard_frame_pointer_rtx;
11632 base_offset = toffset;
/* At least one base register must be valid at this point.  */
11637 gcc_assert (base_reg != NULL);
11639 return plus_constant (Pmode, base_reg, base_offset);
11642 /* Emit code to save registers in the prologue.  Uses push insns,
   iterating from the highest register number downwards; each push is
   marked frame-related for the unwinder.  */
11645 ix86_emit_save_regs (void)
11647 unsigned int regno;
11650 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
11651 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true))
11653 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
11654 RTX_FRAME_RELATED_P (insn) = 1;
11658 /* Emit a single register save at CFA - CFA_OFFSET using a MOV of
   mode MODE, and attach whatever CFI reg-note dwarf2out needs to
   describe the save correctly.  */
11661 ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno,
11662 HOST_WIDE_INT cfa_offset)
11664 struct machine_function *m = cfun->machine;
11665 rtx reg = gen_rtx_REG (mode, regno);
11666 rtx unspec = NULL_RTX;
11667 rtx mem, addr, base, insn;
11668 unsigned int align;
11670 addr = choose_baseaddr (cfa_offset);
11671 mem = gen_frame_mem (mode, addr);
11673 /* The location is aligned up to INCOMING_STACK_BOUNDARY.  */
11674 align = MIN (GET_MODE_ALIGNMENT (mode), INCOMING_STACK_BOUNDARY);
11675 set_mem_align (mem, align);
11677 /* SSE saves are not within re-aligned local stack frame.
11678 In case INCOMING_STACK_BOUNDARY is misaligned, we have
11679 to emit unaligned store.  */
11680 if (mode == V4SFmode && align < 128)
11681 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (1, reg), UNSPEC_STOREU);
11683 insn = emit_insn (gen_rtx_SET (mem, unspec ? unspec : reg));
11684 RTX_FRAME_RELATED_P (insn) = 1;
/* Strip the offset so we can inspect the bare base register.  */
11687 if (GET_CODE (base) == PLUS)
11688 base = XEXP (base, 0);
11689 gcc_checking_assert (REG_P (base));
11691 /* When saving registers into a re-aligned local stack frame, avoid
11692 any tricky guessing by dwarf2out.  */
11693 if (m->fs.realigned)
11695 gcc_checking_assert (stack_realign_drap);
11697 if (regno == REGNO (crtl->drap_reg))
11699 /* A bit of a hack.  We force the DRAP register to be saved in
11700 the re-aligned stack frame, which provides us with a copy
11701 of the CFA that will last past the prologue.  Install it.  */
11702 gcc_checking_assert (cfun->machine->fs.fp_valid);
11703 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
11704 cfun->machine->fs.fp_offset - cfa_offset);
11705 mem = gen_rtx_MEM (mode, addr);
11706 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
11710 /* The frame pointer is a stable reference within the
11711 aligned frame.  Use it.  */
11712 gcc_checking_assert (cfun->machine->fs.fp_valid);
11713 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
11714 cfun->machine->fs.fp_offset - cfa_offset);
11715 mem = gen_rtx_MEM (mode, addr);
11716 add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
11720 /* The memory may not be relative to the current CFA register,
11721 which means that we may need to generate a new pattern for
11722 use by the unwind info.  */
11723 else if (base != m->fs.cfa_reg)
11725 addr = plus_constant (Pmode, m->fs.cfa_reg,
11726 m->fs.cfa_offset - cfa_offset);
11727 mem = gen_rtx_MEM (mode, addr);
11728 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, reg));
11731 add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
11734 /* Emit code to save registers using MOV insns.
11735 First register is stored at CFA - CFA_OFFSET; each subsequent save
   goes one word lower.  */
11737 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
11739 unsigned int regno;
11741 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11742 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true))
11744 ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
11745 cfa_offset -= UNITS_PER_WORD;
11749 /* Emit code to save SSE registers using MOV insns.
11750 First register is stored at CFA - CFA_OFFSET; slots are 16 bytes
   (V4SFmode) apart.  */
11752 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
11754 unsigned int regno;
11756 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11757 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
11759 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
11760 cfa_offset -= GET_MODE_SIZE (V4SFmode);
/* Chain of REG_CFA_RESTORE notes waiting to be attached to the next
   stack-manipulation insn (GC-rooted since it holds rtl).  */
11764 static GTY(()) rtx queued_cfa_restores;
11766 /* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
11767 manipulation insn.  The value is on the stack at CFA - CFA_OFFSET.
11768 Don't add the note if the previously saved value will be left untouched
11769 within stack red-zone till return, as unwinders can find the same value
11770 in the register and on the stack.  */
11773 ix86_add_cfa_restore_note (rtx_insn *insn, rtx reg, HOST_WIDE_INT cfa_offset)
11775 if (!crtl->shrink_wrapped
11776 && cfa_offset <= cfun->machine->fs.red_zone_offset)
11781 add_reg_note (insn, REG_CFA_RESTORE, reg);
11782 RTX_FRAME_RELATED_P (insn) = 1;
/* No insn to hang the note on yet; queue it for later.  */
11785 queued_cfa_restores
11786 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
11789 /* Add queued REG_CFA_RESTORE notes if any to INSN.  */
11792 ix86_add_queued_cfa_restore_notes (rtx insn)
11795 if (!queued_cfa_restores)
/* Splice the queued chain onto the front of INSN's note list.  */
11797 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
11799 XEXP (last, 1) = REG_NOTES (insn);
11800 REG_NOTES (insn) = queued_cfa_restores;
11801 queued_cfa_restores = NULL_RTX;
11802 RTX_FRAME_RELATED_P (insn) = 1;
11805 /* Expand prologue or epilogue stack adjustment: DEST = SRC + OFFSET.
11806 The pattern exist to put a dependency on all ebp-based memory accesses.
11807 STYLE should be negative if instructions should be marked as frame related,
11808 zero if %r11 register is live and cannot be freely used and positive
   otherwise.  SET_CFA requests that the CFA tracking state be updated
   to the destination register.  */
11812 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
11813 int style, bool set_cfa)
11815 struct machine_function *m = cfun->machine;
11817 bool add_frame_related_expr = false;
11819 if (Pmode == SImode)
11820 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
11821 else if (x86_64_immediate_operand (offset, DImode))
11822 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
/* Offset doesn't fit in a 32-bit immediate: stage it through a
   scratch register first.  */
11826 /* r11 is used by indirect sibcall return as well, set before the
11827 epilogue and used after the epilogue.  */
11829 tmp = gen_rtx_REG (DImode, R11_REG);
11832 gcc_assert (src != hard_frame_pointer_rtx
11833 && dest != hard_frame_pointer_rtx);
11834 tmp = hard_frame_pointer_rtx;
11836 insn = emit_insn (gen_rtx_SET (tmp, offset));
11838 add_frame_related_expr = true;
11840 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
11843 insn = emit_insn (insn);
11845 ix86_add_queued_cfa_restore_notes (insn);
/* Keep the unwind CFA state in sync with the adjustment.  */
11851 gcc_assert (m->fs.cfa_reg == src);
11852 m->fs.cfa_offset += INTVAL (offset);
11853 m->fs.cfa_reg = dest;
11855 r = gen_rtx_PLUS (Pmode, src, offset);
11856 r = gen_rtx_SET (dest, r);
11857 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
11858 RTX_FRAME_RELATED_P (insn) = 1;
11860 else if (style < 0)
11862 RTX_FRAME_RELATED_P (insn) = 1;
11863 if (add_frame_related_expr)
11865 rtx r = gen_rtx_PLUS (Pmode, src, offset);
11866 r = gen_rtx_SET (dest, r);
11867 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
/* Track the new stack-pointer offset/validity in machine state.  */
11871 if (dest == stack_pointer_rtx)
11873 HOST_WIDE_INT ooffset = m->fs.sp_offset;
11874 bool valid = m->fs.sp_valid;
11876 if (src == hard_frame_pointer_rtx)
11878 valid = m->fs.fp_valid;
11879 ooffset = m->fs.fp_offset;
11881 else if (src == crtl->drap_reg)
11883 valid = m->fs.drap_valid;
11888 /* Else there are two possibilities: SP itself, which we set
11889 up as the default above.  Or EH_RETURN_STACKADJ_RTX, which is
11890 taken care of this by hand along the eh_return path.  */
11891 gcc_checking_assert (src == stack_pointer_rtx
11892 || offset == const0_rtx);
11895 m->fs.sp_offset = ooffset - INTVAL (offset);
11896 m->fs.sp_valid = valid;
11900 /* Find an available register to be used as dynamic realign argument
11901 pointer register.  Such a register will be written in prologue and
11902 used in begin of body, so it must not be
11903 1. parameter passing register.
11905 We reuse static-chain register if it is available.  Otherwise, we
11906 use DI for i386 and R13 for x86-64.  We chose R13 since it has
11909 Return: the regno of chosen register.  */
11911 static unsigned int
11912 find_drap_reg (void)
11914 tree decl = cfun->decl;
11918 /* Use R13 for nested function or function need static chain.
11919 Since function with tail call may use any caller-saved
11920 registers in epilogue, DRAP must not use caller-saved
11921 register in such case.  */
11922 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
11929 /* Use DI for nested function or function need static chain.
11930 Since function with tail call may use any caller-saved
11931 registers in epilogue, DRAP must not use caller-saved
11932 register in such case.  */
11933 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
11936 /* Reuse static chain register if it isn't used for parameter
11938 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
11940 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
/* fastcall/thiscall pass arguments in ecx, so it can't be reused.  */
11941 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
11948 /* Handle a "force_align_arg_pointer" attribute: reject it (with a
   warning, not an error) on anything that is not a function type or
   declaration.  */
11951 ix86_handle_force_align_arg_pointer_attribute (tree *node, tree name,
11952 tree, int, bool *no_add_attrs)
11954 if (TREE_CODE (*node) != FUNCTION_TYPE
11955 && TREE_CODE (*node) != METHOD_TYPE
11956 && TREE_CODE (*node) != FIELD_DECL
11957 && TREE_CODE (*node) != TYPE_DECL)
11959 warning (OPT_Wattributes, "%qE attribute only applies to functions",
11961 *no_add_attrs = true;
11967 /* Return minimum incoming stack alignment, in bits.  SIBCALL is true
   when computing the boundary for a sibcall check (affects the
   -mstackrealign special case — TODO confirm; the condition using it
   is not visible here).  */
11969 static unsigned int
11970 ix86_minimum_incoming_stack_boundary (bool sibcall)
11972 unsigned int incoming_stack_boundary;
11974 /* Prefer the one specified at command line.  */
11975 if (ix86_user_incoming_stack_boundary)
11976 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
11977 /* In 32bit, use MIN_STACK_BOUNDARY for incoming stack boundary
11978 if -mstackrealign is used, it isn't used for sibcall check and
11979 estimated stack alignment is 128bit.  */
11981 && ix86_force_align_arg_pointer
11982 && crtl->stack_alignment_estimated == 128)
11983 incoming_stack_boundary = MIN_STACK_BOUNDARY;
11985 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
11987 /* Incoming stack alignment can be changed on individual functions
11988 via force_align_arg_pointer attribute.  We use the smallest
11989 incoming stack boundary.  */
11990 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
11991 && lookup_attribute (ix86_force_align_arg_pointer_string,
11992 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
11993 incoming_stack_boundary = MIN_STACK_BOUNDARY;
11995 /* The incoming stack frame has to be aligned at least at
11996 parm_stack_boundary.  */
11997 if (incoming_stack_boundary < crtl->parm_stack_boundary)
11998 incoming_stack_boundary = crtl->parm_stack_boundary;
12000 /* Stack at entrance of main is aligned by runtime.  We use the
12001 smallest incoming stack boundary.  */
12002 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
12003 && DECL_NAME (current_function_decl)
12004 && MAIN_NAME_P (DECL_NAME (current_function_decl))
12005 && DECL_FILE_SCOPE_P (current_function_decl))
12006 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
12008 return incoming_stack_boundary;
12011 /* Update incoming stack boundary and estimated stack alignment.  */
12014 ix86_update_stack_boundary (void)
12016 ix86_incoming_stack_boundary
12017 = ix86_minimum_incoming_stack_boundary (false);
12019 /* x86_64 vararg needs 16byte stack alignment for register save
12023 && crtl->stack_alignment_estimated < 128)
12024 crtl->stack_alignment_estimated = 128;
12027 /* Handle the TARGET_GET_DRAP_RTX hook.  Return NULL if no DRAP is
12028 needed or an rtx for DRAP otherwise.  */
12031 ix86_get_drap_rtx (void)
/* Without accumulated outgoing args (or with -mforce-drap), we always
   need a DRAP so __builtin_dwarf_cfa () works correctly.  */
12033 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
12034 crtl->need_drap = true;
12036 if (stack_realign_drap)
12038 /* Assign DRAP to vDRAP and returns vDRAP */
12039 unsigned int regno = find_drap_reg ();
12042 rtx_insn *seq, *insn;
12044 arg_ptr = gen_rtx_REG (Pmode, regno);
12045 crtl->drap_reg = arg_ptr;
/* Copy the hard DRAP register into a pseudo right after the
   function entry so the body can use the virtual copy.  */
12048 drap_vreg = copy_to_reg (arg_ptr);
12049 seq = get_insns ();
12052 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
12055 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
12056 RTX_FRAME_RELATED_P (insn) = 1;
12064 /* Handle the TARGET_INTERNAL_ARG_POINTER hook.  */
12067 ix86_internal_arg_pointer (void)
12069 return virtual_incoming_args_rtx;
12072 struct scratch_reg {
12077 /* Return a short-lived scratch register for use on function entry.
12078 In 32-bit mode, it is valid only after the registers are saved
12079 in the prologue. This register must be released by means of
12080 release_scratch_register_on_entry once it is dead. */
12083 get_scratch_register_on_entry (struct scratch_reg *sr)
12091 /* We always use R11 in 64-bit mode. */
/* 32-bit mode: pick a register that is not carrying incoming arguments,
   not the static chain, and not the DRAP register.  */
12096 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
12098 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
12100 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
12101 bool static_chain_p = DECL_STATIC_CHAIN (decl);
12102 int regparm = ix86_function_regparm (fntype, decl);
12104 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
12106 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
12107 for the static chain register. */
12108 if ((regparm < 1 || (fastcall_p && !static_chain_p))
12109 && drap_regno != AX_REG)
12111 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
12112 for the static chain register. */
12113 else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
12115 else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
12117 /* ecx is the static chain register. */
12118 else if (regparm < 3 && !fastcall_p && !thiscall_p
12120 && drap_regno != CX_REG)
/* Fall back to callee-saved registers that the prologue saves anyway,
   so clobbering them here is safe once saved.  */
12122 else if (ix86_save_reg (BX_REG, true))
12124 /* esi is the static chain register. */
12125 else if (!(regparm == 3 && static_chain_p)
12126 && ix86_save_reg (SI_REG, true))
12128 else if (ix86_save_reg (DI_REG, true))
/* Last resort: no free register; one will be pushed/popped instead
   (see the gen_push below).  Avoid colliding with DRAP in %eax.  */
12132 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
12137 sr->reg = gen_rtx_REG (Pmode, regno);
/* The chosen register is live, so spill it with a frame-related push;
   release_scratch_register_on_entry pops it back.  */
12140 rtx_insn *insn = emit_insn (gen_push (sr->reg));
12141 RTX_FRAME_RELATED_P (insn) = 1;
12145 /* Release a scratch register obtained from the preceding function. */
12148 release_scratch_register_on_entry (struct scratch_reg *sr)
12152 struct machine_function *m = cfun->machine;
12153 rtx x, insn = emit_insn (gen_pop (sr->reg));
12155 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
12156 RTX_FRAME_RELATED_P (insn) = 1;
/* Describe the pop to the unwinder as SP = SP + UNITS_PER_WORD.  */
12157 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
12158 x = gen_rtx_SET (stack_pointer_rtx, x);
12159 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
/* Keep the frame-state bookkeeping in sync with the pop.  */
12160 m->fs.sp_offset -= UNITS_PER_WORD;
12164 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
12166 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
12169 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
12171 /* We skip the probe for the first interval + a small dope of 4 words and
12172 probe that many bytes past the specified size to maintain a protection
12173 area at the bottom of the stack. */
12174 const int dope = 4 * UNITS_PER_WORD;
12175 rtx size_rtx = GEN_INT (size), last;
12177 /* See if we have a constant small number of probes to generate. If so,
12178 that's the easy case. The run-time loop is made up of 9 insns in the
12179 generic case while the compile-time loop is made up of 3+2*(n-1) insns
12180 for n # of intervals. */
12181 if (size <= 4 * PROBE_INTERVAL)
12183 HOST_WIDE_INT i, adjust;
12184 bool first_probe = true;
12186 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
12187 values of N from 1 until it exceeds SIZE. If only one probe is
12188 needed, this will not generate any code. Then adjust and probe
12189 to PROBE_INTERVAL + SIZE. */
12190 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
/* The very first adjustment also skips over the initial interval
   plus the dope (protection area); later ones step one interval.  */
12194 adjust = 2 * PROBE_INTERVAL + dope;
12195 first_probe = false;
12198 adjust = PROBE_INTERVAL;
12200 emit_insn (gen_rtx_SET (stack_pointer_rtx,
12201 plus_constant (Pmode, stack_pointer_rtx,
12203 emit_stack_probe (stack_pointer_rtx);
/* Final adjustment to PROBE_INTERVAL + SIZE; the first branch handles
   the case where the loop above emitted nothing.  */
12207 adjust = size + PROBE_INTERVAL + dope;
12209 adjust = size + PROBE_INTERVAL - i;
12211 emit_insn (gen_rtx_SET (stack_pointer_rtx,
12212 plus_constant (Pmode, stack_pointer_rtx,
12214 emit_stack_probe (stack_pointer_rtx);
12216 /* Adjust back to account for the additional first interval. */
12217 last = emit_insn (gen_rtx_SET (stack_pointer_rtx,
12218 plus_constant (Pmode, stack_pointer_rtx,
12219 PROBE_INTERVAL + dope)));
12222 /* Otherwise, do the same as above, but in a loop. Note that we must be
12223 extra careful with variables wrapping around because we might be at
12224 the very top (or the very bottom) of the address space and we have
12225 to be able to handle this case properly; in particular, we use an
12226 equality test for the loop condition. */
12229 HOST_WIDE_INT rounded_size;
12230 struct scratch_reg sr;
12232 get_scratch_register_on_entry (&sr);
12235 /* Step 1: round SIZE to the previous multiple of the interval. */
12237 rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
12240 /* Step 2: compute initial and final value of the loop counter. */
12242 /* SP = SP_0 + PROBE_INTERVAL. */
12243 emit_insn (gen_rtx_SET (stack_pointer_rtx,
12244 plus_constant (Pmode, stack_pointer_rtx,
12245 - (PROBE_INTERVAL + dope))));
12247 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
/* If the offset fits in 31 bits use an SP-relative LEA-style add,
   otherwise materialize -ROUNDED_SIZE first and add SP.  */
12248 if (rounded_size <= (HOST_WIDE_INT_1 << 31))
12249 emit_insn (gen_rtx_SET (sr.reg,
12250 plus_constant (Pmode, stack_pointer_rtx,
12254 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
12255 emit_insn (gen_rtx_SET (sr.reg,
12256 gen_rtx_PLUS (Pmode, sr.reg,
12257 stack_pointer_rtx)));
12261 /* Step 3: the loop
12265 SP = SP + PROBE_INTERVAL
12268 while (SP != LAST_ADDR)
12270 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
12271 values of N from 1 until it is equal to ROUNDED_SIZE. */
12273 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
12276 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
12277 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
12279 if (size != rounded_size)
12281 emit_insn (gen_rtx_SET (stack_pointer_rtx,
12282 plus_constant (Pmode, stack_pointer_rtx,
12283 rounded_size - size)));
12284 emit_stack_probe (stack_pointer_rtx);
12287 /* Adjust back to account for the additional first interval. */
12288 last = emit_insn (gen_rtx_SET (stack_pointer_rtx,
12289 plus_constant (Pmode, stack_pointer_rtx,
12290 PROBE_INTERVAL + dope)));
12292 release_scratch_register_on_entry (&sr);
12295 /* Even if the stack pointer isn't the CFA register, we need to correctly
12296 describe the adjustments made to it, in particular differentiate the
12297 frame-related ones from the frame-unrelated ones. */
/* Attach a two-part REG_FRAME_RELATED_EXPR to the final adjustment:
   the frame-related net allocation of SIZE, and the unwinder-visible
   undo of the extra interval+dope.  */
12300 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
12301 XVECEXP (expr, 0, 0)
12302 = gen_rtx_SET (stack_pointer_rtx,
12303 plus_constant (Pmode, stack_pointer_rtx, -size));
12304 XVECEXP (expr, 0, 1)
12305 = gen_rtx_SET (stack_pointer_rtx,
12306 plus_constant (Pmode, stack_pointer_rtx,
12307 PROBE_INTERVAL + dope + size));
12308 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
12309 RTX_FRAME_RELATED_P (last) = 1;
12311 cfun->machine->fs.sp_offset += size;
12314 /* Make sure nothing is scheduled before we are done. */
12315 emit_insn (gen_blockage ());
12318 /* Adjust the stack pointer up to REG while probing it.  Emits the
   assembly text for the runtime probe loop (label, sub, or-probe,
   cmp, jne) directly to asm_out_file. */
12321 output_adjust_stack_and_probe (rtx reg)
12323 static int labelno = 0;
12327 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
12330 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
12332 /* SP = SP + PROBE_INTERVAL. */
12333 xops[0] = stack_pointer_rtx;
12334 xops[1] = GEN_INT (PROBE_INTERVAL);
12335 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
/* Probe by OR-ing 0 into the word at the new SP: touches the page
   without changing its contents.  */
12338 xops[1] = const0_rtx;
12339 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
12341 /* Test if SP == LAST_ADDR. */
12342 xops[0] = stack_pointer_rtx;
12344 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
12347 fputs ("\tjne\t", asm_out_file);
12348 assemble_name_raw (asm_out_file, loop_lab);
12349 fputc ('\n', asm_out_file);
12354 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
12355 inclusive. These are offsets from the current stack pointer.  Unlike
   ix86_adjust_stack_and_probe, this only probes; it does not move SP. */
12358 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
12360 /* See if we have a constant small number of probes to generate. If so,
12361 that's the easy case. The run-time loop is made up of 6 insns in the
12362 generic case while the compile-time loop is made up of n insns for n #
12364 if (size <= 6 * PROBE_INTERVAL)
12368 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
12369 it exceeds SIZE. If only one probe is needed, this will not
12370 generate any code. Then probe at FIRST + SIZE. */
12371 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
12372 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
12375 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
12379 /* Otherwise, do the same as above, but in a loop. Note that we must be
12380 extra careful with variables wrapping around because we might be at
12381 the very top (or the very bottom) of the address space and we have
12382 to be able to handle this case properly; in particular, we use an
12383 equality test for the loop condition. */
12386 HOST_WIDE_INT rounded_size, last;
12387 struct scratch_reg sr;
12389 get_scratch_register_on_entry (&sr);
12392 /* Step 1: round SIZE to the previous multiple of the interval. */
12394 rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
12397 /* Step 2: compute initial and final value of the loop counter. */
12399 /* TEST_OFFSET = FIRST. */
/* Offsets are negated: the loop register counts down below SP.  */
12400 emit_move_insn (sr.reg, GEN_INT (-first));
12402 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
12403 last = first + rounded_size;
12406 /* Step 3: the loop
12410 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
12413 while (TEST_ADDR != LAST_ADDR)
12415 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
12416 until it is equal to ROUNDED_SIZE. */
12418 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
12421 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
12422 that SIZE is equal to ROUNDED_SIZE. */
12424 if (size != rounded_size)
12425 emit_stack_probe (plus_constant (Pmode,
12426 gen_rtx_PLUS (Pmode,
12429 rounded_size - size));
12431 release_scratch_register_on_entry (&sr);
12434 /* Make sure nothing is scheduled before we are done. */
12435 emit_insn (gen_blockage ());
12438 /* Probe a range of stack addresses from REG to END, inclusive. These are
12439 offsets from the current stack pointer.  Like
   output_adjust_stack_and_probe, this emits the runtime loop as
   assembly text, but indexes off SP instead of moving it. */
12442 output_probe_stack_range (rtx reg, rtx end)
12444 static int labelno = 0;
12448 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
12451 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
12453 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
12455 xops[1] = GEN_INT (PROBE_INTERVAL);
12456 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
12458 /* Probe at TEST_ADDR. */
/* OR 0 into the word at SP+TEST_ADDR: touches the page without
   modifying memory.  */
12459 xops[0] = stack_pointer_rtx;
12461 xops[2] = const0_rtx;
12462 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
12464 /* Test if TEST_ADDR == LAST_ADDR. */
12467 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
12470 fputs ("\tjne\t", asm_out_file);
12471 assemble_name_raw (asm_out_file, loop_lab);
12472 fputc ('\n', asm_out_file);
12477 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
12478 to be generated in correct form. */
12480 ix86_finalize_stack_realign_flags (void)
12482 /* Check if stack realign is really needed after reload, and
12483 stores result in cfun */
12484 unsigned int incoming_stack_boundary
12485 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
12486 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
12487 unsigned int stack_realign = (incoming_stack_boundary
12489 ? crtl->max_used_stack_slot_alignment
12490 : crtl->stack_alignment_needed));
12492 if (crtl->stack_realign_finalized)
12494 /* After stack_realign_needed is finalized, we can no longer
12496 gcc_assert (crtl->stack_realign_needed == stack_realign);
12500 /* If the only reason for frame_pointer_needed is that we conservatively
12501 assumed stack realignment might be needed, but in the end nothing that
12502 needed the stack alignment had been spilled, clear frame_pointer_needed
12503 and say we don't need stack realignment. */
12505 && frame_pointer_needed
12507 && flag_omit_frame_pointer
12508 && crtl->sp_is_unchanging
12509 && !ix86_current_function_calls_tls_descriptor
12510 && !crtl->accesses_prior_frames
12511 && !cfun->calls_alloca
12512 && !crtl->calls_eh_return
12513 /* See ira_setup_eliminable_regset for the rationale. */
12514 && !(STACK_CHECK_MOVING_SP
12515 && flag_stack_check
12517 && cfun->can_throw_non_call_exceptions)
12518 && !ix86_frame_pointer_required ()
12519 && get_frame_size () == 0
12520 && ix86_nsaved_sseregs () == 0
12521 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
12523 HARD_REG_SET set_up_by_prologue, prologue_used;
/* Scan the whole function for any insn that actually requires a
   stack frame beyond SP/ARGP/HFP being set up by the prologue.  */
12526 CLEAR_HARD_REG_SET (prologue_used);
12527 CLEAR_HARD_REG_SET (set_up_by_prologue);
12528 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
12529 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
12530 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
12531 HARD_FRAME_POINTER_REGNUM);
12532 FOR_EACH_BB_FN (bb, cfun)
12535 FOR_BB_INSNS (bb, insn)
12536 if (NONDEBUG_INSN_P (insn)
12537 && requires_stack_frame_p (insn, prologue_used,
12538 set_up_by_prologue))
/* A frame is needed after all: record the conservative answer
   and bail out.  */
12540 crtl->stack_realign_needed = stack_realign;
12541 crtl->stack_realign_finalized = true;
12546 /* If drap has been set, but it actually isn't live at the start
12547 of the function, there is no reason to set it up. */
12548 if (crtl->drap_reg)
12550 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
12551 if (! REGNO_REG_SET_P (DF_LR_IN (bb), REGNO (crtl->drap_reg)))
12553 crtl->drap_reg = NULL_RTX;
12554 crtl->need_drap = false;
12558 cfun->machine->no_drap_save_restore = true;
/* Nothing needed the frame: drop the frame pointer and realignment,
   clamp all alignment fields to the incoming boundary, and rerun
   dataflow so regs_ever_live reflects the simpler prologue.  */
12560 frame_pointer_needed = false;
12561 stack_realign = false;
12562 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
12563 crtl->stack_alignment_needed = incoming_stack_boundary;
12564 crtl->stack_alignment_estimated = incoming_stack_boundary;
12565 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
12566 crtl->preferred_stack_boundary = incoming_stack_boundary;
12567 df_finish_pass (true);
12568 df_scan_alloc (NULL);
12570 df_compute_regs_ever_live (true);
12574 crtl->stack_realign_needed = stack_realign;
12575 crtl->stack_realign_finalized = true;
12578 /* Delete SET_GOT right after entry block if it is allocated to reg. */
12581 ix86_elim_entry_set_got (rtx reg)
12583 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
/* Find the first real (non-debug, non-note) insn of the first block.  */
12584 rtx_insn *c_insn = BB_HEAD (bb);
12585 if (!NONDEBUG_INSN_P (c_insn))
12586 c_insn = next_nonnote_nondebug_insn (c_insn);
12587 if (c_insn && NONJUMP_INSN_P (c_insn))
12589 rtx pat = PATTERN (c_insn);
12590 if (GET_CODE (pat) == PARALLEL)
12592 rtx vec = XVECEXP (pat, 0, 0);
/* Delete only if it is a SET_GOT unspec targeting the same hard
   register as REG (the one the prologue just reloaded).  */
12593 if (GET_CODE (vec) == SET
12594 && XINT (XEXP (vec, 1), 1) == UNSPEC_SET_GOT
12595 && REGNO (XEXP (vec, 0)) == REGNO (reg))
12596 delete_insn (c_insn);
12601 /* Expand the prologue into a bunch of separate insns.  Handles the
   ms_hook prologue, static chain on stack, DRAP setup, register
   saves (push or mov), stack realignment, frame allocation with
   optional stack probing, and SEH/CFI bookkeeping via m->fs. */
12604 ix86_expand_prologue (void)
12606 struct machine_function *m = cfun->machine;
12608 struct ix86_frame frame;
12609 HOST_WIDE_INT allocate;
12610 bool int_registers_saved;
12611 bool sse_registers_saved;
12612 rtx static_chain = NULL_RTX;
12614 ix86_finalize_stack_realign_flags ();
12616 /* DRAP should not coexist with stack_realign_fp */
12617 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
12619 memset (&m->fs, 0, sizeof (m->fs));
12621 /* Initialize CFA state for before the prologue. */
12622 m->fs.cfa_reg = stack_pointer_rtx;
12623 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
12625 /* Track SP offset to the CFA. We continue tracking this after we've
12626 swapped the CFA register away from SP. In the case of re-alignment
12627 this is fudged; we're interested to offsets within the local frame. */
12628 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
12629 m->fs.sp_valid = true;
12631 ix86_compute_frame_layout (&frame);
12633 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
12635 /* We should have already generated an error for any use of
12636 ms_hook on a nested function. */
12637 gcc_checking_assert (!ix86_static_chain_on_stack);
12639 /* Check if profiling is active and we shall use profiling before
12640 prologue variant. If so sorry. */
12641 if (crtl->profile && flag_fentry != 0)
12642 sorry ("ms_hook_prologue attribute isn%'t compatible "
12643 "with -mfentry for 32-bit");
12645 /* In ix86_asm_output_function_label we emitted:
12646 8b ff movl.s %edi,%edi
12648 8b ec movl.s %esp,%ebp
12650 This matches the hookable function prologue in Win32 API
12651 functions in Microsoft Windows XP Service Pack 2 and newer.
12652 Wine uses this to enable Windows apps to hook the Win32 API
12653 functions provided by Wine.
12655 What that means is that we've already set up the frame pointer. */
12657 if (frame_pointer_needed
12658 && !(crtl->drap_reg && crtl->stack_realign_needed))
12662 /* We've decided to use the frame pointer already set up.
12663 Describe this to the unwinder by pretending that both
12664 push and mov insns happen right here.
12666 Putting the unwind info here at the end of the ms_hook
12667 is done so that we can make absolutely certain we get
12668 the required byte sequence at the start of the function,
12669 rather than relying on an assembler that can produce
12670 the exact encoding required.
12672 However it does mean (in the unpatched case) that we have
12673 a 1 insn window where the asynchronous unwind info is
12674 incorrect. However, if we placed the unwind info at
12675 its correct location we would have incorrect unwind info
12676 in the patched case. Which is probably all moot since
12677 I don't expect Wine generates dwarf2 unwind info for the
12678 system libraries that use this feature. */
12680 insn = emit_insn (gen_blockage ());
12682 push = gen_push (hard_frame_pointer_rtx);
12683 mov = gen_rtx_SET (hard_frame_pointer_rtx,
12684 stack_pointer_rtx);
12685 RTX_FRAME_RELATED_P (push) = 1;
12686 RTX_FRAME_RELATED_P (mov) = 1;
12688 RTX_FRAME_RELATED_P (insn) = 1;
12689 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
12690 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
12692 /* Note that gen_push incremented m->fs.cfa_offset, even
12693 though we didn't emit the push insn here. */
12694 m->fs.cfa_reg = hard_frame_pointer_rtx;
12695 m->fs.fp_offset = m->fs.cfa_offset;
12696 m->fs.fp_valid = true;
12700 /* The frame pointer is not needed so pop %ebp again.
12701 This leaves us with a pristine state. */
12702 emit_insn (gen_pop (hard_frame_pointer_rtx));
12706 /* The first insn of a function that accepts its static chain on the
12707 stack is to push the register that would be filled in by a direct
12708 call. This insn will be skipped by the trampoline. */
12709 else if (ix86_static_chain_on_stack)
12711 static_chain = ix86_static_chain (cfun->decl, false);
12712 insn = emit_insn (gen_push (static_chain));
12713 emit_insn (gen_blockage ());
12715 /* We don't want to interpret this push insn as a register save,
12716 only as a stack adjustment. The real copy of the register as
12717 a save will be done later, if needed. */
12718 t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
12719 t = gen_rtx_SET (stack_pointer_rtx, t);
12720 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
12721 RTX_FRAME_RELATED_P (insn) = 1;
12724 /* Emit prologue code to adjust stack alignment and setup DRAP, in case
12725 of DRAP is needed and stack realignment is really needed after reload */
12726 if (stack_realign_drap)
12728 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
12730 /* Only need to push parameter pointer reg if it is caller saved. */
12731 if (!call_used_regs[REGNO (crtl->drap_reg)])
12733 /* Push arg pointer reg */
12734 insn = emit_insn (gen_push (crtl->drap_reg));
12735 RTX_FRAME_RELATED_P (insn) = 1;
12738 /* Grab the argument pointer. */
12739 t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
12740 insn = emit_insn (gen_rtx_SET (crtl->drap_reg, t));
12741 RTX_FRAME_RELATED_P (insn) = 1;
/* From here on the CFA is expressed in terms of the DRAP register.  */
12742 m->fs.cfa_reg = crtl->drap_reg;
12743 m->fs.cfa_offset = 0;
12745 /* Align the stack. */
12746 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
12748 GEN_INT (-align_bytes)));
12749 RTX_FRAME_RELATED_P (insn) = 1;
12751 /* Replicate the return address on the stack so that return
12752 address can be reached via (argp - 1) slot. This is needed
12753 to implement macro RETURN_ADDR_RTX and intrinsic function
12754 expand_builtin_return_addr etc. */
12755 t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
12756 t = gen_frame_mem (word_mode, t);
12757 insn = emit_insn (gen_push (t));
12758 RTX_FRAME_RELATED_P (insn) = 1;
12760 /* For the purposes of frame and register save area addressing,
12761 we've started over with a new frame. */
12762 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
12763 m->fs.realigned = true;
12767 /* Replicate static chain on the stack so that static chain
12768 can be reached via (argp - 2) slot. This is needed for
12769 nested function with stack realignment. */
12770 insn = emit_insn (gen_push (static_chain));
12771 RTX_FRAME_RELATED_P (insn) = 1;
12775 int_registers_saved = (frame.nregs == 0);
12776 sse_registers_saved = (frame.nsseregs == 0);
12778 if (frame_pointer_needed && !m->fs.fp_valid)
12780 /* Note: AT&T enter does NOT have reversed args. Enter is probably
12781 slower on all targets. Also sdb doesn't like it. */
12782 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
12783 RTX_FRAME_RELATED_P (insn) = 1;
12785 /* Push registers now, before setting the frame pointer
12787 if (!int_registers_saved
12789 && !frame.save_regs_using_mov)
12791 ix86_emit_save_regs ();
12792 int_registers_saved = true;
12793 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
12796 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
12798 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
12799 RTX_FRAME_RELATED_P (insn) = 1;
12801 if (m->fs.cfa_reg == stack_pointer_rtx)
12802 m->fs.cfa_reg = hard_frame_pointer_rtx;
12803 m->fs.fp_offset = m->fs.sp_offset;
12804 m->fs.fp_valid = true;
12808 if (!int_registers_saved)
12810 /* If saving registers via PUSH, do so now. */
12811 if (!frame.save_regs_using_mov)
12813 ix86_emit_save_regs ();
12814 int_registers_saved = true;
12815 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
12818 /* When using red zone we may start register saving before allocating
12819 the stack frame saving one cycle of the prologue. However, avoid
12820 doing this if we have to probe the stack; at least on x86_64 the
12821 stack probe can turn into a call that clobbers a red zone location. */
12822 else if (ix86_using_red_zone ()
12823 && (! TARGET_STACK_PROBE
12824 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
12826 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
12827 int_registers_saved = true;
12831 if (stack_realign_fp)
12833 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
12834 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
12836 /* The computation of the size of the re-aligned stack frame means
12837 that we must allocate the size of the register save area before
12838 performing the actual alignment. Otherwise we cannot guarantee
12839 that there's enough storage above the realignment point. */
12840 if (m->fs.sp_offset != frame.sse_reg_save_offset)
12841 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12842 GEN_INT (m->fs.sp_offset
12843 - frame.sse_reg_save_offset),
12846 /* Align the stack. */
12847 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
12849 GEN_INT (-align_bytes)));
12851 /* For the purposes of register save area addressing, the stack
12852 pointer is no longer valid. As for the value of sp_offset,
12853 see ix86_compute_frame_layout, which we need to match in order
12854 to pass verification of stack_pointer_offset at the end. */
12855 m->fs.sp_offset = ROUND_UP (m->fs.sp_offset, align_bytes);
12856 m->fs.sp_valid = false;
12859 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
12861 if (flag_stack_usage_info)
12863 /* We start to count from ARG_POINTER. */
12864 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
12866 /* If it was realigned, take into account the fake frame. */
12867 if (stack_realign_drap)
12869 if (ix86_static_chain_on_stack)
12870 stack_size += UNITS_PER_WORD;
12872 if (!call_used_regs[REGNO (crtl->drap_reg)])
12873 stack_size += UNITS_PER_WORD;
12875 /* This over-estimates by 1 minimal-stack-alignment-unit but
12876 mitigates that by counting in the new return address slot. */
12877 current_function_dynamic_stack_size
12878 += crtl->stack_alignment_needed / BITS_PER_UNIT;
12881 current_function_static_stack_size = stack_size;
12884 /* On SEH target with very large frame size, allocate an area to save
12885 SSE registers (as the very large allocation won't be described). */
12887 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
12888 && !sse_registers_saved)
12890 HOST_WIDE_INT sse_size =
12891 frame.sse_reg_save_offset - frame.reg_save_offset;
12893 gcc_assert (int_registers_saved);
12895 /* No need to do stack checking as the area will be immediately
12897 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12898 GEN_INT (-sse_size), -1,
12899 m->fs.cfa_reg == stack_pointer_rtx);
12900 allocate -= sse_size;
12901 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
12902 sse_registers_saved = true;
12905 /* The stack has already been decremented by the instruction calling us
12906 so probe if the size is non-negative to preserve the protection area. */
12907 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
12909 /* We expect the registers to be saved when probes are used. */
12910 gcc_assert (int_registers_saved);
12912 if (STACK_CHECK_MOVING_SP)
12914 if (!(crtl->is_leaf && !cfun->calls_alloca
12915 && allocate <= PROBE_INTERVAL))
12917 ix86_adjust_stack_and_probe (allocate);
12923 HOST_WIDE_INT size = allocate;
/* Clamp probe range below 2GB on 64-bit so offsets stay in rel32.  */
12925 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
12926 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
12928 if (TARGET_STACK_PROBE)
12930 if (crtl->is_leaf && !cfun->calls_alloca)
12932 if (size > PROBE_INTERVAL)
12933 ix86_emit_probe_stack_range (0, size);
12936 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
12940 if (crtl->is_leaf && !cfun->calls_alloca)
12942 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
12943 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT,
12944 size - STACK_CHECK_PROTECT);
12947 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
12954 else if (!ix86_target_stack_probe ()
12955 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
12957 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12958 GEN_INT (-allocate), -1,
12959 m->fs.cfa_reg == stack_pointer_rtx);
/* Large allocation on a stack-probing target: call the allocation
   worker (e.g. ___chkstk) with the size in %eax/%rax, preserving
   any live %eax / %r10 (static chain) around the call.  */
12963 rtx eax = gen_rtx_REG (Pmode, AX_REG);
12965 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
12966 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
12967 bool eax_live = ix86_eax_live_at_start_p ();
12968 bool r10_live = false;
12971 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
12975 insn = emit_insn (gen_push (eax));
12976 allocate -= UNITS_PER_WORD;
12977 /* Note that SEH directives need to continue tracking the stack
12978 pointer even after the frame pointer has been set up. */
12979 if (sp_is_cfa_reg || TARGET_SEH)
12982 m->fs.cfa_offset += UNITS_PER_WORD;
12983 RTX_FRAME_RELATED_P (insn) = 1;
12984 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
12985 gen_rtx_SET (stack_pointer_rtx,
12986 plus_constant (Pmode, stack_pointer_rtx,
12987 -UNITS_PER_WORD)));
12993 r10 = gen_rtx_REG (Pmode, R10_REG);
12994 insn = emit_insn (gen_push (r10));
12995 allocate -= UNITS_PER_WORD;
12996 if (sp_is_cfa_reg || TARGET_SEH)
12999 m->fs.cfa_offset += UNITS_PER_WORD;
13000 RTX_FRAME_RELATED_P (insn) = 1;
13001 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
13002 gen_rtx_SET (stack_pointer_rtx,
13003 plus_constant (Pmode, stack_pointer_rtx,
13004 -UNITS_PER_WORD)));
13008 emit_move_insn (eax, GEN_INT (allocate));
13009 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
13011 /* Use the fact that AX still contains ALLOCATE. */
13012 adjust_stack_insn = (Pmode == DImode
13013 ? gen_pro_epilogue_adjust_stack_di_sub
13014 : gen_pro_epilogue_adjust_stack_si_sub);
13016 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
13017 stack_pointer_rtx, eax));
13019 if (sp_is_cfa_reg || TARGET_SEH)
13022 m->fs.cfa_offset += allocate;
13023 RTX_FRAME_RELATED_P (insn) = 1;
13024 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
13025 gen_rtx_SET (stack_pointer_rtx,
13026 plus_constant (Pmode, stack_pointer_rtx,
13029 m->fs.sp_offset += allocate;
13031 /* Use stack_pointer_rtx for relative addressing so that code
13032 works for realigned stack, too. */
13033 if (r10_live && eax_live)
13035 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
13036 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
13037 gen_frame_mem (word_mode, t));
13038 t = plus_constant (Pmode, t, UNITS_PER_WORD);
13039 emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
13040 gen_frame_mem (word_mode, t));
13042 else if (eax_live || r10_live)
13044 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
13045 emit_move_insn (gen_rtx_REG (word_mode,
13046 (eax_live ? AX_REG : R10_REG)),
13047 gen_frame_mem (word_mode, t));
13050 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
13052 /* If we haven't already set up the frame pointer, do so now. */
13053 if (frame_pointer_needed && !m->fs.fp_valid)
13055 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
13056 GEN_INT (frame.stack_pointer_offset
13057 - frame.hard_frame_pointer_offset));
13058 insn = emit_insn (insn);
13059 RTX_FRAME_RELATED_P (insn) = 1;
13060 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
13062 if (m->fs.cfa_reg == stack_pointer_rtx)
13063 m->fs.cfa_reg = hard_frame_pointer_rtx;
13064 m->fs.fp_offset = frame.hard_frame_pointer_offset;
13065 m->fs.fp_valid = true;
13068 if (!int_registers_saved)
13069 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
13070 if (!sse_registers_saved)
13071 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
13073 /* For the mcount profiling on 32 bit PIC mode we need to emit SET_GOT
13075 if (!TARGET_64BIT && pic_offset_table_rtx && crtl->profile && !flag_fentry)
13077 rtx pic = gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM);
13078 insn = emit_insn (gen_set_got (pic));
13079 RTX_FRAME_RELATED_P (insn) = 1;
13080 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
13081 emit_insn (gen_prologue_use (pic));
13082 /* Deleting already emitted SET_GOT if exist and allocated to
13083 REAL_PIC_OFFSET_TABLE_REGNUM. */
13084 ix86_elim_entry_set_got (pic);
13087 if (crtl->drap_reg && !crtl->stack_realign_needed)
13089 /* vDRAP is setup but after reload it turns out stack realign
13090 isn't necessary, here we will emit prologue to setup DRAP
13091 without stack realign adjustment */
13092 t = choose_baseaddr (0);
13093 emit_insn (gen_rtx_SET (crtl->drap_reg, t));
13096 /* Prevent instructions from being scheduled into register save push
13097 sequence when access to the redzone area is done through frame pointer.
13098 The offset between the frame pointer and the stack pointer is calculated
13099 relative to the value of the stack pointer at the end of the function
13100 prologue, and moving instructions that access redzone area via frame
13101 pointer inside push sequence violates this assumption. */
13102 if (frame_pointer_needed && frame.red_zone_size)
13103 emit_insn (gen_memory_blockage ());
13105 /* Emit cld instruction if stringops are used in the function. */
13106 if (TARGET_CLD && ix86_current_function_needs_cld)
13107 emit_insn (gen_cld ());
13109 /* SEH requires that the prologue end within 256 bytes of the start of
13110 the function. Prevent instruction schedules that would extend that.
13111 Further, prevent alloca modifications to the stack pointer from being
13112 combined with prologue modifications. */
13114 emit_insn (gen_prologue_use (stack_pointer_rtx));
13117 /* Emit code to restore REG using a POP insn, updating the m->fs
   frame state and attaching the CFI notes the unwinder needs. */
13120 ix86_emit_restore_reg_using_pop (rtx reg)
13122 struct machine_function *m = cfun->machine;
13123 rtx_insn *insn = emit_insn (gen_pop (reg));
13125 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
13126 m->fs.sp_offset -= UNITS_PER_WORD;
13128 if (m->fs.cfa_reg == crtl->drap_reg
13129 && REGNO (reg) == REGNO (crtl->drap_reg))
13131 /* Previously we'd represented the CFA as an expression
13132 like *(%ebp - 8). We've just popped that value from
13133 the stack, which means we need to reset the CFA to
13134 the drap register. This will remain until we restore
13135 the stack pointer. */
13136 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
13137 RTX_FRAME_RELATED_P (insn) = 1;
13139 /* This means that the DRAP register is valid for addressing too. */
13140 m->fs.drap_valid = true;
13144 if (m->fs.cfa_reg == stack_pointer_rtx)
/* SP is the CFA: the pop moves the CFA by one word; describe it.  */
13146 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
13147 x = gen_rtx_SET (stack_pointer_rtx, x);
13148 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
13149 RTX_FRAME_RELATED_P (insn) = 1;
13151 m->fs.cfa_offset -= UNITS_PER_WORD;
13154 /* When the frame pointer is the CFA, and we pop it, we are
13155 swapping back to the stack pointer as the CFA. This happens
13156 for stack frames that don't allocate other data, so we assume
13157 the stack pointer is now pointing at the return address, i.e.
13158 the function entry state, which makes the offset be 1 word. */
13159 if (reg == hard_frame_pointer_rtx)
13161 m->fs.fp_valid = false;
13162 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
13164 m->fs.cfa_reg = stack_pointer_rtx;
13165 m->fs.cfa_offset -= UNITS_PER_WORD;
13167 add_reg_note (insn, REG_CFA_DEF_CFA,
13168 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
13169 GEN_INT (m->fs.cfa_offset)));
13170 RTX_FRAME_RELATED_P (insn) = 1;
13175 /* Emit code to restore saved registers using POP insns.  */
/* NOTE(review): listing elided -- the return-type line and braces are
   not visible.  Walks every hard register and pops those general
   registers that the prologue saved (ix86_save_reg with
   maybe_eh_return == false, i.e. the normal-return save set).  */
13178 ix86_emit_restore_regs_using_pop (void)
13180 unsigned int regno;
13182 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
13183 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, false))
13184 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
13187 /* Emit code and notes for the LEAVE instruction.  */
/* NOTE(review): listing elided -- declaration line and braces missing.
   LEAVE restores %esp from %ebp and pops %ebp, so afterwards SP is
   valid again (one word below the old FP save slot) and FP is not.  */
13190 ix86_emit_leave (void)
13192 struct machine_function *m = cfun->machine;
13193 rtx_insn *insn = emit_insn (ix86_gen_leave ());
/* Attach any CFA-restore notes queued by earlier register restores.  */
13195 ix86_add_queued_cfa_restore_notes (insn);
13197 gcc_assert (m->fs.fp_valid);
13198 m->fs.sp_valid = true;
13199 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
13200 m->fs.fp_valid = false;
/* If the CFA was based on the frame pointer, rebase it on the stack
   pointer and tell the unwinder via REG_CFA_DEF_CFA.  */
13202 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
13204 m->fs.cfa_reg = stack_pointer_rtx;
13205 m->fs.cfa_offset = m->fs.sp_offset;
13207 add_reg_note (insn, REG_CFA_DEF_CFA,
13208 plus_constant (Pmode, stack_pointer_rtx,
13210 RTX_FRAME_RELATED_P (insn) = 1;
13212 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
13216 /* Emit code to restore saved registers using MOV insns.
13217 First register is restored from CFA - CFA_OFFSET.  */
/* NOTE(review): listing elided -- declaration line, braces and some
   local declarations (mem, insn) are not visible here.  */
13219 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
13220 bool maybe_eh_return)
13222 struct machine_function *m = cfun->machine;
13223 unsigned int regno;
13225 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
13226 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
13228 rtx reg = gen_rtx_REG (word_mode, regno);
/* Load the saved value from its slot, addressed via whichever base
   register (SP/FP/DRAP) choose_baseaddr picks for this offset.  */
13232 mem = choose_baseaddr (cfa_offset);
13233 mem = gen_frame_mem (word_mode, mem);
13234 insn = emit_move_insn (reg, mem);
13236 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
13238 /* Previously we'd represented the CFA as an expression
13239 like *(%ebp - 8). We've just popped that value from
13240 the stack, which means we need to reset the CFA to
13241 the drap register. This will remain until we restore
13242 the stack pointer. */
13243 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
13244 RTX_FRAME_RELATED_P (insn) = 1;
13246 /* This means that the DRAP register is valid for addressing. */
13247 m->fs.drap_valid = true;
/* Queue the restore note; each successive slot is one word lower.  */
13250 ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
13252 cfa_offset -= UNITS_PER_WORD;
13256 /* Emit code to restore saved registers using MOV insns.
13257 First register is restored from CFA - CFA_OFFSET.  */
/* NOTE(review): SSE variant -- restores V4SFmode saves.  Listing is
   elided (declaration line, braces, the condition guarding the
   unaligned-load path are not visible).  */
13259 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
13260 bool maybe_eh_return)
13262 unsigned int regno;
13264 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
13265 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
13267 rtx reg = gen_rtx_REG (V4SFmode, regno);
13269 unsigned int align;
13271 mem = choose_baseaddr (cfa_offset);
13272 mem = gen_rtx_MEM (V4SFmode, mem);
13274 /* The location is aligned up to INCOMING_STACK_BOUNDARY. */
13275 align = MIN (GET_MODE_ALIGNMENT (V4SFmode), INCOMING_STACK_BOUNDARY);
13276 set_mem_align (mem, align);
13278 /* SSE saves are not within re-aligned local stack frame.
13279 In case INCOMING_STACK_BOUNDARY is misaligned, we have
13280 to emit unaligned load. */
/* Unaligned path: wrap the MEM in an UNSPEC so an unaligned movups
   pattern is used instead of a plain (possibly aligned) move.  */
13283 rtx unspec = gen_rtx_UNSPEC (V4SFmode, gen_rtvec (1, mem),
13285 emit_insn (gen_rtx_SET (reg, unspec));
13288 emit_insn (gen_rtx_SET (reg, mem));
/* Each SSE slot is 16 bytes (V4SFmode) lower than the previous.  */
13290 ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
13292 cfa_offset -= GET_MODE_SIZE (V4SFmode);
13296 /* Restore function stack, frame, and registers. */
/* NOTE(review): listing heavily elided -- declaration line, braces and
   many statements/conditions are missing between the numbered lines.
   STYLE is presumably 0 for sibcall epilogues, 1 for normal return and
   2 for eh_return (the style != 2 / style == 2 tests below suggest
   this) -- TODO confirm against the full source.  */
13299 ix86_expand_epilogue (int style)
13301 struct machine_function *m = cfun->machine;
13302 struct machine_frame_state frame_state_save = m->fs;
13303 struct ix86_frame frame;
13304 bool restore_regs_via_mov;
13307 ix86_finalize_stack_realign_flags ();
13308 ix86_compute_frame_layout (&frame);
/* Re-derive which frame pointers are usable at the start of the
   epilogue and sanity-check them against the computed layout.  */
13310 m->fs.sp_valid = (!frame_pointer_needed
13311 || (crtl->sp_is_unchanging
13312 && !stack_realign_fp));
13313 gcc_assert (!m->fs.sp_valid
13314 || m->fs.sp_offset == frame.stack_pointer_offset);
13316 /* The FP must be valid if the frame pointer is present. */
13317 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
13318 gcc_assert (!m->fs.fp_valid
13319 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
13321 /* We must have *some* valid pointer to the stack frame. */
13322 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
13324 /* The DRAP is never valid at this point. */
13325 gcc_assert (!m->fs.drap_valid);
13327 /* See the comment about red zone and frame
13328 pointer usage in ix86_expand_prologue. */
13329 if (frame_pointer_needed && frame.red_zone_size)
13330 emit_insn (gen_memory_blockage ());
13332 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
13333 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
13335 /* Determine the CFA offset of the end of the red-zone. */
13336 m->fs.red_zone_offset = 0;
13337 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
13339 /* The red-zone begins below the return address. */
13340 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
13342 /* When the register save area is in the aligned portion of
13343 the stack, determine the maximum runtime displacement that
13344 matches up with the aligned frame. */
13345 if (stack_realign_drap)
13346 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
13350 /* Special care must be taken for the normal return case of a function
13351 using eh_return: the eax and edx registers are marked as saved, but
13352 not restored along this path. Adjust the save location to match. */
13353 if (crtl->calls_eh_return && style != 2)
13354 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
/* Decide between restoring registers with MOV loads versus POPs.
   The chain below encodes the trade-offs (EH, SEH, invalid SP,
   tuning flags); the elided fall-through at 13382 defaults to POPs.  */
13356 /* EH_RETURN requires the use of moves to function properly. */
13357 if (crtl->calls_eh_return)
13358 restore_regs_via_mov = true;
13359 /* SEH requires the use of pops to identify the epilogue. */
13360 else if (TARGET_SEH)
13361 restore_regs_via_mov = false;
13362 /* If we're only restoring one register and sp is not valid then
13363 using a move instruction to restore the register since it's
13364 less work than reloading sp and popping the register. */
13365 else if (!m->fs.sp_valid && frame.nregs <= 1)
13366 restore_regs_via_mov = true;
13367 else if (TARGET_EPILOGUE_USING_MOVE
13368 && cfun->machine->use_fast_prologue_epilogue
13369 && (frame.nregs > 1
13370 || m->fs.sp_offset != frame.reg_save_offset))
13371 restore_regs_via_mov = true;
13372 else if (frame_pointer_needed
13374 && m->fs.sp_offset != frame.reg_save_offset)
13375 restore_regs_via_mov = true;
13376 else if (frame_pointer_needed
13377 && TARGET_USE_LEAVE
13378 && cfun->machine->use_fast_prologue_epilogue
13379 && frame.nregs == 1)
13380 restore_regs_via_mov = true;
13382 restore_regs_via_mov = false;
13384 if (restore_regs_via_mov || frame.nsseregs)
13386 /* Ensure that the entire register save area is addressable via
13387 the stack pointer, if we will restore via sp. */
13389 && m->fs.sp_offset > 0x7fffffff
13390 && !(m->fs.fp_valid || m->fs.drap_valid)
13391 && (frame.nsseregs + frame.nregs) != 0)
13393 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
13394 GEN_INT (m->fs.sp_offset
13395 - frame.sse_reg_save_offset),
13397 m->fs.cfa_reg == stack_pointer_rtx);
13401 /* If there are any SSE registers to restore, then we have to do it
13402 via moves, since there's obviously no pop for SSE regs. */
13403 if (frame.nsseregs)
13404 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
13407 if (restore_regs_via_mov)
13412 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
13414 /* eh_return epilogues need %ecx added to the stack pointer. */
/* eh_return path (condition elided, presumably style == 2): fold the
   EH stack adjustment in SA into the stack/frame pointer restore.  */
13417 rtx sa = EH_RETURN_STACKADJ_RTX;
13420 /* Stack align doesn't work with eh_return. */
13421 gcc_assert (!stack_realign_drap);
13422 /* Neither does regparm nested functions. */
13423 gcc_assert (!ix86_static_chain_on_stack);
13425 if (frame_pointer_needed)
13427 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
13428 t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
13429 emit_insn (gen_rtx_SET (sa, t));
13431 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
13432 insn = emit_move_insn (hard_frame_pointer_rtx, t);
13434 /* Note that we use SA as a temporary CFA, as the return
13435 address is at the proper place relative to it. We
13436 pretend this happens at the FP restore insn because
13437 prior to this insn the FP would be stored at the wrong
13438 offset relative to SA, and after this insn we have no
13439 other reasonable register to use for the CFA. We don't
13440 bother resetting the CFA to the SP for the duration of
13441 the return insn. */
13442 add_reg_note (insn, REG_CFA_DEF_CFA,
13443 plus_constant (Pmode, sa, UNITS_PER_WORD));
13444 ix86_add_queued_cfa_restore_notes (insn);
13445 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
13446 RTX_FRAME_RELATED_P (insn) = 1;
13448 m->fs.cfa_reg = sa;
13449 m->fs.cfa_offset = UNITS_PER_WORD;
13450 m->fs.fp_valid = false;
13452 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
13453 const0_rtx, style, false);
/* No-frame-pointer eh_return variant (the "else" line is elided).  */
13457 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
13458 t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
13459 insn = emit_insn (gen_rtx_SET (stack_pointer_rtx, t));
13460 ix86_add_queued_cfa_restore_notes (insn);
13462 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
13463 if (m->fs.cfa_offset != UNITS_PER_WORD)
13465 m->fs.cfa_offset = UNITS_PER_WORD;
13466 add_reg_note (insn, REG_CFA_DEF_CFA,
13467 plus_constant (Pmode, stack_pointer_rtx,
13469 RTX_FRAME_RELATED_P (insn) = 1;
13472 m->fs.sp_offset = UNITS_PER_WORD;
13473 m->fs.sp_valid = true;
13478 /* SEH requires that the function end with (1) a stack adjustment
13479 if necessary, (2) a sequence of pops, and (3) a return or
13480 jump instruction. Prevent insns from the function body from
13481 being scheduled into this sequence. */
13484 /* Prevent a catch region from being adjacent to the standard
13485 epilogue sequence. Unfortuantely crtl->uses_eh_lsda nor
13486 several other flags that would be interesting to test are
13488 if (flag_non_call_exceptions)
13489 emit_insn (gen_nops (const1_rtx));
13491 emit_insn (gen_blockage ());
13494 /* First step is to deallocate the stack frame so that we can
13495 pop the registers. Also do it on SEH target for very large
13496 frame as the emitted instructions aren't allowed by the ABI in
13498 if (!m->fs.sp_valid
13500 && (m->fs.sp_offset - frame.reg_save_offset
13501 >= SEH_MAX_FRAME_SIZE)))
13503 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
13504 GEN_INT (m->fs.fp_offset
13505 - frame.reg_save_offset),
13508 else if (m->fs.sp_offset != frame.reg_save_offset)
13510 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
13511 GEN_INT (m->fs.sp_offset
13512 - frame.reg_save_offset),
13514 m->fs.cfa_reg == stack_pointer_rtx);
13517 ix86_emit_restore_regs_using_pop ();
13520 /* If we used a stack pointer and haven't already got rid of it,
13522 if (m->fs.fp_valid)
13524 /* If the stack pointer is valid and pointing at the frame
13525 pointer store address, then we only need a pop. */
13526 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
13527 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
13528 /* Leave results in shorter dependency chains on CPUs that are
13529 able to grok it fast. */
13530 else if (TARGET_USE_LEAVE
13531 || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
13532 || !cfun->machine->use_fast_prologue_epilogue)
13533 ix86_emit_leave ();
13536 pro_epilogue_adjust_stack (stack_pointer_rtx,
13537 hard_frame_pointer_rtx,
13538 const0_rtx, style, !using_drap);
13539 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
/* DRAP teardown (guard elided, presumably "if (using_drap)"): recover
   the original stack pointer from the DRAP register, accounting for
   the pushed static chain and the saved DRAP register itself.  */
13545 int param_ptr_offset = UNITS_PER_WORD;
13548 gcc_assert (stack_realign_drap);
13550 if (ix86_static_chain_on_stack)
13551 param_ptr_offset += UNITS_PER_WORD;
13552 if (!call_used_regs[REGNO (crtl->drap_reg)])
13553 param_ptr_offset += UNITS_PER_WORD;
13555 insn = emit_insn (gen_rtx_SET
13556 (stack_pointer_rtx,
13557 gen_rtx_PLUS (Pmode,
13559 GEN_INT (-param_ptr_offset))));
13560 m->fs.cfa_reg = stack_pointer_rtx;
13561 m->fs.cfa_offset = param_ptr_offset;
13562 m->fs.sp_offset = param_ptr_offset;
13563 m->fs.realigned = false;
13565 add_reg_note (insn, REG_CFA_DEF_CFA,
13566 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
13567 GEN_INT (param_ptr_offset)));
13568 RTX_FRAME_RELATED_P (insn) = 1;
13570 if (!call_used_regs[REGNO (crtl->drap_reg)])
13571 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
13574 /* At this point the stack pointer must be valid, and we must have
13575 restored all of the registers. We may not have deallocated the
13576 entire stack frame. We've delayed this until now because it may
13577 be possible to merge the local stack deallocation with the
13578 deallocation forced by ix86_static_chain_on_stack. */
13579 gcc_assert (m->fs.sp_valid);
13580 gcc_assert (!m->fs.fp_valid);
13581 gcc_assert (!m->fs.realigned);
13582 if (m->fs.sp_offset != UNITS_PER_WORD)
13584 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
13585 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
13589 ix86_add_queued_cfa_restore_notes (get_last_insn ());
13591 /* Sibcall epilogues don't want a return instruction. */
13594 m->fs = frame_state_save;
/* pops_args handling: a "pascal"-style callee-pop return, with an
   indirect-return fallback since ret imm16 can only pop 64K bytes.  */
13598 if (crtl->args.pops_args && crtl->args.size)
13600 rtx popc = GEN_INT (crtl->args.pops_args);
13602 /* i386 can only pop 64K bytes. If asked to pop more, pop return
13603 address, do explicit add, and jump indirectly to the caller. */
13605 if (crtl->args.pops_args >= 65536)
13607 rtx ecx = gen_rtx_REG (SImode, CX_REG);
13610 /* There is no "pascal" calling convention in any 64bit ABI. */
13611 gcc_assert (!TARGET_64BIT);
13613 insn = emit_insn (gen_pop (ecx));
13614 m->fs.cfa_offset -= UNITS_PER_WORD;
13615 m->fs.sp_offset -= UNITS_PER_WORD;
13617 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
13618 x = gen_rtx_SET (stack_pointer_rtx, x);
13619 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
13620 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx));
13621 RTX_FRAME_RELATED_P (insn) = 1;
13623 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
13625 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
13628 emit_jump_insn (gen_simple_return_pop_internal (popc));
13631 emit_jump_insn (gen_simple_return_internal ());
13633 /* Restore the state back to the state from the prologue,
13634 so that it's correct for the next epilogue. */
13635 m->fs = frame_state_save;
13638 /* Reset from the function's potential modifications. */
/* NOTE(review): listing elided -- the "static void" line, braces and
   the Mach-O #if guard around the nop logic are not visible here.  */
13641 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED, HOST_WIDE_INT)
/* Undo any renumbering of the PIC register done during compilation.  */
13643 if (pic_offset_table_rtx
13644 && !ix86_use_pseudo_pic_reg ())
13645 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
13647 /* Mach-O doesn't support labels at the end of objects, so if
13648 it looks like we might want one, insert a NOP. */
13650 rtx_insn *insn = get_last_insn ();
13651 rtx_insn *deleted_debug_label = NULL;
/* Scan backwards over trailing notes to find the last real insn,
   remembering any deleted-debug-label note seen on the way.  */
13654 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
13656 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
13657 notes only, instead set their CODE_LABEL_NUMBER to -1,
13658 otherwise there would be code generation differences
13659 in between -g and -g0. */
13660 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
13661 deleted_debug_label = insn;
13662 insn = PREV_INSN (insn);
13667 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
13668 fputs ("\tnop\n", file);
13669 else if (deleted_debug_label)
13670 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
13671 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
13672 CODE_LABEL_NUMBER (insn) = -1;
13678 /* Return a scratch register to use in the split stack prologue. The
13679 split stack prologue is used for -fsplit-stack. It is the first
13680 instructions in the function, even before the regular prologue.
13681 The scratch register can be any caller-saved register which is not
13682 used for parameters or for the static chain. */
/* NOTE(review): listing elided -- braces and the register-number
   return statements for the non-error paths are not visible here.
   Returns INVALID_REGNUM (with a sorry diagnostic) when no free
   caller-saved register exists for the given calling convention.  */
13684 static unsigned int
13685 split_stack_prologue_scratch_regno (void)
13691 bool is_fastcall, is_thiscall;
/* 32-bit path: the usable scratch depends on how many registers the
   parameter-passing convention consumes.  */
13694 is_fastcall = (lookup_attribute ("fastcall",
13695 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
13697 is_thiscall = (lookup_attribute ("thiscall",
13698 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
13700 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
13704 if (DECL_STATIC_CHAIN (cfun->decl))
13706 sorry ("-fsplit-stack does not support fastcall with "
13707 "nested function");
13708 return INVALID_REGNUM;
13712 else if (is_thiscall)
13714 if (!DECL_STATIC_CHAIN (cfun->decl))
13718 else if (regparm < 3)
13720 if (!DECL_STATIC_CHAIN (cfun->decl))
13726 sorry ("-fsplit-stack does not support 2 register "
13727 "parameters for a nested function");
13728 return INVALID_REGNUM;
13735 /* FIXME: We could make this work by pushing a register
13736 around the addition and comparison. */
13737 sorry ("-fsplit-stack does not support 3 register parameters");
13738 return INVALID_REGNUM;
13743 /* A SYMBOL_REF for the function which allocates new stackspace for
/* Cached across functions; created lazily in
   ix86_expand_split_stack_prologue and GC-rooted via GTY.  */
13746 static GTY(()) rtx split_stack_fn;
13748 /* A SYMBOL_REF for the more stack function when using the large
/* Large-code-model variant (__morestack_large_model), also lazy.  */
13751 static GTY(()) rtx split_stack_fn_large;
13753 /* Handle -fsplit-stack. These are the first instructions in the
13754 function, even before the regular prologue. */
/* NOTE(review): listing heavily elided -- the return-type line, braces
   and several guard conditions (e.g. the TARGET_64BIT split around
   13849 and the 32-bit push path at 13922) are not visible here.
   Emits: compare SP-frame_size against the TCB stack limit, branch
   past the call if there is room, otherwise call __morestack (or
   __morestack_large_model) to grow the stack.  */
13757 ix86_expand_split_stack_prologue (void)
13759 struct ix86_frame frame;
13760 HOST_WIDE_INT allocate;
13761 unsigned HOST_WIDE_INT args_size;
13762 rtx_code_label *label;
13763 rtx limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
13764 rtx scratch_reg = NULL_RTX;
13765 rtx_code_label *varargs_label = NULL;
13768 gcc_assert (flag_split_stack && reload_completed);
13770 ix86_finalize_stack_realign_flags ();
13771 ix86_compute_frame_layout (&frame);
13772 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
13774 /* This is the label we will branch to if we have enough stack
13775 space. We expect the basic block reordering pass to reverse this
13776 branch if optimizing, so that we branch in the unlikely case. */
13777 label = gen_label_rtx ();
13779 /* We need to compare the stack pointer minus the frame size with
13780 the stack boundary in the TCB. The stack boundary always gives
13781 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
13782 can compare directly. Otherwise we need to do an addition. */
/* The limit lives in thread-local storage; UNSPEC_STACK_CHECK makes
   the MEM print with the appropriate segment override.  */
13784 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
13785 UNSPEC_STACK_CHECK);
13786 limit = gen_rtx_CONST (Pmode, limit);
13787 limit = gen_rtx_MEM (Pmode, limit);
13788 if (allocate < SPLIT_STACK_AVAILABLE)
13789 current = stack_pointer_rtx;
13792 unsigned int scratch_regno;
13795 /* We need a scratch register to hold the stack pointer minus
13796 the required frame size. Since this is the very start of the
13797 function, the scratch register can be any caller-saved
13798 register which is not used for parameters. */
13799 offset = GEN_INT (- allocate);
13800 scratch_regno = split_stack_prologue_scratch_regno ();
13801 if (scratch_regno == INVALID_REGNUM)
13803 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
13804 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
13806 /* We don't use ix86_gen_add3 in this case because it will
13807 want to split to lea, but when not optimizing the insn
13808 will not be split after this point. */
13809 emit_insn (gen_rtx_SET (scratch_reg,
13810 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
13815 emit_move_insn (scratch_reg, offset));
13816 emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg,
13817 stack_pointer_rtx));
13819 current = scratch_reg;
13822 ix86_expand_branch (GEU, current, limit, label);
13823 jump_insn = get_last_insn ();
13824 JUMP_LABEL (jump_insn) = label;
13826 /* Mark the jump as very likely to be taken. */
13827 add_int_reg_note (jump_insn, REG_BR_PROB,
13828 REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100);
13830 if (split_stack_fn == NULL_RTX)
13832 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
13833 SYMBOL_REF_FLAGS (split_stack_fn) |= SYMBOL_FLAG_LOCAL;
13835 fn = split_stack_fn;
13837 /* Get more stack space. We pass in the desired stack space and the
13838 size of the arguments to copy to the new stack. In 32-bit mode
13839 we push the parameters; __morestack will return on a new stack
13840 anyhow. In 64-bit mode we pass the parameters in r10 and
13842 allocate_rtx = GEN_INT (allocate);
13843 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
13844 call_fusage = NULL_RTX;
/* 64-bit argument setup (the TARGET_64BIT guard line is elided).  */
13849 reg10 = gen_rtx_REG (Pmode, R10_REG);
13850 reg11 = gen_rtx_REG (Pmode, R11_REG);
13852 /* If this function uses a static chain, it will be in %r10.
13853 Preserve it across the call to __morestack. */
13854 if (DECL_STATIC_CHAIN (cfun->decl))
13858 rax = gen_rtx_REG (word_mode, AX_REG);
13859 emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
13860 use_reg (&call_fusage, rax);
13863 if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
13866 HOST_WIDE_INT argval;
13868 gcc_assert (Pmode == DImode);
13869 /* When using the large model we need to load the address
13870 into a register, and we've run out of registers. So we
13871 switch to a different calling convention, and we call a
13872 different function: __morestack_large. We pass the
13873 argument size in the upper 32 bits of r10 and pass the
13874 frame size in the lower 32 bits. */
13875 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
13876 gcc_assert ((args_size & 0xffffffff) == args_size);
13878 if (split_stack_fn_large == NULL_RTX)
13880 split_stack_fn_large =
13881 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
13882 SYMBOL_REF_FLAGS (split_stack_fn_large) |= SYMBOL_FLAG_LOCAL;
13884 if (ix86_cmodel == CM_LARGE_PIC)
13886 rtx_code_label *label;
/* Large PIC: materialize the GOT address by hand (rip-relative
   label arithmetic) and load the function address via the GOT.  */
13889 label = gen_label_rtx ();
13890 emit_label (label);
13891 LABEL_PRESERVE_P (label) = 1;
13892 emit_insn (gen_set_rip_rex64 (reg10, label));
13893 emit_insn (gen_set_got_offset_rex64 (reg11, label));
13894 emit_insn (ix86_gen_add3 (reg10, reg10, reg11));
13895 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
13897 x = gen_rtx_CONST (Pmode, x);
13898 emit_move_insn (reg11, x);
13899 x = gen_rtx_PLUS (Pmode, reg10, reg11);
13900 x = gen_const_mem (Pmode, x);
13901 emit_move_insn (reg11, x);
13904 emit_move_insn (reg11, split_stack_fn_large);
/* Pack args_size:allocate into r10 as the combined argument.  The
   double shift avoids UB when HOST_WIDE_INT is 32 bits wide.  */
13908 argval = ((args_size << 16) << 16) + allocate;
13909 emit_move_insn (reg10, GEN_INT (argval));
13913 emit_move_insn (reg10, allocate_rtx);
13914 emit_move_insn (reg11, GEN_INT (args_size));
13915 use_reg (&call_fusage, reg11);
13918 use_reg (&call_fusage, reg10);
/* 32-bit path (guard elided): pass both values on the stack.  */
13922 emit_insn (gen_push (GEN_INT (args_size)));
13923 emit_insn (gen_push (allocate_rtx));
13925 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
13926 GEN_INT (UNITS_PER_WORD), constm1_rtx,
13928 add_function_usage_to (call_insn, call_fusage);
13930 /* In order to make call/return prediction work right, we now need
13931 to execute a return instruction. See
13932 libgcc/config/i386/morestack.S for the details on how this works.
13934 For flow purposes gcc must not see this as a return
13935 instruction--we need control flow to continue at the subsequent
13936 label. Therefore, we use an unspec. */
13937 gcc_assert (crtl->args.pops_args < 65536);
13938 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
13940 /* If we are in 64-bit mode and this function uses a static chain,
13941 we saved %r10 in %rax before calling _morestack. */
13942 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
13943 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
13944 gen_rtx_REG (word_mode, AX_REG));
13946 /* If this function calls va_start, we need to store a pointer to
13947 the arguments on the old stack, because they may not have been
13948 all copied to the new stack. At this point the old stack can be
13949 found at the frame pointer value used by __morestack, because
13950 __morestack has set that up before calling back to us. Here we
13951 store that pointer in a scratch register, and in
13952 ix86_expand_prologue we store the scratch register in a stack
13954 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
13956 unsigned int scratch_regno;
13960 scratch_regno = split_stack_prologue_scratch_regno ();
13961 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
13962 frame_reg = gen_rtx_REG (Pmode, BP_REG);
/* Stack layout seen through __morestack's frame pointer (the
   surrounding layout comments are partially elided):  */
13966 return address within this function
13967 return address of caller of this function
13969 So we add three words to get to the stack arguments.
13973 return address within this function
13974 first argument to __morestack
13975 second argument to __morestack
13976 return address of caller of this function
13978 So we add five words to get to the stack arguments.
13980 words = TARGET_64BIT ? 3 : 5;
13981 emit_insn (gen_rtx_SET (scratch_reg,
13982 gen_rtx_PLUS (Pmode, frame_reg,
13983 GEN_INT (words * UNITS_PER_WORD))));
13985 varargs_label = gen_label_rtx ();
13986 emit_jump_insn (gen_jump (varargs_label));
13987 JUMP_LABEL (get_last_insn ()) = varargs_label;
13992 emit_label (label);
13993 LABEL_NUSES (label) = 1;
13995 /* If this function calls va_start, we now have to set the scratch
13996 register for the case where we do not call __morestack. In this
13997 case we need to set it based on the stack pointer. */
13998 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
14000 emit_insn (gen_rtx_SET (scratch_reg,
14001 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14002 GEN_INT (UNITS_PER_WORD))));
14004 emit_label (varargs_label);
14005 LABEL_NUSES (varargs_label) = 1;
14009 /* We may have to tell the dataflow pass that the split stack prologue
14010 is initializing a scratch register. */
/* NOTE(review): declaration line and braces elided.  Marks the
   split-stack scratch register live on entry so dataflow does not
   treat its initialization in the prologue as dead.  */
14013 ix86_live_on_entry (bitmap regs)
14015 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
14017 gcc_assert (flag_split_stack);
14018 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
14022 /* Extract the parts of an RTL expression that is a valid memory address
14023 for an instruction. Return 0 if the structure of the address is
14024 grossly off. Return -1 if the address contains ASHIFT, so it is not
14025 strictly valid, but still used for computing length of lea instruction. */
/* NOTE(review): listing heavily elided -- the return-type line, braces,
   many "return 0" failure paths and several case bodies inside the
   PLUS loop are not visible here.  Splits ADDR into
   base + index*scale + disp (+ segment) in *OUT.  */
14028 ix86_decompose_address (rtx addr, struct ix86_address *out)
14030 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
14031 rtx base_reg, index_reg;
14032 HOST_WIDE_INT scale = 1;
14033 rtx scale_rtx = NULL_RTX;
14036 addr_space_t seg = ADDR_SPACE_GENERIC;
14038 /* Allow zero-extended SImode addresses,
14039 they will be emitted with addr32 prefix. */
14040 if (TARGET_64BIT && GET_MODE (addr) == DImode)
14042 if (GET_CODE (addr) == ZERO_EXTEND
14043 && GET_MODE (XEXP (addr, 0)) == SImode)
14045 addr = XEXP (addr, 0);
14046 if (CONST_INT_P (addr))
14049 else if (GET_CODE (addr) == AND
14050 && const_32bit_mask (XEXP (addr, 1), DImode))
/* (and X 0xffffffff) is another spelling of a zero-extended
   32-bit address; strip it down to the SImode subreg.  */
14052 addr = simplify_gen_subreg (SImode, XEXP (addr, 0), DImode, 0);
14053 if (addr == NULL_RTX)
14056 if (CONST_INT_P (addr))
14061 /* Allow SImode subregs of DImode addresses,
14062 they will be emitted with addr32 prefix. */
14063 if (TARGET_64BIT && GET_MODE (addr) == SImode)
14065 if (SUBREG_P (addr)
14066 && GET_MODE (SUBREG_REG (addr)) == DImode)
14068 addr = SUBREG_REG (addr);
14069 if (CONST_INT_P (addr))
/* Dispatch on the top-level code of ADDR (the REG case and parts
   of this if-chain are elided in this listing).  */
14076 else if (SUBREG_P (addr))
14078 if (REG_P (SUBREG_REG (addr)))
14083 else if (GET_CODE (addr) == PLUS)
14085 rtx addends[4], op;
/* Flatten the nested PLUS tree into at most four addends...  */
14093 addends[n++] = XEXP (op, 1);
14096 while (GET_CODE (op) == PLUS);
/* ...then classify each addend as index*scale, shifted index,
   segment UNSPEC, register or displacement.  */
14101 for (i = n; i >= 0; --i)
14104 switch (GET_CODE (op))
14109 index = XEXP (op, 0);
14110 scale_rtx = XEXP (op, 1);
14116 index = XEXP (op, 0);
14117 tmp = XEXP (op, 1);
14118 if (!CONST_INT_P (tmp))
14120 scale = INTVAL (tmp);
14121 if ((unsigned HOST_WIDE_INT) scale > 3)
14123 scale = 1 << scale;
14128 if (GET_CODE (op) != UNSPEC)
14133 if (XINT (op, 1) == UNSPEC_TP
14134 && TARGET_TLS_DIRECT_SEG_REFS
14135 && seg == ADDR_SPACE_GENERIC)
14136 seg = DEFAULT_TLS_SEG_REG;
14142 if (!REG_P (SUBREG_REG (op)))
14169 else if (GET_CODE (addr) == MULT)
14171 index = XEXP (addr, 0); /* index*scale */
14172 scale_rtx = XEXP (addr, 1);
14174 else if (GET_CODE (addr) == ASHIFT)
14176 /* We're called for lea too, which implements ashift on occasion. */
14177 index = XEXP (addr, 0);
14178 tmp = XEXP (addr, 1);
14179 if (!CONST_INT_P (tmp))
14181 scale = INTVAL (tmp);
14182 if ((unsigned HOST_WIDE_INT) scale > 3)
14184 scale = 1 << scale;
14188 disp = addr; /* displacement */
14194 else if (SUBREG_P (index)
14195 && REG_P (SUBREG_REG (index)))
14201 /* Extract the integral value of scale. */
14204 if (!CONST_INT_P (scale_rtx))
14206 scale = INTVAL (scale_rtx);
14209 base_reg = base && SUBREG_P (base) ? SUBREG_REG (base) : base;
14210 index_reg = index && SUBREG_P (index) ? SUBREG_REG (index) : index;
14212 /* Avoid useless 0 displacement. */
14213 if (disp == const0_rtx && (base || index))
14216 /* Allow arg pointer and stack pointer as index if there is not scaling. */
14217 if (base_reg && index_reg && scale == 1
14218 && (index_reg == arg_pointer_rtx
14219 || index_reg == frame_pointer_rtx
14220 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
/* SP cannot be an index in x86 encoding; swap it into the base.  */
14222 std::swap (base, index);
14223 std::swap (base_reg, index_reg);
14226 /* Special case: %ebp cannot be encoded as a base without a displacement.
14230 && (base_reg == hard_frame_pointer_rtx
14231 || base_reg == frame_pointer_rtx
14232 || base_reg == arg_pointer_rtx
14233 || (REG_P (base_reg)
14234 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
14235 || REGNO (base_reg) == R13_REG))))
14238 /* Special case: on K6, [%esi] makes the instruction vector decoded.
14239 Avoid this by transforming to [%esi+0].
14240 Reload calls address legitimization without cfun defined, so we need
14241 to test cfun for being non-NULL. */
14242 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
14243 && base_reg && !index_reg && !disp
14244 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
14247 /* Special case: encode reg+reg instead of reg*2. */
14248 if (!base && index && scale == 2)
14249 base = index, base_reg = index_reg, scale = 1;
14251 /* Special case: scaling cannot be encoded without base or displacement. */
14252 if (!base && !disp && index && scale != 1)
/* Fill in the output structure (some assignments elided).  */
14256 out->index = index;
14258 out->scale = scale;
14264 /* Return cost of the memory address x.
14265 For i386, it is better to use a complex address than let gcc copy
14266 the address into a reg and make a new pseudo. But not if the address
14267 requires to two regs - that would mean more pseudos with longer
/* NOTE(review): listing elided -- the "static int" line, braces, the
   cost accumulator declaration/updates and the final return are not
   visible here.  Implements TARGET_ADDRESS_COST.  */
14270 ix86_address_cost (rtx x, machine_mode, addr_space_t, bool)
14272 struct ix86_address parts;
14274 int ok = ix86_decompose_address (x, &parts);
/* Strip SUBREGs so the register checks below see the hard/pseudo
   register itself.  */
14278 if (parts.base && SUBREG_P (parts.base))
14279 parts.base = SUBREG_REG (parts.base);
14280 if (parts.index && SUBREG_P (parts.index))
14281 parts.index = SUBREG_REG (parts.index);
14283 /* Attempt to minimize number of registers in the address by increasing
14284 address cost for each used register. We don't increase address cost
14285 for "pic_offset_table_rtx". When a memopt with "pic_offset_table_rtx"
14286 is not invariant itself it most likely means that base or index is not
14287 invariant. Therefore only "pic_offset_table_rtx" could be hoisted out,
14288 which is not profitable for x86. */
14290 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
14291 && (current_pass->type == GIMPLE_PASS
14292 || !pic_offset_table_rtx
14293 || !REG_P (parts.base)
14294 || REGNO (pic_offset_table_rtx) != REGNO (parts.base)))
/* Same test repeated for the index register.  */
14298 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
14299 && (current_pass->type == GIMPLE_PASS
14300 || !pic_offset_table_rtx
14301 || !REG_P (parts.index)
14302 || REGNO (pic_offset_table_rtx) != REGNO (parts.index)))
14305 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
14306 since it's predecode logic can't detect the length of instructions
14307 and it degenerates to vector decoded. Increase cost of such
14308 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
14309 to split such addresses or even refuse such addresses at all.
14311 Following addressing modes are affected:
14316 The first and last case may be avoidable by explicitly coding the zero in
14317 memory address, but I don't have AMD-K6 machine handy to check this
14321 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
14322 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
14323 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
14329 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
14330 this is used for to form addresses to local data when -fPIC is in
/* Predicate: true iff DISP is the Mach-O pic-base-relative unspec.  */
14334 darwin_local_data_pic (rtx disp)
14336 return (GET_CODE (disp) == UNSPEC
14337 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
14340 /* Determine if a given RTX is a valid constant. We already know this
14341 satisfies CONSTANT_P. */
14344 ix86_legitimate_constant_p (machine_mode, rtx x)
14346 /* Pointer bounds constants are not valid. */
14347 if (POINTER_BOUNDS_MODE_P (GET_MODE (x)))
/* Dispatch on the top-level rtx code; several case labels and returns
   are elided from this listing.  */
14350 switch (GET_CODE (x))
14355 if (GET_CODE (x) == PLUS)
14357 if (!CONST_INT_P (XEXP (x, 1)))
14362 if (TARGET_MACHO && darwin_local_data_pic (x))
14365 /* Only some unspecs are valid as "constants". */
14366 if (GET_CODE (x) == UNSPEC)
14367 switch (XINT (x, 1))
14370 case UNSPEC_GOTOFF:
14371 case UNSPEC_PLTOFF:
14372 return TARGET_64BIT;
/* NTPOFF/DTPOFF wrap a symbol; verify the symbol's TLS model matches
   the relocation being requested.  */
14374 case UNSPEC_NTPOFF:
14375 x = XVECEXP (x, 0, 0);
14376 return (GET_CODE (x) == SYMBOL_REF
14377 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
14378 case UNSPEC_DTPOFF:
14379 x = XVECEXP (x, 0, 0);
14380 return (GET_CODE (x) == SYMBOL_REF
14381 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
14386 /* We must have drilled down to a symbol. */
14387 if (GET_CODE (x) == LABEL_REF)
14389 if (GET_CODE (x) != SYMBOL_REF)
14394 /* TLS symbols are never valid. */
14395 if (SYMBOL_REF_TLS_MODEL (x))
14398 /* DLLIMPORT symbols are never valid. */
14399 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
14400 && SYMBOL_REF_DLLIMPORT_P (x))
14404 /* mdynamic-no-pic */
14405 if (MACHO_DYNAMIC_NO_PIC_P)
14406 return machopic_symbol_defined_p (x);
14410 case CONST_WIDE_INT:
14411 if (!TARGET_64BIT && !standard_sse_constant_p (x))
14416 if (!standard_sse_constant_p (x))
14423 /* Otherwise we handle everything else in the move patterns. */
14427 /* Determine if it's legal to put X into the constant pool. This
14428 is not possible for the address of thread-local symbols, which
14429 is checked above. */
14432 ix86_cannot_force_const_mem (machine_mode mode, rtx x)
14434 /* We can always put integral constants and vectors in memory. */
14435 switch (GET_CODE (x))
14438 case CONST_WIDE_INT:
/* Anything else can go in the pool only if it is a legitimate
   constant; note the inverted sense of the hook's return value.  */
14446 return !ix86_legitimate_constant_p (mode, x);
14449 /* Nonzero if the symbol is marked as dllimport, or as stub-variable,
/* Only meaningful for SYMBOL_REFs on dllimport-aware (PE-COFF) targets;
   anything else is trivially not imported.  */
14453 is_imported_p (rtx x)
14455 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
14456 || GET_CODE (x) != SYMBOL_REF)
14459 return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
14463 /* Nonzero if the constant value X is a legitimate general operand
14464 when generating PIC code. It is given that flag_pic is on and
14465 that X satisfies CONSTANT_P. */
14468 legitimate_pic_operand_p (rtx x)
14472 switch (GET_CODE (x))
/* CONST case: peel an optional (plus SYM const_int) wrapper before
   inspecting the inner expression.  */
14475 inner = XEXP (x, 0);
14476 if (GET_CODE (inner) == PLUS
14477 && CONST_INT_P (XEXP (inner, 1)))
14478 inner = XEXP (inner, 0);
14480 /* Only some unspecs are valid as "constants". */
14481 if (GET_CODE (inner) == UNSPEC)
14482 switch (XINT (inner, 1))
14485 case UNSPEC_GOTOFF:
14486 case UNSPEC_PLTOFF:
14487 return TARGET_64BIT;
14489 x = XVECEXP (inner, 0, 0);
14490 return (GET_CODE (x) == SYMBOL_REF
14491 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
14492 case UNSPEC_MACHOPIC_OFFSET:
14493 return legitimate_pic_address_disp_p (x);
/* Default case (labels elided in this listing): fall back to the
   displacement legitimacy check.  */
14501 return legitimate_pic_address_disp_p (x);
14508 /* Determine if a given CONST RTX is a valid memory displacement
14512 legitimate_pic_address_disp_p (rtx disp)
14516 /* In 64bit mode we can allow direct addresses of symbols and labels
14517 when they are not dynamic symbols. */
14520 rtx op0 = disp, op1;
14522 switch (GET_CODE (disp))
/* CONST case: require (const (plus SYM const_int)) with the offset
   inside +/-16MB so it fits a signed 32-bit PC-relative reach.  */
14528 if (GET_CODE (XEXP (disp, 0)) != PLUS)
14530 op0 = XEXP (XEXP (disp, 0), 0);
14531 op1 = XEXP (XEXP (disp, 0), 1);
14532 if (!CONST_INT_P (op1)
14533 || INTVAL (op1) >= 16*1024*1024
14534 || INTVAL (op1) < -16*1024*1024)
14536 if (GET_CODE (op0) == LABEL_REF)
14538 if (GET_CODE (op0) == CONST
14539 && GET_CODE (XEXP (op0, 0)) == UNSPEC
14540 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
14542 if (GET_CODE (op0) == UNSPEC
14543 && XINT (op0, 1) == UNSPEC_PCREL)
14545 if (GET_CODE (op0) != SYMBOL_REF)
14550 /* TLS references should always be enclosed in UNSPEC.
14551 The dllimported symbol needs always to be resolved. */
14552 if (SYMBOL_REF_TLS_MODEL (op0)
14553 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
14558 if (is_imported_p (op0))
14561 if (SYMBOL_REF_FAR_ADDR_P (op0)
14562 || !SYMBOL_REF_LOCAL_P (op0))
14565 /* Function-symbols need to be resolved only for
14567 For the small-model we don't need to resolve anything
14569 if ((ix86_cmodel != CM_LARGE_PIC
14570 && SYMBOL_REF_FUNCTION_P (op0))
14571 || ix86_cmodel == CM_SMALL_PIC)
14573 /* Non-external symbols don't need to be resolved for
14574 large, and medium-model. */
14575 if ((ix86_cmodel == CM_LARGE_PIC
14576 || ix86_cmodel == CM_MEDIUM_PIC)
14577 && !SYMBOL_REF_EXTERNAL_P (op0))
14580 else if (!SYMBOL_REF_FAR_ADDR_P (op0)
14581 && (SYMBOL_REF_LOCAL_P (op0)
14582 || (HAVE_LD_PIE_COPYRELOC
14584 && !SYMBOL_REF_WEAK (op0)
14585 && !SYMBOL_REF_FUNCTION_P (op0)))
14586 && ix86_cmodel != CM_LARGE_PIC)
/* Not a PE-COFF/64-bit direct reference: strip the CONST wrapper and
   check for one of our GOT-related unspecs.  */
14594 if (GET_CODE (disp) != CONST)
14596 disp = XEXP (disp, 0);
14600 /* We are unsafe to allow PLUS expressions. This limit allowed distance
14601 of GOT tables. We should not need these anyway. */
14602 if (GET_CODE (disp) != UNSPEC
14603 || (XINT (disp, 1) != UNSPEC_GOTPCREL
14604 && XINT (disp, 1) != UNSPEC_GOTOFF
14605 && XINT (disp, 1) != UNSPEC_PCREL
14606 && XINT (disp, 1) != UNSPEC_PLTOFF))
14609 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
14610 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
/* 32-bit path: allow an optional constant offset around the unspec.  */
14616 if (GET_CODE (disp) == PLUS)
14618 if (!CONST_INT_P (XEXP (disp, 1)))
14620 disp = XEXP (disp, 0);
14624 if (TARGET_MACHO && darwin_local_data_pic (disp))
14627 if (GET_CODE (disp) != UNSPEC)
14630 switch (XINT (disp, 1))
14635 /* We need to check for both symbols and labels because VxWorks loads
14636 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
14638 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
14639 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
14640 case UNSPEC_GOTOFF:
14641 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
14642 While ABI specify also 32bit relocation but we don't produce it in
14643 small PIC model at all. */
14644 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
14645 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
14647 return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
/* TLS unspecs: the wrapped symbol's TLS model must agree with the
   relocation kind.  */
14649 case UNSPEC_GOTTPOFF:
14650 case UNSPEC_GOTNTPOFF:
14651 case UNSPEC_INDNTPOFF:
14654 disp = XVECEXP (disp, 0, 0);
14655 return (GET_CODE (disp) == SYMBOL_REF
14656 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
14657 case UNSPEC_NTPOFF:
14658 disp = XVECEXP (disp, 0, 0);
14659 return (GET_CODE (disp) == SYMBOL_REF
14660 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
14661 case UNSPEC_DTPOFF:
14662 disp = XVECEXP (disp, 0, 0);
14663 return (GET_CODE (disp) == SYMBOL_REF
14664 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
14670 /* Determine if op is suitable RTX for an address register.
14671 Return naked register if a register or a register subreg is
14672 found, otherwise return NULL_RTX. */
14675 ix86_validate_address_register (rtx op)
14677 machine_mode mode = GET_MODE (op);
14679 /* Only SImode or DImode registers can form the address. */
14680 if (mode != SImode && mode != DImode)
/* The REG_P case is elided here; this branch handles SUBREGs.  */
14685 else if (SUBREG_P (op))
14687 rtx reg = SUBREG_REG (op);
14692 mode = GET_MODE (reg);
14694 /* Don't allow SUBREGs that span more than a word. It can
14695 lead to spill failures when the register is one word out
14696 of a two word structure. */
14697 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
14700 /* Allow only SUBREGs of non-eliminable hard registers. */
14701 if (register_no_elim_operand (reg, mode))
14705 /* Op is not a register. */
14709 /* Recognizes RTL expressions that are valid memory addresses for an
14710 instruction. The MODE argument is the machine mode for the MEM
14711 expression that wants to use this address.
14713 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
14714 convert common non-canonical forms to canonical form so that they will
14718 ix86_legitimate_address_p (machine_mode, rtx addr, bool strict)
14720 struct ix86_address parts;
14721 rtx base, index, disp;
14722 HOST_WIDE_INT scale;
14725 if (ix86_decompose_address (addr, &parts) <= 0)
14726 /* Decomposition failed. */
14730 index = parts.index;
14732 scale = parts.scale;
14735 /* Validate base register. */
14738 rtx reg = ix86_validate_address_register (base);
14740 if (reg == NULL_RTX)
/* STRICT selects hard-register checking (post-reload); non-strict
   also accepts pseudos.  */
14743 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
14744 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
14745 /* Base is not valid. */
14749 /* Validate index register. */
14752 rtx reg = ix86_validate_address_register (index);
14754 if (reg == NULL_RTX)
14757 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
14758 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
14759 /* Index is not valid. */
14763 /* Index and base should have the same mode. */
14765 && GET_MODE (base) != GET_MODE (index))
14768 /* Address override works only on the (%reg) part of %fs:(%reg). */
14769 if (seg != ADDR_SPACE_GENERIC
14770 && ((base && GET_MODE (base) != word_mode)
14771 || (index && GET_MODE (index) != word_mode)))
14774 /* Validate scale factor. */
14778 /* Scale without index. */
14781 if (scale != 2 && scale != 4 && scale != 8)
14782 /* Scale is not a valid multiplier. */
14786 /* Validate displacement. */
/* Displacements wrapping one of our unspecs need model-specific
   validation; non-PIC-related unspecs are rejected.  */
14789 if (GET_CODE (disp) == CONST
14790 && GET_CODE (XEXP (disp, 0)) == UNSPEC
14791 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
14792 switch (XINT (XEXP (disp, 0), 1))
14794 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
14795 used. While ABI specify also 32bit relocations, we don't produce
14796 them at all and use IP relative instead. */
14798 case UNSPEC_GOTOFF:
14799 gcc_assert (flag_pic);
14801 goto is_legitimate_pic;
14803 /* 64bit address unspec. */
14806 case UNSPEC_GOTPCREL:
14808 gcc_assert (flag_pic);
14809 goto is_legitimate_pic;
14811 case UNSPEC_GOTTPOFF:
14812 case UNSPEC_GOTNTPOFF:
14813 case UNSPEC_INDNTPOFF:
14814 case UNSPEC_NTPOFF:
14815 case UNSPEC_DTPOFF:
14818 case UNSPEC_STACK_CHECK:
14819 gcc_assert (flag_split_stack);
14823 /* Invalid address unspec. */
14827 else if (SYMBOLIC_CONST (disp)
14831 && MACHOPIC_INDIRECT
14832 && !machopic_operand_p (disp)
/* is_legitimate_pic label is elided here; this path validates PIC
   displacements.  */
14838 if (TARGET_64BIT && (index || base))
14840 /* foo@dtpoff(%rX) is ok. */
14841 if (GET_CODE (disp) != CONST
14842 || GET_CODE (XEXP (disp, 0)) != PLUS
14843 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
14844 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
14845 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
14846 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
14847 /* Non-constant pic memory reference. */
14850 else if ((!TARGET_MACHO || flag_pic)
14851 && ! legitimate_pic_address_disp_p (disp))
14852 /* Displacement is an invalid pic construct. */
14855 else if (MACHO_DYNAMIC_NO_PIC_P
14856 && !ix86_legitimate_constant_p (Pmode, disp))
14857 /* displacment must be referenced via non_lazy_pointer */
14861 /* This code used to verify that a symbolic pic displacement
14862 includes the pic_offset_table_rtx register.
14864 While this is good idea, unfortunately these constructs may
14865 be created by "adds using lea" optimization for incorrect
14874 This code is nonsensical, but results in addressing
14875 GOT table with pic_offset_table_rtx base. We can't
14876 just refuse it easily, since it gets matched by
14877 "addsi3" pattern, that later gets split to lea in the
14878 case output register differs from input. While this
14879 can be handled by separate addsi pattern for this case
14880 that never results in lea, this seems to be easier and
14881 correct fix for crash to disable this test. */
14883 else if (GET_CODE (disp) != LABEL_REF
14884 && !CONST_INT_P (disp)
14885 && (GET_CODE (disp) != CONST
14886 || !ix86_legitimate_constant_p (Pmode, disp))
14887 && (GET_CODE (disp) != SYMBOL_REF
14888 || !ix86_legitimate_constant_p (Pmode, disp)))
14889 /* Displacement is not constant. */
14891 else if (TARGET_64BIT
14892 && !x86_64_immediate_operand (disp, VOIDmode))
14893 /* Displacement is out of range. */
14895 /* In x32 mode, constant addresses are sign extended to 64bit, so
14896 we have to prevent addresses from 0x80000000 to 0xffffffff. */
14897 else if (TARGET_X32 && !(index || base)
14898 && CONST_INT_P (disp)
14899 && val_signbit_known_set_p (SImode, INTVAL (disp)))
14903 /* Everything looks valid. */
14907 /* Determine if a given RTX is a valid constant address. */
/* A constant address is any CONSTANT_P rtx that also passes the strict
   address legitimacy check.  */
14910 constant_address_p (rtx x)
14912 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
14915 /* Return a unique alias set for the GOT. */
14917 static alias_set_type
14918 ix86_GOT_alias_set (void)
/* Lazily allocated; -1 marks "not yet created" (the guard test is
   elided in this listing).  */
14920 static alias_set_type set = -1;
14922 set = new_alias_set ();
14926 /* Return a legitimate reference for ORIG (an address) using the
14927 register REG. If REG is 0, a new pseudo is generated.
14929 There are two types of references that must be handled:
14931 1. Global data references must load the address from the GOT, via
14932 the PIC reg. An insn is emitted to do this load, and the reg is
14935 2. Static data references, constant pool addresses, and code labels
14936 compute the address as an offset from the GOT, whose base is in
14937 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
14938 differentiate them from global data objects. The returned
14939 address is the PIC reg + an unspec constant.
14941 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
14942 reg also appears in the address. */
14945 legitimize_pic_address (rtx orig, rtx reg)
14948 rtx new_rtx = orig;
14951 if (TARGET_MACHO && !TARGET_64BIT)
14954 reg = gen_reg_rtx (Pmode);
14955 /* Use the generic Mach-O PIC machinery. */
14956 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
14960 if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
14962 rtx tmp = legitimize_pe_coff_symbol (addr, true);
14967 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
14969 else if (TARGET_64BIT && !TARGET_PECOFF
14970 && ix86_cmodel != CM_SMALL_PIC && gotoff_operand (addr, Pmode))
14973 /* This symbol may be referenced via a displacement from the PIC
14974 base address (@GOTOFF). */
14976 if (GET_CODE (addr) == CONST)
14977 addr = XEXP (addr, 0);
14978 if (GET_CODE (addr) == PLUS)
14980 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
14982 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
14985 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
14986 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
14988 tmpreg = gen_reg_rtx (Pmode);
14991 emit_move_insn (tmpreg, new_rtx);
/* Combine the @GOTOFF constant with the PIC register.  */
14995 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
14996 tmpreg, 1, OPTAB_DIRECT);
15000 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
15002 else if (!TARGET_64BIT && !TARGET_PECOFF && gotoff_operand (addr, Pmode))
15004 /* This symbol may be referenced via a displacement from the PIC
15005 base address (@GOTOFF). */
15007 if (GET_CODE (addr) == CONST)
15008 addr = XEXP (addr, 0);
15009 if (GET_CODE (addr) == PLUS)
15011 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
15013 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
15016 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
15017 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
15018 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
15022 emit_move_insn (reg, new_rtx);
15026 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
15027 /* We can't use @GOTOFF for text labels on VxWorks;
15028 see gotoff_operand. */
15029 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
15031 rtx tmp = legitimize_pe_coff_symbol (addr, true);
15035 /* For x64 PE-COFF there is no GOT table. So we use address
15037 if (TARGET_64BIT && TARGET_PECOFF)
15039 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
15040 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
15043 reg = gen_reg_rtx (Pmode);
15044 emit_move_insn (reg, new_rtx);
15047 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
/* 64-bit small/medium PIC: load the address through a @GOTPCREL slot.  */
15049 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
15050 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
15051 new_rtx = gen_const_mem (Pmode, new_rtx);
15052 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
15055 reg = gen_reg_rtx (Pmode);
15056 /* Use directly gen_movsi, otherwise the address is loaded
15057 into register for CSE. We don't want to CSE this addresses,
15058 instead we CSE addresses from the GOT table, so skip this. */
15059 emit_insn (gen_movsi (reg, new_rtx));
15064 /* This symbol must be referenced via a load from the
15065 Global Offset Table (@GOT). */
15067 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
15068 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
15070 new_rtx = force_reg (Pmode, new_rtx);
15071 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
15072 new_rtx = gen_const_mem (Pmode, new_rtx);
15073 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
15076 reg = gen_reg_rtx (Pmode);
15077 emit_move_insn (reg, new_rtx);
/* Non-symbolic addresses: constants that do not fit a sign-extended
   32-bit immediate must go through a register.  */
15083 if (CONST_INT_P (addr)
15084 && !x86_64_immediate_operand (addr, VOIDmode))
15088 emit_move_insn (reg, addr);
15092 new_rtx = force_reg (Pmode, addr);
15094 else if (GET_CODE (addr) == CONST)
15096 addr = XEXP (addr, 0);
15098 /* We must match stuff we generate before. Assume the only
15099 unspecs that can get here are ours. Not that we could do
15100 anything with them anyway.... */
15101 if (GET_CODE (addr) == UNSPEC
15102 || (GET_CODE (addr) == PLUS
15103 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
15105 gcc_assert (GET_CODE (addr) == PLUS);
15107 if (GET_CODE (addr) == PLUS)
15109 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
15111 /* Check first to see if this is a constant offset from a @GOTOFF
15112 symbol reference. */
15113 if (!TARGET_PECOFF && gotoff_operand (op0, Pmode)
15114 && CONST_INT_P (op1))
15118 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
15120 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
15121 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
15122 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
15126 emit_move_insn (reg, new_rtx);
/* Offsets outside +/-16MB cannot use the @GOTOFF form; fold them in
   as a separate addition instead.  */
15132 if (INTVAL (op1) < -16*1024*1024
15133 || INTVAL (op1) >= 16*1024*1024)
15135 if (!x86_64_immediate_operand (op1, Pmode))
15136 op1 = force_reg (Pmode, op1);
15137 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
/* General PLUS: legitimize each operand recursively, then recombine.  */
15143 rtx base = legitimize_pic_address (op0, reg);
15144 machine_mode mode = GET_MODE (base);
15146 = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
15148 if (CONST_INT_P (new_rtx))
15150 if (INTVAL (new_rtx) < -16*1024*1024
15151 || INTVAL (new_rtx) >= 16*1024*1024)
15153 if (!x86_64_immediate_operand (new_rtx, mode))
15154 new_rtx = force_reg (mode, new_rtx);
15156 = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
15159 new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
15163 /* For %rip addressing, we have to use just disp32, not
15166 && (GET_CODE (base) == SYMBOL_REF
15167 || GET_CODE (base) == LABEL_REF))
15168 base = force_reg (mode, base);
15169 if (GET_CODE (new_rtx) == PLUS
15170 && CONSTANT_P (XEXP (new_rtx, 1)))
15172 base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
15173 new_rtx = XEXP (new_rtx, 1);
15175 new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
15183 /* Load the thread pointer. If TO_REG is true, force it into a register. */
15186 get_thread_pointer (machine_mode tp_mode, bool to_reg)
15188 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
/* Only SImode -> DImode widening is supported (x32: 32-bit pointers,
   64-bit registers).  */
15190 if (GET_MODE (tp) != tp_mode)
15192 gcc_assert (GET_MODE (tp) == SImode);
15193 gcc_assert (tp_mode == DImode);
15195 tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
15199 tp = copy_to_mode_reg (tp_mode, tp);
15204 /* Construct the SYMBOL_REF for the tls_get_addr function. */
15206 static GTY(()) rtx ix86_tls_symbol;
15209 ix86_tls_get_addr (void)
15211 if (!ix86_tls_symbol)
/* 32-bit GNU TLS uses the triple-underscore entry point.  */
15214 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
15215 ? "___tls_get_addr" : "__tls_get_addr");
15217 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
/* Large PIC model cannot reach the symbol directly; go through
   @PLTOFF relative to the PIC register.  */
15220 if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
15222 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
15224 return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
15225 gen_rtx_CONST (Pmode, unspec));
15228 return ix86_tls_symbol;
15231 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
15233 static GTY(()) rtx ix86_tls_module_base_symbol;
15236 ix86_tls_module_base (void)
/* Created lazily and cached for the whole compilation (GTY-rooted).  */
15238 if (!ix86_tls_module_base_symbol)
15240 ix86_tls_module_base_symbol
15241 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
/* Mark the symbol as global-dynamic TLS so later checks treat it as
   a TLS reference.  */
15243 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
15244 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
15247 return ix86_tls_module_base_symbol;
15250 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
15251 false if we expect this to be used for a memory address and true if
15252 we expect to load the address into a register. */
15255 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
15257 rtx dest, base, off;
15258 rtx pic = NULL_RTX, tp = NULL_RTX;
15259 machine_mode tp_mode = Pmode;
15262 /* Fall back to global dynamic model if tool chain cannot support local
15264 if (TARGET_SUN_TLS && !TARGET_64BIT
15265 && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
15266 && model == TLS_MODEL_LOCAL_DYNAMIC)
15267 model = TLS_MODEL_GLOBAL_DYNAMIC;
/* Dispatch on the TLS access model (switch header elided here).  */
15271 case TLS_MODEL_GLOBAL_DYNAMIC:
15272 dest = gen_reg_rtx (Pmode);
15276 if (flag_pic && !TARGET_PECOFF)
15277 pic = pic_offset_table_rtx;
15280 pic = gen_reg_rtx (Pmode);
15281 emit_insn (gen_set_got (pic));
15285 if (TARGET_GNU2_TLS)
15288 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
15290 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
/* GNU2 (TLS descriptor) scheme: result is tp + descriptor value.  */
15292 tp = get_thread_pointer (Pmode, true);
15293 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
15295 if (GET_MODE (x) != Pmode)
15296 x = gen_rtx_ZERO_EXTEND (Pmode, x);
15298 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
/* Classic GD scheme: call __tls_get_addr; result comes back in %rax.  */
15302 rtx caddr = ix86_tls_get_addr ();
15306 rtx rax = gen_rtx_REG (Pmode, AX_REG);
15311 (ix86_gen_tls_global_dynamic_64 (rax, x, caddr));
15312 insns = get_insns ();
15315 if (GET_MODE (x) != Pmode)
15316 x = gen_rtx_ZERO_EXTEND (Pmode, x);
15318 RTL_CONST_CALL_P (insns) = 1;
15319 emit_libcall_block (insns, dest, rax, x);
15322 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
15326 case TLS_MODEL_LOCAL_DYNAMIC:
15327 base = gen_reg_rtx (Pmode);
15332 pic = pic_offset_table_rtx;
15335 pic = gen_reg_rtx (Pmode);
15336 emit_insn (gen_set_got (pic));
15340 if (TARGET_GNU2_TLS)
15342 rtx tmp = ix86_tls_module_base ();
15345 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
15347 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
15349 tp = get_thread_pointer (Pmode, true);
15350 set_unique_reg_note (get_last_insn (), REG_EQUAL,
15351 gen_rtx_MINUS (Pmode, tmp, tp));
15355 rtx caddr = ix86_tls_get_addr ();
15359 rtx rax = gen_rtx_REG (Pmode, AX_REG);
15365 (ix86_gen_tls_local_dynamic_base_64 (rax, caddr));
15366 insns = get_insns ();
15369 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
15370 share the LD_BASE result with other LD model accesses. */
15371 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
15372 UNSPEC_TLS_LD_BASE);
15374 RTL_CONST_CALL_P (insns) = 1;
15375 emit_libcall_block (insns, base, rax, eqv);
15378 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
/* Add the symbol's @DTPOFF offset to the module base.  */
15381 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
15382 off = gen_rtx_CONST (Pmode, off);
15384 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
15386 if (TARGET_GNU2_TLS)
15388 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
15390 if (GET_MODE (x) != Pmode)
15391 x = gen_rtx_ZERO_EXTEND (Pmode, x);
15393 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
15397 case TLS_MODEL_INITIAL_EXEC:
15400 if (TARGET_SUN_TLS && !TARGET_X32)
15402 /* The Sun linker took the AMD64 TLS spec literally
15403 and can only handle %rax as destination of the
15404 initial executable code sequence. */
15406 dest = gen_reg_rtx (DImode);
15407 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
15411 /* Generate DImode references to avoid %fs:(%reg32)
15412 problems and linker IE->LE relaxation bug. */
15415 type = UNSPEC_GOTNTPOFF;
15419 pic = pic_offset_table_rtx;
15420 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
15422 else if (!TARGET_ANY_GNU_TLS)
15424 pic = gen_reg_rtx (Pmode);
15425 emit_insn (gen_set_got (pic));
15426 type = UNSPEC_GOTTPOFF;
15431 type = UNSPEC_INDNTPOFF;
/* Load the thread-pointer offset from its GOT slot.  */
15434 off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
15435 off = gen_rtx_CONST (tp_mode, off);
15437 off = gen_rtx_PLUS (tp_mode, pic, off);
15438 off = gen_const_mem (tp_mode, off);
15439 set_mem_alias_set (off, ix86_GOT_alias_set ());
15441 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
15443 base = get_thread_pointer (tp_mode,
15444 for_mov || !TARGET_TLS_DIRECT_SEG_REFS)
15445 off = force_reg (tp_mode, off);
15446 return gen_rtx_PLUS (tp_mode, base, off);
15450 base = get_thread_pointer (Pmode, true);
15451 dest = gen_reg_rtx (Pmode);
15452 emit_insn (ix86_gen_sub3 (dest, base, off));
15456 case TLS_MODEL_LOCAL_EXEC:
15457 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
15458 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
15459 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
15460 off = gen_rtx_CONST (Pmode, off);
15462 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
15464 base = get_thread_pointer (Pmode,
15465 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
15466 return gen_rtx_PLUS (Pmode, base, off);
15470 base = get_thread_pointer (Pmode, true);
15471 dest = gen_reg_rtx (Pmode);
15472 emit_insn (ix86_gen_sub3 (dest, base, off));
15477 gcc_unreachable ();
15483 /* Create or return the unique __imp_DECL dllimport symbol corresponding
15484 to symbol DECL if BEIMPORT is true. Otherwise create or return the
15485 unique refptr-DECL symbol corresponding to symbol DECL. */
/* Hash traits for the decl -> import-symbol cache: keyed on the source
   decl pointer; entries live only while the decl is GC-marked.  */
15487 struct dllimport_hasher : ggc_cache_ptr_hash<tree_map>
15489 static inline hashval_t hash (tree_map *m) { return m->hash; }
15491 equal (tree_map *a, tree_map *b)
15493 return a->base.from == b->base.from;
15497 keep_cache_entry (tree_map *&m)
15499 return ggc_marked_p (m->base.from);
15503 static GTY((cache)) hash_table<dllimport_hasher> *dllimport_map;
/* Return (creating and caching on first use) the artificial VAR_DECL
   holding the "__imp_NAME" or "refptr.NAME" pointer for DECL.  */
15506 get_dllimport_decl (tree decl, bool beimport)
15508 struct tree_map *h, in;
15510 const char *prefix;
15511 size_t namelen, prefixlen;
15516 if (!dllimport_map)
15517 dllimport_map = hash_table<dllimport_hasher>::create_ggc (512);
15519 in.hash = htab_hash_pointer (decl);
15520 in.base.from = decl;
15521 tree_map **loc = dllimport_map->find_slot_with_hash (&in, in.hash, INSERT);
/* Cache miss: build a fresh external, read-only pointer decl.  */
15526 *loc = h = ggc_alloc<tree_map> ();
15528 h->base.from = decl;
15529 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
15530 VAR_DECL, NULL, ptr_type_node);
15531 DECL_ARTIFICIAL (to) = 1;
15532 DECL_IGNORED_P (to) = 1;
15533 DECL_EXTERNAL (to) = 1;
15534 TREE_READONLY (to) = 1;
15536 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
15537 name = targetm.strip_name_encoding (name);
/* Pick the prefix; the extra underscore variants match targets whose
   user_label_prefix is "_".  */
15539 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
15540 ? "*__imp_" : "*__imp__";
15542 prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
15543 namelen = strlen (name);
15544 prefixlen = strlen (prefix);
15545 imp_name = (char *) alloca (namelen + prefixlen + 1);
15546 memcpy (imp_name, prefix, prefixlen);
15547 memcpy (imp_name + prefixlen, name, namelen + 1);
15549 name = ggc_alloc_string (imp_name, namelen + prefixlen);
15550 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
15551 SET_SYMBOL_REF_DECL (rtl, to);
15552 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
15555 SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
15556 #ifdef SUB_TARGET_RECORD_STUB
15557 SUB_TARGET_RECORD_STUB (name);
/* The decl's RTL is a load through the import pointer, sharing the
   GOT alias set.  */
15561 rtl = gen_const_mem (Pmode, rtl);
15562 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
15564 SET_DECL_RTL (to, rtl);
15565 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
15570 /* Expand SYMBOL into its corresponding far-addresse symbol.
15571 WANT_REG is true if we require the result be a register. */
15574 legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
/* SYMBOL must carry a decl; reuse the cached refptr decl's RTL.  */
15579 gcc_assert (SYMBOL_REF_DECL (symbol));
15580 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);
15582 x = DECL_RTL (imp_decl);
15584 x = force_reg (Pmode, x);
15588 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
15589 true if we require the result be a register. */
15592 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
/* Same shape as legitimize_pe_coff_extern_decl, but asks for the
   __imp_ (beimport) variant of the cached decl.  */
15597 gcc_assert (SYMBOL_REF_DECL (symbol));
15598 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);
15600 x = DECL_RTL (imp_decl);
15602 x = force_reg (Pmode, x);
15606 /* Expand SYMBOL into its corresponding dllimport or refptr symbol. WANT_REG
15607 is true if we require the result be a register. */
15610 legitimize_pe_coff_symbol (rtx addr, bool inreg)
/* Only meaningful for PE-COFF targets; callers treat a null-ish result
   (the return here is elided) as "no rewrite needed".  */
15612 if (!TARGET_PECOFF)
15615 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
/* Plain dllimport symbol, or (const (plus SYM const_int)) around one.  */
15617 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
15618 return legitimize_dllimport_symbol (addr, inreg);
15619 if (GET_CODE (addr) == CONST
15620 && GET_CODE (XEXP (addr, 0)) == PLUS
15621 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
15622 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
15624 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
15625 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
/* Medium/large code models additionally route external decls through
   refptr stubs; small models skip this.  */
15629 if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
15631 if (GET_CODE (addr) == SYMBOL_REF
15632 && !is_imported_p (addr)
15633 && SYMBOL_REF_EXTERNAL_P (addr)
15634 && SYMBOL_REF_DECL (addr))
15635 return legitimize_pe_coff_extern_decl (addr, inreg);
15637 if (GET_CODE (addr) == CONST
15638 && GET_CODE (XEXP (addr, 0)) == PLUS
15639 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
15640 && !is_imported_p (XEXP (XEXP (addr, 0), 0))
15641 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
15642 && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
15644 rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
15645 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
15650 /* Try machine-dependent ways of modifying an illegitimate address
15651    to be legitimate. If we find one, return the new, valid address.
15652    This macro is used in only one place: `memory_address' in explow.c.
15654    OLDX is the address as it was before break_out_memory_refs was called.
15655    In some cases it is useful to look at this to decide what needs to be done.
15657    It is always safe for this macro to do nothing. It exists to recognize
15658    opportunities to optimize the output.
15660    For the 80386, we handle X+REG by loading X into a register R and
15661    using R+REG. R will go in a general reg and indexing will be used.
15662    However, if REG is a broken-out memory address or multiplication,
15663    nothing needs to be done because REG can certainly go in a general reg.
15665    When -fpic is used, special handling is needed for symbolic references.
15666    See comments by legitimize_pic_address in i386.c for details. */
/* NOTE(review): elided excerpt -- braces, `log' declaration, early returns
   and the final return are missing between the surviving lines.  */
15669 ix86_legitimize_address (rtx x, rtx, machine_mode mode)
15671 bool changed = false;
/* TLS symbols get their own legitimization path immediately.  */
15674 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
15676 return legitimize_tls_address (x, (enum tls_model) log, false);
/* (const (plus (tls symbol) (const_int))): legitimize the symbol and
   re-attach the displacement.  */
15677 if (GET_CODE (x) == CONST
15678 && GET_CODE (XEXP (x, 0)) == PLUS
15679 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
15680 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
15682 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
15683 (enum tls_model) log, false);
15684 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
/* PE/COFF dllimport/refptr symbols.  */
15687 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
15689 rtx tmp = legitimize_pe_coff_symbol (x, true);
15694 if (flag_pic && SYMBOLIC_CONST (x))
15695 return legitimize_pic_address (x, 0);
/* Darwin -mdynamic-no-pic.  */
15698 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
15699 return machopic_indirect_data_reference (x, 0);
15702 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
15703 if (GET_CODE (x) == ASHIFT
15704 && CONST_INT_P (XEXP (x, 1))
15705 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
15708 log = INTVAL (XEXP (x, 1));
15709 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
15710 GEN_INT (1 << log));
15713 if (GET_CODE (x) == PLUS)
15715 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
15717 if (GET_CODE (XEXP (x, 0)) == ASHIFT
15718 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
15719 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
15722 log = INTVAL (XEXP (XEXP (x, 0), 1));
15723 XEXP (x, 0) = gen_rtx_MULT (Pmode,
15724 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
15725 GEN_INT (1 << log));
/* Same canonicalization for a shift in the second operand.  */
15728 if (GET_CODE (XEXP (x, 1)) == ASHIFT
15729 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
15730 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
15733 log = INTVAL (XEXP (XEXP (x, 1), 1));
15734 XEXP (x, 1) = gen_rtx_MULT (Pmode,
15735 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
15736 GEN_INT (1 << log));
15739 /* Put multiply first if it isn't already. */
15740 if (GET_CODE (XEXP (x, 1)) == MULT)
15742 std::swap (XEXP (x, 0), XEXP (x, 1));
15746 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
15747    into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
15748    created by virtual register instantiation, register elimination, and
15749    similar optimizations. */
15750 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
15753 x = gen_rtx_PLUS (Pmode,
15754 gen_rtx_PLUS (Pmode, XEXP (x, 0),
15755 XEXP (XEXP (x, 1), 0)),
15756 XEXP (XEXP (x, 1), 1));
/* Canonicalize (first comment line of this case elided):
15760    (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
15761    into (plus (plus (mult (reg) (const)) (reg)) (const)). */
15762 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
15763 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
15764 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
15765 && CONSTANT_P (XEXP (x, 1)))
15768 rtx other = NULL_RTX;
/* Pick whichever of the two addends is the CONST_INT; the other becomes
   the symbolic/register part.  */
15770 if (CONST_INT_P (XEXP (x, 1)))
15772 constant = XEXP (x, 1);
15773 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
15775 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
15777 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
15778 other = XEXP (x, 1);
15786 x = gen_rtx_PLUS (Pmode,
15787 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
15788 XEXP (XEXP (XEXP (x, 0), 1), 0)),
15789 plus_constant (Pmode, other,
15790 INTVAL (constant)));
/* If the canonicalizations above already produced a valid address, the
   elided code presumably returns it here.  */
15794 if (changed && ix86_legitimate_address_p (mode, x, false))
15797 if (GET_CODE (XEXP (x, 0)) == MULT)
15800 XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
15803 if (GET_CODE (XEXP (x, 1)) == MULT)
15806 XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
15810 && REG_P (XEXP (x, 1))
15811 && REG_P (XEXP (x, 0)))
15814 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
15817 x = legitimize_pic_address (x, 0);
15820 if (changed && ix86_legitimate_address_p (mode, x, false))
/* Force the non-register operand of a PLUS into a fresh Pmode register,
   zero-extending if it was narrower than Pmode.  */
15823 if (REG_P (XEXP (x, 0)))
15825 rtx temp = gen_reg_rtx (Pmode);
15826 rtx val = force_operand (XEXP (x, 1), temp);
15829 val = convert_to_mode (Pmode, val, 1);
15830 emit_move_insn (temp, val);
15833 XEXP (x, 1) = temp;
15837 else if (REG_P (XEXP (x, 1)))
15839 rtx temp = gen_reg_rtx (Pmode);
15840 rtx val = force_operand (XEXP (x, 0), temp);
15843 val = convert_to_mode (Pmode, val, 1);
15844 emit_move_insn (temp, val);
15847 XEXP (x, 0) = temp;
15855 /* Print an integer constant expression in assembler syntax. Addition
15856    and subtraction are the only arithmetic that may appear in these
15857    expressions. FILE is the stdio stream to write to, X is the rtx, and
15858    CODE is the operand print code from the output string. */
/* NOTE(review): elided excerpt -- the switch's case labels, break
   statements and braces are largely missing; the surviving lines are the
   per-case bodies.  */
15861 output_pic_addr_const (FILE *file, rtx x, int code)
15865 switch (GET_CODE (x))
15868 gcc_assert (flag_pic);
/* SYMBOL_REF case: Darwin may route the name through a stub.  */
15873 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
15874 output_addr_const (file, x);
15877 const char *name = XSTR (x, 0);
15879 /* Mark the decl as referenced so that cgraph will
15880    output the function. */
15881 if (SYMBOL_REF_DECL (x))
15882 mark_decl_referenced (SYMBOL_REF_DECL (x));
15885 if (MACHOPIC_INDIRECT
15886 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
15887 name = machopic_indirection_name (x, /*stub_p=*/true);
15889 assemble_name (file, name);
/* 'P' print code requests a @PLT suffix for non-local symbols on ELF.  */
15891 if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
15892 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
15893 fputs ("@PLT", file);
/* CODE_LABEL / LABEL_REF case.  */
15900 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
15901 assemble_name (asm_out_file, buf);
15905 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
15909 /* This used to output parentheses around the expression,
15910    but that does not work on the 386 (either ATT or BSD assembler). */
15911 output_pic_addr_const (file, XEXP (x, 0), code);
15915 /* We can't handle floating point constants;
15916    TARGET_PRINT_OPERAND must handle them. */
15917 output_operand_lossage ("floating constant misused");
/* PLUS case.  */
15921 /* Some assemblers need integer constants to appear first. */
15922 if (CONST_INT_P (XEXP (x, 0)))
15924 output_pic_addr_const (file, XEXP (x, 0), code);
15926 output_pic_addr_const (file, XEXP (x, 1), code);
/* MINUS case: subtrahend must be a CONST_INT.  */
15930 gcc_assert (CONST_INT_P (XEXP (x, 1)));
15931 output_pic_addr_const (file, XEXP (x, 1), code);
15933 output_pic_addr_const (file, XEXP (x, 0), code);
/* Grouped difference, bracketed per assembler dialect.  */
15939 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
15940 output_pic_addr_const (file, XEXP (x, 0), code);
15942 output_pic_addr_const (file, XEXP (x, 1), code);
15944 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
/* UNSPEC case: emit the wrapped operand followed by its relocation
   suffix.  */
15948 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
15950 bool f = i386_asm_output_addr_const_extra (file, x);
15955 gcc_assert (XVECLEN (x, 0) == 1);
15956 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
15957 switch (XINT (x, 1))
15960 fputs ("@GOT", file);
15962 case UNSPEC_GOTOFF:
15963 fputs ("@GOTOFF", file);
15965 case UNSPEC_PLTOFF:
15966 fputs ("@PLTOFF", file);
15969 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
15970 "(%rip)" : "[rip]", file);
15972 case UNSPEC_GOTPCREL:
15973 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
15974 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
15976 case UNSPEC_GOTTPOFF:
15977 /* FIXME: This might be @TPOFF in Sun ld too. */
15978 fputs ("@gottpoff", file);
15981 fputs ("@tpoff", file);
15983 case UNSPEC_NTPOFF:
/* TARGET_64BIT presumably selects between the two spellings here
   (guard line elided) -- confirm against full source.  */
15985 fputs ("@tpoff", file);
15987 fputs ("@ntpoff", file);
15989 case UNSPEC_DTPOFF:
15990 fputs ("@dtpoff", file);
15992 case UNSPEC_GOTNTPOFF:
15994 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
15995 "@gottpoff(%rip)": "@gottpoff[rip]", file);
15997 fputs ("@gotntpoff", file);
15999 case UNSPEC_INDNTPOFF:
16000 fputs ("@indntpoff", file);
16003 case UNSPEC_MACHOPIC_OFFSET:
16005 machopic_output_function_base_name (file);
16009 output_operand_lossage ("invalid UNSPEC as operand");
16015 output_operand_lossage ("invalid expression as operand");
16019 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
16020    We need to emit DTP-relative relocations. */
/* NOTE(review): elided excerpt -- the switch on SIZE (selecting the
   assembler directive and the trailing ", 0" padding for the 64-bit case)
   is partially missing.  */
16022 static void ATTRIBUTE_UNUSED
16023 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
16025 fputs (ASM_LONG, file);
16026 output_addr_const (file, x);
16027 fputs ("@dtpoff", file);
/* Pad the 8-byte case with an extra zero word.  */
16033 fputs (", 0", file);
16036 gcc_unreachable ();
16040 /* Return true if X is a representation of the PIC register. This copes
16041    with calls from ix86_find_base_term, where the register might have
16042    been replaced by a cselib value. */
/* NOTE(review): elided excerpt -- braces and some return statements are
   missing between the surviving lines.  */
16045 ix86_pic_register_p (rtx x)
/* A cselib VALUE stands for the PIC register if it compares equal.  */
16047 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
16048 return (pic_offset_table_rtx
16049 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
16050 else if (!REG_P (x))
16052 else if (pic_offset_table_rtx)
16054 if (REGNO (x) == REGNO (pic_offset_table_rtx))
/* A hard register can also match the original regno of a pseudo PIC
   register after allocation.  */
16056 if (HARD_REGISTER_P (x)
16057 && !HARD_REGISTER_P (pic_offset_table_rtx)
16058 && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
/* No pseudo PIC register: compare against the fixed hard regno.  */
16063 return REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
16066 /* Helper function for ix86_delegitimize_address.
16067    Attempt to delegitimize TLS local-exec accesses. */
/* NOTE(review): elided excerpt -- early `return orig_x' statements and
   braces are missing between the surviving lines.  */
16070 ix86_delegitimize_tls_address (rtx orig_x)
16072 rtx x = orig_x, unspec;
16073 struct ix86_address addr;
16075 if (!TARGET_TLS_DIRECT_SEG_REFS)
16079 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
/* Must decompose into a TLS-segment address with a CONST displacement.  */
16081 if (ix86_decompose_address (x, &addr) == 0
16082 || addr.seg != DEFAULT_TLS_SEG_REG
16083 || addr.disp == NULL_RTX
16084 || GET_CODE (addr.disp) != CONST)
/* Strip an outer (plus unspec const_int) to find the NTPOFF unspec.  */
16086 unspec = XEXP (addr.disp, 0);
16087 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
16088 unspec = XEXP (unspec, 0);
16089 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
16091 x = XVECEXP (unspec, 0, 0);
16092 gcc_assert (GET_CODE (x) == SYMBOL_REF);
16093 if (unspec != XEXP (addr.disp, 0))
16094 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
/* Rebuild any index (with scale) and base on top of the symbol.  */
16097 rtx idx = addr.index;
16098 if (addr.scale != 1)
16099 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
16100 x = gen_rtx_PLUS (Pmode, idx, x);
16103 x = gen_rtx_PLUS (Pmode, addr.base, x);
16104 if (MEM_P (orig_x))
16105 x = replace_equiv_address_nv (orig_x, x);
16109 /* In the name of slightly smaller debug output, and to cater to
16110    general assembler lossage, recognize PIC+GOTOFF and turn it back
16111    into a direct symbol reference.
16113    On Darwin, this is necessary to avoid a crash, because Darwin
16114    has a different PIC label for each routine but the DWARF debugging
16115    information is not associated with any particular routine, so it's
16116    necessary to remove references to the PIC label from RTL stored by
16117    the DWARF output code. */
/* NOTE(review): elided excerpt -- braces, several early returns and the
   TARGET_64BIT branch structure are missing between the surviving lines.  */
16120 ix86_delegitimize_address (rtx x)
16122 rtx orig_x = delegitimize_mem_from_attrs (x);
16123 /* addend is NULL or some rtx if x is something+GOTOFF where
16124    something doesn't include the PIC register. */
16125 rtx addend = NULL_RTX;
16126 /* reg_addend is NULL or a multiple of some register. */
16127 rtx reg_addend = NULL_RTX;
16128 /* const_addend is NULL or a const_int. */
16129 rtx const_addend = NULL_RTX;
16130 /* This is the result, or NULL. */
16131 rtx result = NULL_RTX;
/* 64-bit: (const (plus (unspec PCREL) const_int)) -> symbol + offset.  */
16140 if (GET_CODE (x) == CONST
16141 && GET_CODE (XEXP (x, 0)) == PLUS
16142 && GET_MODE (XEXP (x, 0)) == Pmode
16143 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
16144 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
16145 && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
16147 rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
16148 x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
16149 if (MEM_P (orig_x))
16150 x = replace_equiv_address_nv (orig_x, x);
/* 64-bit: bare GOTPCREL/PCREL unspec -> the wrapped symbol.  */
16154 if (GET_CODE (x) == CONST
16155 && GET_CODE (XEXP (x, 0)) == UNSPEC
16156 && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
16157 || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
16158 && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
16160 x = XVECEXP (XEXP (x, 0), 0, 0);
16161 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
16163 x = simplify_gen_subreg (GET_MODE (orig_x), x,
16171 if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
16172 return ix86_delegitimize_tls_address (orig_x);
16174 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
16175    and -mcmodel=medium -fpic. */
16178 if (GET_CODE (x) != PLUS
16179 || GET_CODE (XEXP (x, 1)) != CONST)
16180 return ix86_delegitimize_tls_address (orig_x);
16182 if (ix86_pic_register_p (XEXP (x, 0)))
16183 /* %ebx + GOT/GOTOFF */
16185 else if (GET_CODE (XEXP (x, 0)) == PLUS)
16187 /* %ebx + %reg * scale + GOT/GOTOFF */
16188 reg_addend = XEXP (x, 0);
16189 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
16190 reg_addend = XEXP (reg_addend, 1);
16191 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
16192 reg_addend = XEXP (reg_addend, 0);
/* Neither side is the PIC register: treat the whole thing as addend.  */
16195 reg_addend = NULL_RTX;
16196 addend = XEXP (x, 0);
16200 addend = XEXP (x, 0);
/* Look inside the CONST for an optional constant offset and the GOT
   unspec.  */
16202 x = XEXP (XEXP (x, 1), 0);
16203 if (GET_CODE (x) == PLUS
16204 && CONST_INT_P (XEXP (x, 1)))
16206 const_addend = XEXP (x, 1);
16210 if (GET_CODE (x) == UNSPEC
16211 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
16212 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
16213 || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
16214 && !MEM_P (orig_x) && !addend)))
16215 result = XVECEXP (x, 0, 0);
16217 if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
16218 && !MEM_P (orig_x))
16219 result = XVECEXP (x, 0, 0);
16222 return ix86_delegitimize_tls_address (orig_x);
/* Reassemble: symbol + const offset + register addend.  */
16225 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
16227 result = gen_rtx_PLUS (Pmode, reg_addend, result);
16230 /* If the rest of original X doesn't involve the PIC register, add
16231    addend and subtract pic_offset_table_rtx. This can happen e.g.
16233    leal (%ebx, %ecx, 4), %ecx
16235    movl foo@GOTOFF(%ecx), %edx
16236    in which case we return (%ecx - %ebx) + foo
16237    or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if pseudo_pic_reg
16238    and reload has completed. */
16239 if (pic_offset_table_rtx
16240 && (!reload_completed || !ix86_use_pseudo_pic_reg ()))
16241 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
16242 pic_offset_table_rtx),
16244 else if (pic_offset_table_rtx && !TARGET_MACHO && !TARGET_VXWORKS_RTP)
16246 rtx tmp = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
16247 tmp = gen_rtx_MINUS (Pmode, copy_rtx (addend), tmp);
16248 result = gen_rtx_PLUS (Pmode, tmp, result);
/* Narrow back to the original MEM's mode if needed.  */
16253 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
16255 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
16256 if (result == NULL_RTX)
16262 /* If X is a machine specific address (i.e. a symbol or label being
16263    referenced as a displacement from the GOT implemented using an
16264    UNSPEC), then return the base term. Otherwise return X. */
/* NOTE(review): elided excerpt -- the TARGET_64BIT guard, `term'
   declaration, braces and fallthrough returns are missing.  */
16267 ix86_find_base_term (rtx x)
16273 if (GET_CODE (x) != CONST)
16275 term = XEXP (x, 0);
/* Strip an optional constant offset.  */
16276 if (GET_CODE (term) == PLUS
16277 && CONST_INT_P (XEXP (term, 1)))
16278 term = XEXP (term, 0);
/* Only GOTPCREL/PCREL unspecs hide a base term.  */
16279 if (GET_CODE (term) != UNSPEC
16280 || (XINT (term, 1) != UNSPEC_GOTPCREL
16281 && XINT (term, 1) != UNSPEC_PCREL))
16284 return XVECEXP (term, 0, 0);
16287 return ix86_delegitimize_address (x);
/* Emit the condition-code suffix (e.g. "e", "ne", "a", "b") for CODE in
   mode MODE to FILE.  REVERSE inverts the condition; FP selects the
   fcmov-style spellings.
   NOTE(review): elided excerpt -- the switch statement, most case labels
   and many suffix assignments are missing; only scattered bodies
   survive.  */
16291 put_condition_code (enum rtx_code code, machine_mode mode, bool reverse,
16292 bool fp, FILE *file)
16294 const char *suffix;
/* FP compares are first mapped onto the equivalent integer condition.  */
16296 if (mode == CCFPmode || mode == CCFPUmode)
16298 code = ix86_fp_compare_code_to_integer (code);
16302 code = reverse_condition (code);
16353 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
16357 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
16358    Those same assemblers have the same but opposite lossage on cmov. */
16359 if (mode == CCmode)
16360 suffix = fp ? "nbe" : "a";
16362 gcc_unreachable ();
16378 gcc_unreachable ();
16382 if (mode == CCmode)
16384 else if (mode == CCCmode)
16385 suffix = fp ? "b" : "c";
16387 gcc_unreachable ();
16403 gcc_unreachable ();
16407 if (mode == CCmode)
16409 else if (mode == CCCmode)
16410 suffix = fp ? "nb" : "nc";
16412 gcc_unreachable ();
16415 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
16419 if (mode == CCmode)
16422 gcc_unreachable ();
/* UNORDERED / ORDERED.  */
16425 suffix = fp ? "u" : "p";
16428 suffix = fp ? "nu" : "np";
16431 gcc_unreachable ();
16433 fputs (suffix, file);
16436 /* Print the name of register X to FILE based on its machine mode and number.
16437    If CODE is 'w', pretend the mode is HImode.
16438    If CODE is 'b', pretend the mode is QImode.
16439    If CODE is 'k', pretend the mode is SImode.
16440    If CODE is 'q', pretend the mode is DImode.
16441    If CODE is 'x', pretend the mode is V4SFmode.
16442    If CODE is 't', pretend the mode is V8SFmode.
16443    If CODE is 'g', pretend the mode is V16SFmode.
16444    If CODE is 'h', pretend the reg is the 'high' byte register.
16445    If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
16446    If CODE is 'd', duplicate the operand for AVX instruction.
/* NOTE(review): elided excerpt -- the msize-selection assignments, the
   switch on msize and several error paths are missing between the
   surviving lines.  */
16450 print_reg (rtx x, int code, FILE *file)
16454 unsigned int regno;
/* AT&T syntax prefixes register names with '%'.  */
16457 if (ASSEMBLER_DIALECT == ASM_ATT)
16462 gcc_assert (TARGET_64BIT);
16463 fputs ("rip", file);
16467 if (code == 'y' && STACK_TOP_P (x))
16469 fputs ("st(0)", file);
/* Each override code fixes msize regardless of GET_MODE (x); the
   assignments themselves are elided here.  */
16475 else if (code == 'b')
16477 else if (code == 'k')
16479 else if (code == 'q')
16481 else if (code == 'h')
16483 else if (code == 'x')
16485 else if (code == 't')
16487 else if (code == 'g')
16490 msize = GET_MODE_SIZE (GET_MODE (x));
16492 regno = true_regnum (x);
/* These registers should never reach the assembler printer.  */
16494 gcc_assert (regno != ARG_POINTER_REGNUM
16495 && regno != FRAME_POINTER_REGNUM
16496 && regno != FLAGS_REG
16497 && regno != FPSR_REG
16498 && regno != FPCR_REG);
16500 duplicated = code == 'd' && TARGET_AVX;
/* 4/8-byte legacy integer regs get the 'e'/'r' prefix.  */
16506 if (LEGACY_INT_REGNO_P (regno))
16507 putc (msize == 8 && TARGET_64BIT ? 'r' : 'e', file);
16512 reg = hi_reg_name[regno];
16515 if (regno >= ARRAY_SIZE (qi_reg_name))
16517 reg = qi_reg_name[regno];
16520 if (regno >= ARRAY_SIZE (qi_high_reg_name))
16522 reg = qi_high_reg_name[regno];
/* 32/64-byte vectors: rewrite the leading 'x' of "xmmN" to 'y'/'z'.  */
16526 if (SSE_REGNO_P (regno))
16528 gcc_assert (!duplicated);
16529 putc (msize == 32 ? 'y' : 'z', file);
16530 reg = hi_reg_name[regno] + 1;
16535 gcc_unreachable ();
16540 /* Irritatingly, AMD extended registers use
16541    different naming convention: "r%d[bwd]" */
16542 if (REX_INT_REGNO_P (regno))
16544 gcc_assert (TARGET_64BIT);
16548 error ("extended registers have no high halves");
16563 error ("unsupported operand size for extended register");
/* 'd' duplication for AVX: print the register a second time.  */
16571 if (ASSEMBLER_DIALECT == ASM_ATT)
16572 fprintf (file, ", %%%s", reg);
16574 fprintf (file, ", %s", reg);
16578 /* Meaning of CODE:
16579    L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
16580    C -- print opcode suffix for set/cmov insn.
16581    c -- like C, but print reversed condition
16582    F,f -- likewise, but for floating-point.
16583    O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
16585    R -- print embeded rounding and sae.
16586    r -- print only sae.
16587    z -- print the opcode suffix for the size of the current operand.
16588    Z -- likewise, with special suffixes for x87 instructions.
16589    * -- print a star (in certain assembler syntax)
16590    A -- print an absolute memory reference.
16591    E -- print address with DImode register names if TARGET_64BIT.
16592    w -- print the operand as if it's a "word" (HImode) even if it isn't.
16593    s -- print a shift double count, followed by the assemblers argument
16595    b -- print the QImode name of the register for the indicated operand.
16596    %b0 would print %al if operands[0] is reg 0.
16597    w -- likewise, print the HImode name of the register.
16598    k -- likewise, print the SImode name of the register.
16599    q -- likewise, print the DImode name of the register.
16600    x -- likewise, print the V4SFmode name of the register.
16601    t -- likewise, print the V8SFmode name of the register.
16602    g -- likewise, print the V16SFmode name of the register.
16603    h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
16604    y -- print "st(0)" instead of "st" as a register.
16605    d -- print duplicated register operand for AVX instruction.
16606    D -- print condition for SSE cmp instruction.
16607    P -- if PIC, print an @PLT suffix.
16608    p -- print raw symbol name.
16609    X -- don't print any sort of PIC '@' suffix for a symbol.
16610    & -- print some in-use local-dynamic symbol name.
16611    H -- print a memory address offset by 8; used for sse high-parts
16612    Y -- print condition for XOP pcom* instruction.
16613    + -- print a branch hint as 'cs' or 'ds' prefix
16614    ; -- print a semicolon (after prefixes due to bug in older gas).
16615    ~ -- print "i" if TARGET_AVX2, "f" otherwise.
16616    @ -- print a segment register of thread base pointer load
16617    ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
16618    ! -- print MPX prefix for jxx/call/ret instructions if required.
/* NOTE(review): elided excerpt -- the outer `switch (code)', most case
   labels, break statements and braces of this very large function are
   missing; only fragments of the per-code bodies survive.  */
16622 ix86_print_operand (FILE *file, rtx x, int code)
/* '*' / 'A' handling: dialect-dependent star / absolute reference.  */
16629 switch (ASSEMBLER_DIALECT)
16636 /* Intel syntax. For absolute addresses, registers should not
16637    be surrounded by braces. */
16641 ix86_print_operand (file, x, 0);
16648 gcc_unreachable ();
/* 'E': print address, wrapping it so DImode register names are used.  */
16651 ix86_print_operand (file, x, 0);
16655 /* Wrap address in an UNSPEC to declare special handling. */
16657 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
16659 output_address (VOIDmode, x);
/* Size-suffix codes (L/W/B/Q/S/T): AT&T prints a literal suffix letter;
   the putc calls are elided.  */
16663 if (ASSEMBLER_DIALECT == ASM_ATT)
16668 if (ASSEMBLER_DIALECT == ASM_ATT)
16673 if (ASSEMBLER_DIALECT == ASM_ATT)
16678 if (ASSEMBLER_DIALECT == ASM_ATT)
16683 if (ASSEMBLER_DIALECT == ASM_ATT)
16688 if (ASSEMBLER_DIALECT == ASM_ATT)
/* 'O': Sun assembler cmov suffix ("w.", "l.", "q.").  */
16693 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
16694 if (ASSEMBLER_DIALECT != ASM_ATT)
16697 switch (GET_MODE_SIZE (GET_MODE (x)))
16712 output_operand_lossage
16713 ("invalid operand size for operand code 'O'");
/* 'z': generic size suffix for integer operands.  */
16722 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
16724 /* Opcodes don't get size suffixes if using Intel opcodes. */
16725 if (ASSEMBLER_DIALECT == ASM_INTEL)
16728 switch (GET_MODE_SIZE (GET_MODE (x)))
16747 output_operand_lossage
16748 ("invalid operand size for operand code 'z'");
16753 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
16755 (0, "non-integer operand used with operand code 'z'");
/* 'Z': x87-flavoured size suffixes (fild/fist forms).  */
16759 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
16760 if (ASSEMBLER_DIALECT == ASM_INTEL)
16763 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
16765 switch (GET_MODE_SIZE (GET_MODE (x)))
16768 #ifdef HAVE_AS_IX86_FILDS
16778 #ifdef HAVE_AS_IX86_FILDQ
16781 fputs ("ll", file);
16789 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
16791 /* 387 opcodes don't get size suffixes
16792    if the operands are registers. */
16793 if (STACK_REG_P (x))
16796 switch (GET_MODE_SIZE (GET_MODE (x)))
16817 output_operand_lossage
16818 ("invalid operand type used with operand code 'Z'");
16822 output_operand_lossage
16823 ("invalid operand size for operand code 'Z'");
/* 's': shift-double count followed by separator.  */
16842 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
16844 ix86_print_operand (file, x, 0);
16845 fputs (", ", file);
/* 'Y': XOP pcom* condition names.  */
16850 switch (GET_CODE (x))
16853 fputs ("neq", file);
16856 fputs ("eq", file);
16860 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
16864 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
16868 fputs ("le", file);
16872 fputs ("lt", file);
16875 fputs ("unord", file);
16878 fputs ("ord", file);
16881 fputs ("ueq", file);
16884 fputs ("nlt", file);
16887 fputs ("nle", file);
16890 fputs ("ule", file);
16893 fputs ("ult", file);
16896 fputs ("une", file);
16899 output_operand_lossage ("operand is not a condition code, "
16900 "invalid operand code 'Y'");
/* 'D': SSE cmp condition names (different spellings from fcmov).  */
16906 /* Little bit of braindamage here. The SSE compare instructions
16907    does use completely different names for the comparisons that the
16908    fp conditional moves. */
16909 switch (GET_CODE (x))
16914 fputs ("eq_us", file);
16918 fputs ("eq", file);
16923 fputs ("nge", file);
16927 fputs ("lt", file);
16932 fputs ("ngt", file);
16936 fputs ("le", file);
16939 fputs ("unord", file);
16944 fputs ("neq_oq", file);
16948 fputs ("neq", file);
16953 fputs ("ge", file);
16957 fputs ("nlt", file);
16962 fputs ("gt", file);
16966 fputs ("nle", file);
16969 fputs ("ord", file);
16972 output_operand_lossage ("operand is not a condition code, "
16973 "invalid operand code 'D'");
/* 'C'/'c'/'F'/'f': set/cmov condition suffixes via put_condition_code.  */
16980 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
16981 if (ASSEMBLER_DIALECT == ASM_ATT)
16987 if (!COMPARISON_P (x))
16989 output_operand_lossage ("operand is not a condition code, "
16990 "invalid operand code '%c'", code);
16993 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
16994 code == 'c' || code == 'f',
16995 code == 'F' || code == 'f',
/* 'H': the memory operand offset by 8 bytes (SSE high parts).  */
17000 if (!offsettable_memref_p (x))
17002 output_operand_lossage ("operand is not an offsettable memory "
17003 "reference, invalid operand code 'H'");
17006 /* It doesn't actually matter what mode we use here, as we're
17007    only going to use this for printing. */
17008 x = adjust_address_nv (x, DImode, 8);
17009 /* Output 'qword ptr' for intel assembler dialect. */
17010 if (ASSEMBLER_DIALECT == ASM_INTEL)
/* HLE prefixes: emit mnemonic or raw byte depending on assembler
   support.  */
17015 gcc_assert (CONST_INT_P (x));
17017 if (INTVAL (x) & IX86_HLE_ACQUIRE)
17018 #ifdef HAVE_AS_IX86_HLE
17019 fputs ("xacquire ", file);
17021 fputs ("\n" ASM_BYTE "0xf2\n\t", file);
17023 else if (INTVAL (x) & IX86_HLE_RELEASE)
17024 #ifdef HAVE_AS_IX86_HLE
17025 fputs ("xrelease ", file);
17027 fputs ("\n" ASM_BYTE "0xf3\n\t", file);
17029 /* We do not want to print value of the operand. */
/* AVX-512 zero-masking marker.  */
17033 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
17034 fputs ("{z}", file);
/* 'r': SAE-only marker.  */
17038 gcc_assert (CONST_INT_P (x));
17039 gcc_assert (INTVAL (x) == ROUND_SAE);
17041 if (ASSEMBLER_DIALECT == ASM_INTEL)
17042 fputs (", ", file);
17044 fputs ("{sae}", file);
17046 if (ASSEMBLER_DIALECT == ASM_ATT)
17047 fputs (", ", file);
/* 'R': embedded-rounding + SAE marker.  */
17052 gcc_assert (CONST_INT_P (x));
17054 if (ASSEMBLER_DIALECT == ASM_INTEL)
17055 fputs (", ", file);
17057 switch (INTVAL (x))
17059 case ROUND_NEAREST_INT | ROUND_SAE:
17060 fputs ("{rn-sae}", file);
17062 case ROUND_NEG_INF | ROUND_SAE:
17063 fputs ("{rd-sae}", file);
17065 case ROUND_POS_INF | ROUND_SAE:
17066 fputs ("{ru-sae}", file);
17068 case ROUND_ZERO | ROUND_SAE:
17069 fputs ("{rz-sae}", file);
17072 gcc_unreachable ();
17075 if (ASSEMBLER_DIALECT == ASM_ATT)
17076 fputs (", ", file);
/* '&': some in-use local-dynamic TLS symbol name.  */
17081 if (ASSEMBLER_DIALECT == ASM_ATT)
17087 const char *name = get_some_local_dynamic_name ();
17089 output_operand_lossage ("'%%&' used without any "
17090 "local dynamic TLS references");
17092 assemble_name (file, name);
/* '+': branch hint prefixes derived from REG_BR_PROB notes.  */
17101 || optimize_function_for_size_p (cfun)
17102 || !TARGET_BRANCH_PREDICTION_HINTS)
17105 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
17108 int pred_val = XINT (x, 0);
/* Only hint when prediction is decisively away from 50/50.  */
17110 if (pred_val < REG_BR_PROB_BASE * 45 / 100
17111 || pred_val > REG_BR_PROB_BASE * 55 / 100)
17113 bool taken = pred_val > REG_BR_PROB_BASE / 2;
17115 = final_forward_branch_p (current_output_insn) == 0;
17117 /* Emit hints only in the case default branch prediction
17118    heuristics would fail. */
17119 if (taken != cputaken)
17121 /* We use 3e (DS) prefix for taken branches and
17122    2e (CS) prefix for not taken branches. */
17124 fputs ("ds ; ", file);
17126 fputs ("cs ; ", file);
/* ';': separator, only when the assembler needs it.  */
17134 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
/* '@': TLS base segment register (fs on 64-bit userland, else gs).  */
17140 if (ASSEMBLER_DIALECT == ASM_ATT)
17143 /* The kernel uses a different segment register for performance
17144    reasons; a system call would not have to trash the userspace
17145    segment register, which would be expensive. */
17146 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
17147 fputs ("fs", file);
17149 fputs ("gs", file);
/* '~': integer vs float AVX flavour letter.  */
17153 putc (TARGET_AVX2 ? 'i' : 'f', file);
/* '^': addr32 prefix for x32.  */
17157 if (TARGET_64BIT && Pmode != word_mode)
17158 fputs ("addr32 ", file);
/* '!': MPX bnd prefix.  */
17162 if (ix86_bnd_prefixed_insn_p (current_output_insn))
17163 fputs ("bnd ", file);
17167 output_operand_lossage ("invalid operand code '%c'", code);
/* Non-code path: print the operand itself by its rtx class.  */
17172 print_reg (x, code, file);
17174 else if (MEM_P (x))
17176 rtx addr = XEXP (x, 0);
17178 /* No `byte ptr' prefix for call instructions ... */
17179 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
17181 machine_mode mode = GET_MODE (x);
17184 /* Check for explicit size override codes. */
17187 else if (code == 'w')
17189 else if (code == 'k')
17191 else if (code == 'q')
17193 else if (code == 'x')
17195 else if (mode == BLKmode)
17196 /* ... or BLKmode operands, when not overridden. */
17199 switch (GET_MODE_SIZE (mode))
17201 case 1: size = "BYTE"; break;
17202 case 2: size = "WORD"; break;
17203 case 4: size = "DWORD"; break;
17204 case 8: size = "QWORD"; break;
17205 case 12: size = "TBYTE"; break;
17207 if (mode == XFmode)
17212 case 32: size = "YMMWORD"; break;
17213 case 64: size = "ZMMWORD"; break;
17215 gcc_unreachable ();
17219 fputs (size, file);
17220 fputs (" PTR ", file);
17224 if (this_is_asm_operands && ! address_operand (addr, VOIDmode))
17225 output_operand_lossage ("invalid constraints for operand");
17227 ix86_print_operand_address_as
17228 (file, addr, MEM_ADDR_SPACE (x), code == 'p' || code == 'P');
/* SFmode immediate: print the IEEE bit pattern.  */
17231 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == SFmode)
17235 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
17237 if (ASSEMBLER_DIALECT == ASM_ATT)
17239 /* Sign extend 32bit SFmode immediate to 8 bytes. */
17241 fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
17242 (unsigned long long) (int) l);
17244 fprintf (file, "0x%08x", (unsigned int) l);
17247 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == DFmode)
17251 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), l);
17253 if (ASSEMBLER_DIALECT == ASM_ATT)
17255 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
17258 /* These float cases don't actually occur as immediate operands. */
17259 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == XFmode)
17263 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
17264 fputs (dstr, file);
17269 /* We have patterns that allow zero sets of memory, for instance.
17270    In 64-bit mode, we should probably support all 8-byte vectors,
17271    since we can in fact encode that into an immediate. */
17272 if (GET_CODE (x) == CONST_VECTOR)
17274 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
/* Immediate-marker prefixes ('$' / OFFSET FLAT:) unless raw symbol
   requested.  */
17278 if (code != 'P' && code != 'p')
17280 if (CONST_INT_P (x))
17282 if (ASSEMBLER_DIALECT == ASM_ATT)
17285 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
17286 || GET_CODE (x) == LABEL_REF)
17288 if (ASSEMBLER_DIALECT == ASM_ATT)
17291 fputs ("OFFSET FLAT:", file);
17294 if (CONST_INT_P (x))
17295 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
17296 else if (flag_pic || MACHOPIC_INDIRECT)
17297 output_pic_addr_const (file, x, code);
17299 output_addr_const (file, x);
/* Implements TARGET_PRINT_OPERAND_PUNCT_VALID_P: return true for the
   punctuation codes ix86_print_operand accepts without an operand.
   NOTE(review): elided excerpt -- the return type line and braces are
   missing.  */
17304 ix86_print_operand_punct_valid_p (unsigned char code)
17306 return (code == '@' || code == '*' || code == '+' || code == '&'
17307 || code == ';' || code == '~' || code == '^' || code == '!');
17310 /* Print a memory operand whose address is ADDR. */
/* AS selects the address space (used for %fs:/%gs: segment overrides);
   NO_RIP suppresses the 64-bit RIP-relative form.  NOTE(review): many
   lines of this function are elided in this excerpt; comments below
   describe only the visible code.  */
17313 ix86_print_operand_address_as (FILE *file, rtx addr,
17314 addr_space_t as, bool no_rip)
17316 struct ix86_address parts;
17317 rtx base, index, disp;
/* VSIB addresses wrap the real address in an UNSPEC; pull the vector
   index and scale out of the UNSPEC operands.  */
17323 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
17325 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
17326 gcc_assert (parts.index == NULL_RTX);
17327 parts.index = XVECEXP (addr, 0, 1);
17328 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
17329 addr = XVECEXP (addr, 0, 0);
17332 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
17334 gcc_assert (TARGET_64BIT);
17335 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
/* MPX bound-make address: base/index come from the UNSPEC vector.  */
17338 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDMK_ADDR)
17340 ok = ix86_decompose_address (XVECEXP (addr, 0, 1), &parts);
17341 gcc_assert (parts.base == NULL_RTX || parts.index == NULL_RTX);
17342 if (parts.base != NULL_RTX)
17344 parts.index = parts.base;
17347 parts.base = XVECEXP (addr, 0, 0);
17348 addr = XVECEXP (addr, 0, 0);
17350 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDLDX_ADDR)
17352 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
17353 gcc_assert (parts.index == NULL_RTX);
17354 parts.index = XVECEXP (addr, 0, 1);
17355 addr = XVECEXP (addr, 0, 0);
/* Plain address: decompose into base/index/disp/scale.  */
17358 ok = ix86_decompose_address (addr, &parts);
17363 index = parts.index;
17365 scale = parts.scale;
17367 if (ADDR_SPACE_GENERIC_P (as))
17370 gcc_assert (ADDR_SPACE_GENERIC_P (parts.seg));
/* Emit a segment-override prefix for the FS/GS address spaces;
   SEG_TLS is first mapped to the target's default TLS segment.  */
17372 if (!ADDR_SPACE_GENERIC_P (as))
17374 const char *string;
17376 if (as == ADDR_SPACE_SEG_TLS)
17377 as = DEFAULT_TLS_SEG_REG;
17378 if (as == ADDR_SPACE_SEG_FS)
17379 string = (ASSEMBLER_DIALECT == ASM_ATT ? "%fs:" : "fs:");
17380 else if (as == ADDR_SPACE_SEG_GS)
17381 string = (ASSEMBLER_DIALECT == ASM_ATT ? "%gs:" : "gs:");
17383 gcc_unreachable ();
17384 fputs (string, file);
17387 /* Use one byte shorter RIP relative addressing for 64bit mode. */
17388 if (TARGET_64BIT && !base && !index && !no_rip)
/* Strip a constant offset so we can inspect the underlying symbol;
   only non-TLS symbols/labels may use the RIP-relative form.  */
17392 if (GET_CODE (disp) == CONST
17393 && GET_CODE (XEXP (disp, 0)) == PLUS
17394 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
17395 symbol = XEXP (XEXP (disp, 0), 0);
17397 if (GET_CODE (symbol) == LABEL_REF
17398 || (GET_CODE (symbol) == SYMBOL_REF
17399 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
17403 if (!base && !index)
17405 /* Displacement only requires special attention. */
17406 if (CONST_INT_P (disp))
/* Intel dialect needs an explicit ds: on a bare constant address.  */
17408 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == ADDR_SPACE_GENERIC)
17409 fputs ("ds:", file);
17410 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
17413 output_pic_addr_const (file, disp, 0);
17415 output_addr_const (file, disp);
17419 /* Print SImode register names to force addr32 prefix. */
17420 if (SImode_address_operand (addr, VOIDmode))
17424 gcc_assert (TARGET_64BIT);
17425 switch (GET_CODE (addr))
17428 gcc_assert (GET_MODE (addr) == SImode);
17429 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
17433 gcc_assert (GET_MODE (addr) == DImode);
17436 gcc_unreachable ();
17439 gcc_assert (!code);
17445 && CONST_INT_P (disp)
17446 && INTVAL (disp) < -16*1024*1024)
17448 /* X32 runs in 64-bit mode, where displacement, DISP, in
17449 address DISP(%r64), is encoded as 32-bit immediate sign-
17450 extended from 32-bit to 64-bit. For -0x40000300(%r64),
17451 address is %r64 + 0xffffffffbffffd00. When %r64 <
17452 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
17453 which is invalid for x32. The correct address is %r64
17454 - 0x40000300 == 0xf7ffdd64. To properly encode
17455 -0x40000300(%r64) for x32, we zero-extend negative
17456 displacement by forcing addr32 prefix which truncates
17457 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
17458 zero-extend all negative displacements, including -1(%rsp).
17459 However, for small negative displacements, sign-extension
17460 won't cause overflow. We only zero-extend negative
17461 displacements if they < -16*1024*1024, which is also used
17462 to check legitimate address displacements for PIC. */
/* AT&T dialect: disp(base,index,scale).  */
17466 if (ASSEMBLER_DIALECT == ASM_ATT)
17471 output_pic_addr_const (file, disp, 0);
17472 else if (GET_CODE (disp) == LABEL_REF)
17473 output_asm_label (disp);
17475 output_addr_const (file, disp);
17480 print_reg (base, code, file);
17484 print_reg (index, vsib ? 0 : code, file);
17485 if (scale != 1 || vsib)
17486 fprintf (file, ",%d", scale);
/* Intel dialect: [base+index*scale+disp]; the symbol is printed
   separately from its constant offset.  */
17492 rtx offset = NULL_RTX;
17496 /* Pull out the offset of a symbol; print any symbol itself. */
17497 if (GET_CODE (disp) == CONST
17498 && GET_CODE (XEXP (disp, 0)) == PLUS
17499 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
17501 offset = XEXP (XEXP (disp, 0), 1);
17502 disp = gen_rtx_CONST (VOIDmode,
17503 XEXP (XEXP (disp, 0), 0));
17507 output_pic_addr_const (file, disp, 0);
17508 else if (GET_CODE (disp) == LABEL_REF)
17509 output_asm_label (disp);
17510 else if (CONST_INT_P (disp))
17513 output_addr_const (file, disp);
17519 print_reg (base, code, file);
17522 if (INTVAL (offset) >= 0)
17524 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
17528 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
17535 print_reg (index, vsib ? 0 : code, file);
17536 if (scale != 1 || vsib)
17537 fprintf (file, "*%d", scale);
/* TARGET_PRINT_OPERAND_ADDRESS hook: print ADDR in the generic address
   space, allowing the RIP-relative form (no_rip == false).  */
17545 ix86_print_operand_address (FILE *file, machine_mode /*mode*/, rtx addr)
17547 ix86_print_operand_address_as (file, addr, ADDR_SPACE_GENERIC, false);
17550 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
/* Print UNSPEC-wrapped TLS/Macho/split-stack constants with the
   appropriate assembler relocation suffix.  NOTE(review): the case
   labels for some branches and the return statements are elided in
   this excerpt.  */
17553 i386_asm_output_addr_const_extra (FILE *file, rtx x)
17557 if (GET_CODE (x) != UNSPEC)
17560 op = XVECEXP (x, 0, 0);
17561 switch (XINT (x, 1))
17563 case UNSPEC_GOTTPOFF:
17564 output_addr_const (file, op);
17565 /* FIXME: This might be @TPOFF in Sun ld. */
17566 fputs ("@gottpoff", file);
17569 output_addr_const (file, op);
17570 fputs ("@tpoff", file);
17572 case UNSPEC_NTPOFF:
17573 output_addr_const (file, op);
17575 fputs ("@tpoff", file);
17577 fputs ("@ntpoff", file);
17579 case UNSPEC_DTPOFF:
17580 output_addr_const (file, op);
17581 fputs ("@dtpoff", file);
17583 case UNSPEC_GOTNTPOFF:
17584 output_addr_const (file, op);
/* 64-bit uses the RIP-relative @gottpoff spelling, dialect-dependent.  */
17586 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
17587 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
17589 fputs ("@gotntpoff", file);
17591 case UNSPEC_INDNTPOFF:
17592 output_addr_const (file, op);
17593 fputs ("@indntpoff", file);
17596 case UNSPEC_MACHOPIC_OFFSET:
17597 output_addr_const (file, op);
17599 machopic_output_function_base_name (file);
17603 case UNSPEC_STACK_CHECK:
/* Split-stack limit lives at a TCB offset off %fs (64-bit) or %gs.  */
17607 gcc_assert (flag_split_stack);
17609 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
17610 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
17612 gcc_unreachable ();
17615 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
17626 /* Split one or more double-mode RTL references into pairs of half-mode
17627 references. The RTL can be REG, offsettable MEM, integer constant, or
17628 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
17629 split and "num" is its length. lo_half and hi_half are output arrays
17630 that parallel "operands". */
/* NOTE(review): the switch labels selecting half_mode from MODE and the
   loop header over NUM are elided in this excerpt.  */
17633 split_double_mode (machine_mode mode, rtx operands[],
17634 int num, rtx lo_half[], rtx hi_half[])
17636 machine_mode half_mode;
17642 half_mode = DImode;
17645 half_mode = SImode;
17648 gcc_unreachable ();
/* Byte offset of the high half is the size of one half.  */
17651 byte = GET_MODE_SIZE (half_mode);
17655 rtx op = operands[num];
17657 /* simplify_subreg refuse to split volatile memory addresses,
17658 but we still have to handle it. */
17661 lo_half[num] = adjust_address (op, half_mode, 0);
17662 hi_half[num] = adjust_address (op, half_mode, byte);
/* Non-MEM case: take subregs; VOIDmode constants use MODE instead.  */
17666 lo_half[num] = simplify_gen_subreg (half_mode, op,
17667 GET_MODE (op) == VOIDmode
17668 ? mode : GET_MODE (op), 0);
17669 hi_half[num] = simplify_gen_subreg (half_mode, op,
17670 GET_MODE (op) == VOIDmode
17671 ? mode : GET_MODE (op), byte);
17676 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
17677 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
17678 is the expression of the binary operation. The output may either be
17679 emitted here, or returned to the caller, like all output_* functions.
17681 There is no guarantee that the operands are the same mode, as they
17682 might be within FLOAT or FLOAT_EXTEND expressions. */
17684 #ifndef SYSV386_COMPAT
17685 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
17686 wants to fix the assemblers because that causes incompatibility
17687 with gcc. No-one wants to fix gcc because that causes
17688 incompatibility with assemblers... You can use the option of
17689 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
17690 #define SYSV386_COMPAT 1
/* NOTE(review): large portions of this function (opcode-name selection
   per GET_CODE, the TARGET_AVX branches, and final strcat/return) are
   elided in this excerpt; comments describe only visible code.  */
17694 output_387_binary_op (rtx insn, rtx *operands)
17696 static char buf[40];
17699 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
17701 /* Even if we do not want to check the inputs, this documents input
17702 constraints. Which helps in understanding the following code. */
17705 if (STACK_REG_P (operands[0])
17706 && ((REG_P (operands[1])
17707 && REGNO (operands[0]) == REGNO (operands[1])
17708 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
17709 || (REG_P (operands[2])
17710 && REGNO (operands[0]) == REGNO (operands[2])
17711 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
17712 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
17715 gcc_assert (is_sse);
/* Select the mnemonic stem by operation; integer-mode operands pick
   the fi* (integer) variants.  */
17718 switch (GET_CODE (operands[3]))
17721 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
17722 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
17730 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
17731 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
17739 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
17740 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
17748 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
17749 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
17757 gcc_unreachable ();
/* SSE path: three-operand (AVX) vs two-operand forms, ss/sd suffix
   chosen by the destination's scalar mode.  */
17764 strcpy (buf, ssep);
17765 if (GET_MODE (operands[0]) == SFmode)
17766 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
17768 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
17772 strcpy (buf, ssep + 1);
17773 if (GET_MODE (operands[0]) == SFmode)
17774 strcat (buf, "ss\t{%2, %0|%0, %2}");
17776 strcat (buf, "sd\t{%2, %0|%0, %2}");
/* x87 path: choose the operand/popping form of the instruction.  */
17782 switch (GET_CODE (operands[3]))
17786 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
17787 std::swap (operands[1], operands[2]);
17789 /* know operands[0] == operands[1]. */
17791 if (MEM_P (operands[2]))
17797 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
17799 if (STACK_TOP_P (operands[0]))
17800 /* How is it that we are storing to a dead operand[2]?
17801 Well, presumably operands[1] is dead too. We can't
17802 store the result to st(0) as st(0) gets popped on this
17803 instruction. Instead store to operands[2] (which I
17804 think has to be st(1)). st(1) will be popped later.
17805 gcc <= 2.8.1 didn't have this check and generated
17806 assembly code that the Unixware assembler rejected. */
17807 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
17809 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
17813 if (STACK_TOP_P (operands[0]))
17814 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
17816 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
17821 if (MEM_P (operands[1]))
17827 if (MEM_P (operands[2]))
17833 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
17836 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
17837 derived assemblers, confusingly reverse the direction of
17838 the operation for fsub{r} and fdiv{r} when the
17839 destination register is not st(0). The Intel assembler
17840 doesn't have this brain damage. Read !SYSV386_COMPAT to
17841 figure out what the hardware really does. */
17842 if (STACK_TOP_P (operands[0]))
17843 p = "{p\t%0, %2|rp\t%2, %0}";
17845 p = "{rp\t%2, %0|p\t%0, %2}";
17847 if (STACK_TOP_P (operands[0]))
17848 /* As above for fmul/fadd, we can't store to st(0). */
17849 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
17851 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
17856 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
17859 if (STACK_TOP_P (operands[0]))
17860 p = "{rp\t%0, %1|p\t%1, %0}";
17862 p = "{p\t%1, %0|rp\t%0, %1}";
17864 if (STACK_TOP_P (operands[0]))
17865 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
17867 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
17872 if (STACK_TOP_P (operands[0]))
17874 if (STACK_TOP_P (operands[1]))
17875 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
17877 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
17880 else if (STACK_TOP_P (operands[1]))
17883 p = "{\t%1, %0|r\t%0, %1}";
17885 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
17891 p = "{r\t%2, %0|\t%0, %2}";
17893 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
17899 gcc_unreachable ();
17906 /* Check if a 256bit AVX register is referenced inside of EXP. */
/* Strips one SUBREG level, then tests for a hard/pseudo REG whose mode
   is a 256-bit AVX (or OImode) mode.  */
17909 ix86_check_avx256_register (const_rtx exp)
17911 if (SUBREG_P (exp))
17912 exp = SUBREG_REG (exp);
17914 return (REG_P (exp)
17915 && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp)));
17918 /* Return needed mode for entity in optimize_mode_switching pass. */
/* AVX upper-128 state needed before INSN: calls are DIRTY if any
   256-bit register appears in their argument USEs, CLEAN otherwise;
   other insns are DIRTY if they reference a 256-bit register at all.
   NOTE(review): the CALL_P guard around the first loop is elided in
   this excerpt.  */
17921 ix86_avx_u128_mode_needed (rtx_insn *insn)
17927 /* Needed mode is set to AVX_U128_CLEAN if there are
17928 no 256bit modes used in function arguments. */
17929 for (link = CALL_INSN_FUNCTION_USAGE (insn);
17931 link = XEXP (link, 1))
17933 if (GET_CODE (XEXP (link, 0)) == USE)
17935 rtx arg = XEXP (XEXP (link, 0), 0);
17937 if (ix86_check_avx256_register (arg))
17938 return AVX_U128_DIRTY;
17942 return AVX_U128_CLEAN;
17945 /* Require DIRTY mode if a 256bit AVX register is referenced. Hardware
17946 changes state only when a 256bit register is written to, but we need
17947 to prevent the compiler from moving optimal insertion point above
17948 eventual read from 256bit register. */
17949 subrtx_iterator::array_type array;
17950 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
17951 if (ix86_check_avx256_register (*iter))
17952 return AVX_U128_DIRTY;
17954 return AVX_U128_ANY;
17957 /* Return mode that i387 must be switched into
17958 prior to the execution of insn. */
/* Control-word mode needed before INSN: calls and asms invalidate the
   CW (UNINITIALIZED); unrecognized insns impose nothing (ANY);
   otherwise the insn's i387_cw attribute selects the rounding mode.
   NOTE(review): the per-mode ENTITY checks between the attribute read
   and the returns are elided in this excerpt.  */
17961 ix86_i387_mode_needed (int entity, rtx_insn *insn)
17963 enum attr_i387_cw mode;
17965 /* The mode UNINITIALIZED is used to store control word after a
17966 function call or ASM pattern. The mode ANY specify that function
17967 has no requirements on the control word and make no changes in the
17968 bits we are interested in. */
17971 || (NONJUMP_INSN_P (insn)
17972 && (asm_noperands (PATTERN (insn)) >= 0
17973 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
17974 return I387_CW_UNINITIALIZED;
17976 if (recog_memoized (insn) < 0)
17977 return I387_CW_ANY;
17979 mode = get_attr_i387_cw (insn);
17984 if (mode == I387_CW_TRUNC)
17989 if (mode == I387_CW_FLOOR)
17994 if (mode == I387_CW_CEIL)
17999 if (mode == I387_CW_MASK_PM)
18004 gcc_unreachable ();
18007 return I387_CW_ANY;
18010 /* Return mode that entity must be switched into
18011 prior to the execution of insn. */
/* TARGET_MODE_NEEDED hook: dispatch on ENTITY to the AVX-U128 or
   i387 control-word helper.  NOTE(review): the switch/case labels
   are elided in this excerpt.  */
18014 ix86_mode_needed (int entity, rtx_insn *insn)
18019 return ix86_avx_u128_mode_needed (insn);
18024 return ix86_i387_mode_needed (entity, insn);
18026 gcc_unreachable ();
18031 /* Check if a 256bit AVX register is referenced in stores. */
/* note_stores callback: sets *(bool *)DATA when DEST is a 256-bit
   AVX register.  */
18034 ix86_check_avx256_stores (rtx dest, const_rtx, void *data)
18036 if (ix86_check_avx256_register (dest))
18038 bool *used = (bool *) data;
18043 /* Calculate mode of upper 128bit AVX registers after the insn. */
/* vzeroupper/vzeroall leave the state CLEAN; after a call the state
   is CLEAN unless a 256-bit register is stored (return value);
   otherwise the incoming MODE is kept.  NOTE(review): the CALL_P
   guard and the final return of MODE are elided in this excerpt.  */
18046 ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
18048 rtx pat = PATTERN (insn);
18050 if (vzeroupper_operation (pat, VOIDmode)
18051 || vzeroall_operation (pat, VOIDmode))
18052 return AVX_U128_CLEAN;
18054 /* We know that state is clean after CALL insn if there are no
18055 256bit registers used in the function return register. */
18058 bool avx_reg256_found = false;
18059 note_stores (pat, ix86_check_avx256_stores, &avx_reg256_found);
18061 return avx_reg256_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
18064 /* Otherwise, return current mode. Remember that if insn
18065 references AVX 256bit registers, the mode was already changed
18066 to DIRTY from MODE_NEEDED. */
18070 /* Return the mode that an insn results in. */
/* TARGET_MODE_AFTER hook; AVX-U128 delegates to its helper.
   NOTE(review): the switch/case labels and the i387 branch are
   elided in this excerpt.  */
18073 ix86_mode_after (int entity, int mode, rtx_insn *insn)
18078 return ix86_avx_u128_mode_after (mode, insn);
18085 gcc_unreachable ();
/* AVX upper-128 state assumed at function entry: DIRTY if any incoming
   argument is passed in a 256-bit register, otherwise CLEAN.  */
18090 ix86_avx_u128_mode_entry (void)
18094 /* Entry mode is set to AVX_U128_DIRTY if there are
18095 256bit modes used in function arguments. */
18096 for (arg = DECL_ARGUMENTS (current_function_decl); arg;
18097 arg = TREE_CHAIN (arg))
18099 rtx incoming = DECL_INCOMING_RTL (arg);
18101 if (incoming && ix86_check_avx256_register (incoming))
18102 return AVX_U128_DIRTY;
18105 return AVX_U128_CLEAN;
18108 /* Return a mode that ENTITY is assumed to be
18109 switched to at function entry. */
/* TARGET_MODE_ENTRY hook.  NOTE(review): switch/case labels are
   elided in this excerpt; the i387 entities default to CW_ANY.  */
18112 ix86_mode_entry (int entity)
18117 return ix86_avx_u128_mode_entry ();
18122 return I387_CW_ANY;
18124 gcc_unreachable ();
/* AVX upper-128 state assumed at function exit: DIRTY if the return
   value lives in a 256-bit register, otherwise CLEAN.  */
18129 ix86_avx_u128_mode_exit (void)
18131 rtx reg = crtl->return_rtx;
18133 /* Exit mode is set to AVX_U128_DIRTY if there are
18134 256bit modes used in the function return register. */
18135 if (reg && ix86_check_avx256_register (reg))
18136 return AVX_U128_DIRTY;
18138 return AVX_U128_CLEAN;
18141 /* Return a mode that ENTITY is assumed to be
18142 switched to at function exit. */
/* TARGET_MODE_EXIT hook.  NOTE(review): switch/case labels are
   elided in this excerpt; the i387 entities default to CW_ANY.  */
18145 ix86_mode_exit (int entity)
18150 return ix86_avx_u128_mode_exit ();
18155 return I387_CW_ANY;
18157 gcc_unreachable ();
/* TARGET_MODE_PRIORITY hook.  NOTE(review): only the definition line
   is visible in this excerpt; the body is elided.  */
18162 ix86_mode_priority (int, int n)
18167 /* Output code to initialize control word copies used by trunc?f?i and
18168 rounding patterns. CURRENT_MODE is set to current control word,
18169 new_mode is set to new control word. */
/* Stores the live CW (fnstcw), edits the rounding/precision-mask bits
   for MODE in a scratch HImode register, and saves the result in the
   per-mode stack slot used by the fldcw patterns.  Two edit strategies:
   or/and of literal masks when a partial-register stall is an issue or
   for size, otherwise bit-insert (insvsi).  NOTE(review): several
   case labels and switch braces are elided in this excerpt.  */
18172 emit_i387_cw_initialization (int mode)
18174 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
18177 enum ix86_stack_slot slot;
18179 rtx reg = gen_reg_rtx (HImode);
18181 emit_insn (gen_x86_fnstcw_1 (stored_mode));
18182 emit_move_insn (reg, copy_rtx (stored_mode));
18184 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
18185 || optimize_insn_for_size_p ())
18189 case I387_CW_TRUNC:
18190 /* round toward zero (truncate) */
18191 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
18192 slot = SLOT_CW_TRUNC;
18195 case I387_CW_FLOOR:
18196 /* round down toward -oo */
18197 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
18198 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
18199 slot = SLOT_CW_FLOOR;
18203 /* round up toward +oo */
18204 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
18205 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
18206 slot = SLOT_CW_CEIL;
18209 case I387_CW_MASK_PM:
18210 /* mask precision exception for nearbyint() */
18211 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
18212 slot = SLOT_CW_MASK_PM;
18216 gcc_unreachable ();
18223 case I387_CW_TRUNC:
18224 /* round toward zero (truncate) */
18225 emit_insn (gen_insvsi_1 (reg, GEN_INT (0xc)));
18226 slot = SLOT_CW_TRUNC;
18229 case I387_CW_FLOOR:
18230 /* round down toward -oo */
18231 emit_insn (gen_insvsi_1 (reg, GEN_INT (0x4)));
18232 slot = SLOT_CW_FLOOR;
18236 /* round up toward +oo */
18237 emit_insn (gen_insvsi_1 (reg, GEN_INT (0x8)));
18238 slot = SLOT_CW_CEIL;
18241 case I387_CW_MASK_PM:
18242 /* mask precision exception for nearbyint() */
18243 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
18244 slot = SLOT_CW_MASK_PM;
18248 gcc_unreachable ();
18252 gcc_assert (slot < MAX_386_STACK_LOCALS);
18254 new_mode = assign_386_stack_local (HImode, slot);
18255 emit_move_insn (new_mode, reg);
18258 /* Emit vzeroupper. */
/* Suppress the insertion when a call-saved SSE register is live at
   the insertion point (vzeroupper would clobber its upper half).
   NOTE(review): the early-return statements inside the loops are
   elided in this excerpt.  */
18261 ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live)
18265 /* Cancel automatic vzeroupper insertion if there are
18266 live call-saved SSE registers at the insertion point. */
18268 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
18269 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
18273 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
18274 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
18277 emit_insn (gen_avx_vzeroupper ());
18280 /* Generate one or more insns to set ENTITY to MODE. */
18282 /* Generate one or more insns to set ENTITY to MODE. HARD_REG_LIVE
18283 is the set of hard registers live at the point where the insn(s)
18284 are to be inserted. */
/* TARGET_MODE_EMIT hook: AVX-U128 CLEAN emits vzeroupper; the i387
   entities emit a control-word initialization for concrete modes.
   NOTE(review): the switch/case labels are elided in this excerpt.  */
18287 ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
18288 HARD_REG_SET regs_live)
18293 if (mode == AVX_U128_CLEAN)
18294 ix86_avx_emit_vzeroupper (regs_live);
18300 if (mode != I387_CW_ANY
18301 && mode != I387_CW_UNINITIALIZED)
18302 emit_i387_cw_initialization (mode);
18305 gcc_unreachable ();
18309 /* Output code for INSN to convert a float to a signed int. OPERANDS
18310 are the insn operands. The output may be [HSD]Imode and the input
18311 operand may be [SDX]Fmode. */
/* FISTTP is the SSE3 truncating-store form; otherwise an explicit
   fldcw of the rounding control word (operand 3) brackets the store,
   restoring the old CW (operand 2) afterwards.  NOTE(review): the
   fisttp branch's surrounding if/else is elided in this excerpt.  */
18314 output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp)
18316 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
18317 int dimode_p = GET_MODE (operands[0]) == DImode;
18318 int round_mode = get_attr_i387_cw (insn);
18320 /* Jump through a hoop or two for DImode, since the hardware has no
18321 non-popping instruction. We used to do this a different way, but
18322 that was somewhat fragile and broke with post-reload splitters. */
18323 if ((dimode_p || fisttp) && !stack_top_dies)
18324 output_asm_insn ("fld\t%y1", operands);
18326 gcc_assert (STACK_TOP_P (operands[1]));
18327 gcc_assert (MEM_P (operands[0]));
18328 gcc_assert (GET_MODE (operands[1]) != TFmode);
18331 output_asm_insn ("fisttp%Z0\t%0", operands);
18334 if (round_mode != I387_CW_ANY)
18335 output_asm_insn ("fldcw\t%3", operands);
18336 if (stack_top_dies || dimode_p)
18337 output_asm_insn ("fistp%Z0\t%0", operands);
18339 output_asm_insn ("fist%Z0\t%0", operands);
18340 if (round_mode != I387_CW_ANY)
18341 output_asm_insn ("fldcw\t%2", operands);
18347 /* Output code for x87 ffreep insn. The OPNO argument, which may only
18348 have the values zero or one, indicates the ffreep insn's operand
18349 from the OPERANDS array. */
/* When the assembler lacks ffreep support, the raw opcode bytes
   (0xdf 0xc0+reg) are emitted via ASM_SHORT; the final fallback is a
   plain fstp.  NOTE(review): the #else/#endif lines of the
   HAVE_AS_IX86_FFREEP conditional are elided in this excerpt.  */
18351 static const char *
18352 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
18354 if (TARGET_USE_FFREEP)
18355 #ifdef HAVE_AS_IX86_FFREEP
18356 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
18359 static char retval[32];
18360 int regno = REGNO (operands[opno]);
18362 gcc_assert (STACK_REGNO_P (regno));
18364 regno -= FIRST_STACK_REG;
18366 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
18371 return opno ? "fstp\t%y1" : "fstp\t%y0";
18375 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
18376 should be used. UNORDERED_P is true when fucom should be used. */
/* SSE comparisons map to (u)comiss/(u)comisd; x87 comparisons pick
   ftst, fcompp, fcomip or a table-selected fcom/ficom variant depending
   on which operands die and whether the second operand is memory or an
   integer.  NOTE(review): several if/else braces and the final table
   return are elided in this excerpt.  */
18379 output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
18381 int stack_top_dies;
18382 rtx cmp_op0, cmp_op1;
18383 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
18387 cmp_op0 = operands[0];
18388 cmp_op1 = operands[1];
18392 cmp_op0 = operands[1];
18393 cmp_op1 = operands[2];
18398 if (GET_MODE (operands[0]) == SFmode)
18400 return "%vucomiss\t{%1, %0|%0, %1}";
18402 return "%vcomiss\t{%1, %0|%0, %1}";
18405 return "%vucomisd\t{%1, %0|%0, %1}";
18407 return "%vcomisd\t{%1, %0|%0, %1}";
18410 gcc_assert (STACK_TOP_P (cmp_op0));
18412 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
/* Comparison against zero uses ftst; pop st(0) when it dies.  */
18414 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
18416 if (stack_top_dies)
18418 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
18419 return output_387_ffreep (operands, 1);
18422 return "ftst\n\tfnstsw\t%0";
18425 if (STACK_REG_P (cmp_op1)
18427 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
18428 && REGNO (cmp_op1) != FIRST_STACK_REG)
18430 /* If both the top of the 387 stack dies, and the other operand
18431 is also a stack register that dies, then this must be a
18432 `fcompp' float compare */
18436 /* There is no double popping fcomi variant. Fortunately,
18437 eflags is immune from the fstp's cc clobbering. */
18439 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
18441 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
18442 return output_387_ffreep (operands, 0);
18447 return "fucompp\n\tfnstsw\t%0";
18449 return "fcompp\n\tfnstsw\t%0";
18454 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
18456 static const char * const alt[16] =
18458 "fcom%Z2\t%y2\n\tfnstsw\t%0",
18459 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
18460 "fucom%Z2\t%y2\n\tfnstsw\t%0",
18461 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
18463 "ficom%Z2\t%y2\n\tfnstsw\t%0",
18464 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
18468 "fcomi\t{%y1, %0|%0, %y1}",
18469 "fcomip\t{%y1, %0|%0, %y1}",
18470 "fucomi\t{%y1, %0|%0, %y1}",
18471 "fucomip\t{%y1, %0|%0, %y1}",
/* Build the 4-bit index into the table above.  */
18482 mask = eflags_p << 3;
18483 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
18484 mask |= unordered_p << 1;
18485 mask |= stack_top_dies;
18487 gcc_assert (mask < 16);
/* Emit one absolute jump-table entry: ASM_LONG (or ASM_QUAD for 64-bit
   DImode case vectors) followed by the local label LPREFIX<value>.
   NOTE(review): the TARGET_64BIT condition selecting ASM_QUAD is
   elided in this excerpt.  */
18496 ix86_output_addr_vec_elt (FILE *file, int value)
18498 const char *directive = ASM_LONG;
18502 directive = ASM_QUAD;
18504 gcc_assert (!TARGET_64BIT);
18507 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
/* Emit one relative jump-table entry (VALUE minus REL), choosing the
   label-difference, @GOTOFF, Mach-O, or GOT-relative form by target.  */
18511 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
18513 const char *directive = ASM_LONG;
18516 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
18517 directive = ASM_QUAD;
18519 gcc_assert (!TARGET_64BIT);
18521 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
18522 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
18523 fprintf (file, "%s%s%d-%s%d\n",
18524 directive, LPREFIX, value, LPREFIX, rel);
18525 else if (HAVE_AS_GOTOFF_IN_DATA)
18526 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
18528 else if (TARGET_MACHO)
18530 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
18531 machopic_output_function_base_name (file);
/* Fallback: express the entry relative to the GOT symbol.  */
18536 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
18537 GOT_SYMBOL_NAME, LPREFIX, value);
18540 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
/* for the destination.  The xor form clobbers flags, so it is wrapped
   in a PARALLEL with a CC clobber.  NOTE(review): the final emit of
   TMP is elided in this excerpt.  */
18544 ix86_expand_clear (rtx dest)
18548 /* We play register width games, which are only valid after reload. */
18549 gcc_assert (reload_completed);
18551 /* Avoid HImode and its attendant prefix byte. */
18552 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
18553 dest = gen_rtx_REG (SImode, REGNO (dest));
18554 tmp = gen_rtx_SET (dest, const0_rtx);
18556 if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
18558 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18559 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
18565 /* X is an unchanging MEM. If it is a constant pool reference, return
18566 the constant pool rtx, else NULL. */
18569 maybe_get_pool_constant (rtx x)
/* Delegitimize first so PIC-decorated pool addresses are recognized.  */
18571 x = ix86_delegitimize_address (XEXP (x, 0));
18573 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
18574 return get_pool_constant (x);
/* Expand a scalar move of MODE between operands[0] and operands[1],
   legitimizing TLS symbols, PE-COFF dllimport symbols, and PIC
   references, and forcing awkward operands into registers/memory.
   NOTE(review): many connecting lines (op0/op1 assignments, several
   if/else braces) are elided in this excerpt.  */
18580 ix86_expand_move (machine_mode mode, rtx operands[])
18583 enum tls_model model;
18588 if (GET_CODE (op1) == SYMBOL_REF)
18592 model = SYMBOL_REF_TLS_MODEL (op1);
/* TLS symbol: rewrite through the TLS access sequence.  */
18595 op1 = legitimize_tls_address (op1, model, true);
18596 op1 = force_operand (op1, op0);
18599 op1 = convert_to_mode (mode, op1, 1);
18601 else if ((tmp = legitimize_pe_coff_symbol (op1, false)) != NULL_RTX)
/* (symbol + const) form: legitimize the symbol, then re-add the
   addend.  */
18604 else if (GET_CODE (op1) == CONST
18605 && GET_CODE (XEXP (op1, 0)) == PLUS
18606 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
18608 rtx addend = XEXP (XEXP (op1, 0), 1);
18609 rtx symbol = XEXP (XEXP (op1, 0), 0);
18612 model = SYMBOL_REF_TLS_MODEL (symbol);
18614 tmp = legitimize_tls_address (symbol, model, true);
18616 tmp = legitimize_pe_coff_symbol (symbol, true);
18620 tmp = force_operand (tmp, NULL);
18621 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
18622 op0, 1, OPTAB_DIRECT);
18625 op1 = convert_to_mode (mode, tmp, 1);
/* PIC / Mach-O indirect handling of symbolic operands.  */
18629 if ((flag_pic || MACHOPIC_INDIRECT)
18630 && symbolic_operand (op1, mode))
18632 if (TARGET_MACHO && !TARGET_64BIT)
18635 /* dynamic-no-pic */
18636 if (MACHOPIC_INDIRECT)
18638 rtx temp = (op0 && REG_P (op0) && mode == Pmode)
18639 ? op0 : gen_reg_rtx (Pmode);
18640 op1 = machopic_indirect_data_reference (op1, temp);
18642 op1 = machopic_legitimize_pic_address (op1, mode,
18643 temp == op1 ? 0 : temp);
18645 if (op0 != op1 && GET_CODE (op0) != MEM)
18647 rtx insn = gen_rtx_SET (op0, op1);
18651 if (GET_CODE (op0) == MEM)
18652 op1 = force_reg (Pmode, op1);
18656 if (GET_CODE (temp) != REG)
18657 temp = gen_reg_rtx (Pmode);
18658 temp = legitimize_pic_address (op1, temp);
18663 /* dynamic-no-pic */
18669 op1 = force_reg (mode, op1);
18670 else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
18672 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
18673 op1 = legitimize_pic_address (op1, reg);
18676 op1 = convert_to_mode (mode, op1, 1);
/* MEM destinations: avoid mem-to-mem moves except valid pushes.  */
18683 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
18684 || !push_operand (op0, mode))
18686 op1 = force_reg (mode, op1);
18688 if (push_operand (op0, mode)
18689 && ! general_no_elim_operand (op1, mode))
18690 op1 = copy_to_mode_reg (mode, op1);
18692 /* Force large constants in 64bit compilation into register
18693 to get them CSEed. */
18694 if (can_create_pseudo_p ()
18695 && (mode == DImode) && TARGET_64BIT
18696 && immediate_operand (op1, mode)
18697 && !x86_64_zext_immediate_operand (op1, VOIDmode)
18698 && !register_operand (op0, mode)
18700 op1 = copy_to_mode_reg (mode, op1);
18702 if (can_create_pseudo_p ()
18703 && CONST_DOUBLE_P (op1))
18705 /* If we are loading a floating point constant to a register,
18706 force the value to memory now, since we'll get better code
18707 out the back end. */
18709 op1 = validize_mem (force_const_mem (mode, op1));
18710 if (!register_operand (op0, mode))
18712 rtx temp = gen_reg_rtx (mode);
18713 emit_insn (gen_rtx_SET (temp, op1));
18714 emit_move_insn (op0, temp);
18720 emit_insn (gen_rtx_SET (op0, op1));
/* Expand a vector move of MODE: resolve pushes, force non-zero
   constants to the constant pool, and route under-aligned SSE memory
   operands through the misaligned-move expander.  NOTE(review): some
   connecting lines (e.g. the SUBREG_P tests paired with CONSTANT_P,
   return statements) are elided in this excerpt.  */
18724 ix86_expand_vector_move (machine_mode mode, rtx operands[])
18726 rtx op0 = operands[0], op1 = operands[1];
18727 /* Use GET_MODE_BITSIZE instead of GET_MODE_ALIGNMENT for IA MCU
18728 psABI since the biggest alignment is 4 byte for IA MCU psABI. */
18729 unsigned int align = (TARGET_IAMCU
18730 ? GET_MODE_BITSIZE (mode)
18731 : GET_MODE_ALIGNMENT (mode));
18733 if (push_operand (op0, VOIDmode))
18734 op0 = emit_move_resolve_push (mode, op0);
18736 /* Force constants other than zero into memory. We do not know how
18737 the instructions used to build constants modify the upper 64 bits
18738 of the register, once we have that information we may be able
18739 to handle some of them more efficiently. */
18740 if (can_create_pseudo_p ()
18741 && register_operand (op0, mode)
18742 && (CONSTANT_P (op1)
18744 && CONSTANT_P (SUBREG_REG (op1))))
18745 && !standard_sse_constant_p (op1))
18746 op1 = validize_mem (force_const_mem (mode, op1));
18748 /* We need to check memory alignment for SSE mode since attribute
18749 can make operands unaligned. */
18750 if (can_create_pseudo_p ()
18751 && SSE_REG_MODE_P (mode)
18752 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
18753 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
18757 /* ix86_expand_vector_move_misalign() does not like constants ... */
18758 if (CONSTANT_P (op1)
18760 && CONSTANT_P (SUBREG_REG (op1))))
18761 op1 = validize_mem (force_const_mem (mode, op1));
18763 /* ... nor both arguments in memory. */
18764 if (!register_operand (op0, mode)
18765 && !register_operand (op1, mode))
18766 op1 = force_reg (mode, op1);
18768 tmp[0] = op0; tmp[1] = op1;
18769 ix86_expand_vector_move_misalign (mode, tmp);
18773 /* Make operand1 a register if it isn't already. */
18774 if (can_create_pseudo_p ()
18775 && !register_operand (op0, mode)
18776 && !register_operand (op1, mode))
18778 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
18782 emit_insn (gen_rtx_SET (op0, op1));
18785 /* Split 32-byte AVX unaligned load and store if needed. */
/* Picks the per-mode extract/load/store generators, then either splits
   the 32-byte access into two 16-byte halves (when the split-unaligned
   tuning flags are set and optimizing for speed) or emits the single
   unaligned instruction.  NOTE(review): several case labels and the
   MEM_P (op1) guard are elided in this excerpt.  */
18788 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
18791 rtx (*extract) (rtx, rtx, rtx);
18792 rtx (*load_unaligned) (rtx, rtx);
18793 rtx (*store_unaligned) (rtx, rtx);
18796 switch (GET_MODE (op0))
18799 gcc_unreachable ();
18801 extract = gen_avx_vextractf128v32qi;
18802 load_unaligned = gen_avx_loaddquv32qi;
18803 store_unaligned = gen_avx_storedquv32qi;
18807 extract = gen_avx_vextractf128v8sf;
18808 load_unaligned = gen_avx_loadups256;
18809 store_unaligned = gen_avx_storeups256;
18813 extract = gen_avx_vextractf128v4df;
18814 load_unaligned = gen_avx_loadupd256;
18815 store_unaligned = gen_avx_storeupd256;
/* Load path: two 16-byte loads concatenated into the 256-bit dest.  */
18822 if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD
18823 && optimize_insn_for_speed_p ())
18825 rtx r = gen_reg_rtx (mode);
18826 m = adjust_address (op1, mode, 0);
18827 emit_move_insn (r, m);
18828 m = adjust_address (op1, mode, 16);
18829 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
18830 emit_move_insn (op0, r);
18832 /* Normal *mov<mode>_internal pattern will handle
18833 unaligned loads just fine if misaligned_operand
18834 is true, and without the UNSPEC it can be combined
18835 with arithmetic instructions. */
18836 else if (misaligned_operand (op1, GET_MODE (op1)))
18837 emit_insn (gen_rtx_SET (op0, op1));
18839 emit_insn (load_unaligned (op0, op1));
/* Store path: two vextractf128 stores of the low and high halves.  */
18841 else if (MEM_P (op0))
18843 if (TARGET_AVX256_SPLIT_UNALIGNED_STORE
18844 && optimize_insn_for_speed_p ())
18846 m = adjust_address (op0, mode, 0);
18847 emit_insn (extract (m, op1, const0_rtx));
18848 m = adjust_address (op0, mode, 16);
18849 emit_insn (extract (m, op1, const1_rtx));
18852 emit_insn (store_unaligned (op0, op1));
18855 gcc_unreachable ();
18858 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
18859 straight to ix86_expand_vector_move. */
18860 /* Code generation for scalar reg-reg moves of single and double precision data:
18861 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
18865 if (x86_sse_partial_reg_dependency == true)
18870 Code generation for scalar loads of double precision data:
18871 if (x86_sse_split_regs == true)
18872 movlpd mem, reg (gas syntax)
18876 Code generation for unaligned packed loads of single precision data
18877 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
18878 if (x86_sse_unaligned_move_optimal)
18881 if (x86_sse_partial_reg_dependency == true)
18893 Code generation for unaligned packed loads of double precision data
18894 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
18895 if (x86_sse_unaligned_move_optimal)
18898 if (x86_sse_split_regs == true)
18911 ix86_expand_vector_move_misalign (machine_mode mode, rtx operands[])
/* operands[0] = destination, operands[1] = source; at least one side
   is (possibly) a misaligned memory operand.  Dispatch on vector size:
   64-byte (AVX-512), 32-byte (AVX), then the 16-byte SSE paths.  */
18913 rtx op0, op1, orig_op0 = NULL_RTX, m;
18914 rtx (*load_unaligned) (rtx, rtx);
18915 rtx (*store_unaligned) (rtx, rtx);
/* 64-byte vectors: canonicalize integer vectors to V16SI, then use the
   AVX-512F unaligned load/store expanders.  */
18920 if (GET_MODE_SIZE (mode) == 64)
18922 switch (GET_MODE_CLASS (mode))
18924 case MODE_VECTOR_INT:
18926 if (GET_MODE (op0) != V16SImode)
18931 op0 = gen_reg_rtx (V16SImode);
18934 op0 = gen_lowpart (V16SImode, op0);
18936 op1 = gen_lowpart (V16SImode, op1);
18939 case MODE_VECTOR_FLOAT:
18940 switch (GET_MODE (op0))
18943 gcc_unreachable ();
18945 load_unaligned = gen_avx512f_loaddquv16si;
18946 store_unaligned = gen_avx512f_storedquv16si;
18949 load_unaligned = gen_avx512f_loadups512;
18950 store_unaligned = gen_avx512f_storeups512;
18953 load_unaligned = gen_avx512f_loadupd512;
18954 store_unaligned = gen_avx512f_storeupd512;
18959 emit_insn (load_unaligned (op0, op1));
18960 else if (MEM_P (op0))
18961 emit_insn (store_unaligned (op0, op1));
18963 gcc_unreachable ();
18965 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
18969 gcc_unreachable ();
/* 32-byte vectors: canonicalize integer vectors to V32QI; float
   vectors go through the AVX split helper above.  */
18976 && GET_MODE_SIZE (mode) == 32)
18978 switch (GET_MODE_CLASS (mode))
18980 case MODE_VECTOR_INT:
18982 if (GET_MODE (op0) != V32QImode)
18987 op0 = gen_reg_rtx (V32QImode);
18990 op0 = gen_lowpart (V32QImode, op0);
18992 op1 = gen_lowpart (V32QImode, op1);
18995 case MODE_VECTOR_FLOAT:
18996 ix86_avx256_split_vector_move_misalign (op0, op1);
18998 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
19002 gcc_unreachable ();
19010 /* Normal *mov<mode>_internal pattern will handle
19011 unaligned loads just fine if misaligned_operand
19012 is true, and without the UNSPEC it can be combined
19013 with arithmetic instructions. */
19015 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
19016 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
19017 && misaligned_operand (op1, GET_MODE (op1)))
19018 emit_insn (gen_rtx_SET (op0, op1));
19019 /* ??? If we have typed data, then it would appear that using
19020 movdqu is the only way to get unaligned data loaded with
19022 else if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
/* 16-byte integer vectors: canonicalize to V16QI and use movdqu.  */
19024 if (GET_MODE (op0) != V16QImode)
19027 op0 = gen_reg_rtx (V16QImode);
19029 op1 = gen_lowpart (V16QImode, op1);
19030 /* We will eventually emit movups based on insn attributes. */
19031 emit_insn (gen_sse2_loaddquv16qi (op0, op1));
19033 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
/* V2DF load: either a single movupd, or two half-register loads
   (movlpd/movhpd) chosen per the SSE tuning flags below.  */
19035 else if (TARGET_SSE2 && mode == V2DFmode)
19040 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
19041 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
19042 || optimize_insn_for_size_p ())
19044 /* We will eventually emit movups based on insn attributes. */
19045 emit_insn (gen_sse2_loadupd (op0, op1));
19049 /* When SSE registers are split into halves, we can avoid
19050 writing to the top half twice. */
19051 if (TARGET_SSE_SPLIT_REGS)
19053 emit_clobber (op0);
19058 /* ??? Not sure about the best option for the Intel chips.
19059 The following would seem to satisfy; the register is
19060 entirely cleared, breaking the dependency chain. We
19061 then store to the upper half, with a dependency depth
19062 of one. A rumor has it that Intel recommends two movsd
19063 followed by an unpacklpd, but this is unconfirmed. And
19064 given that the dependency depth of the unpacklpd would
19065 still be one, I'm not sure why this would be better. */
19066 zero = CONST0_RTX (V2DFmode);
19069 m = adjust_address (op1, DFmode, 0);
19070 emit_insn (gen_sse2_loadlpd (op0, zero, m));
19071 m = adjust_address (op1, DFmode, 8);
19072 emit_insn (gen_sse2_loadhpd (op0, op0, m));
/* Remaining load case (single-precision): movups, or two half loads
   via movlps/movhps with an optional dependency-breaking clear.  */
19079 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
19080 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
19081 || optimize_insn_for_size_p ())
19083 if (GET_MODE (op0) != V4SFmode)
19086 op0 = gen_reg_rtx (V4SFmode);
19088 op1 = gen_lowpart (V4SFmode, op1);
19089 emit_insn (gen_sse_loadups (op0, op1));
19091 emit_move_insn (orig_op0,
19092 gen_lowpart (GET_MODE (orig_op0), op0));
19096 if (mode != V4SFmode)
19097 t = gen_reg_rtx (V4SFmode);
19101 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
19102 emit_move_insn (t, CONST0_RTX (V4SFmode))
19106 m = adjust_address (op1, V2SFmode, 0);
19107 emit_insn (gen_sse_loadlps (t, t, m));
19108 m = adjust_address (op1, V2SFmode, 8);
19109 emit_insn (gen_sse_loadhps (t, t, m));
19110 if (mode != V4SFmode)
19111 emit_move_insn (op0, gen_lowpart (mode, t));
/* Store paths, mirroring the load cases above.  */
19114 else if (MEM_P (op0))
19116 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
19118 op0 = gen_lowpart (V16QImode, op0);
19119 op1 = gen_lowpart (V16QImode, op1);
19120 /* We will eventually emit movups based on insn attributes. */
19121 emit_insn (gen_sse2_storedquv16qi (op0, op1));
19123 else if (TARGET_SSE2 && mode == V2DFmode)
19126 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
19127 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
19128 || optimize_insn_for_size_p ())
19129 /* We will eventually emit movups based on insn attributes. */
19130 emit_insn (gen_sse2_storeupd (op0, op1));
19133 m = adjust_address (op0, DFmode, 0);
19134 emit_insn (gen_sse2_storelpd (m, op1));
19135 m = adjust_address (op0, DFmode, 8);
19136 emit_insn (gen_sse2_storehpd (m, op1));
19141 if (mode != V4SFmode)
19142 op1 = gen_lowpart (V4SFmode, op1);
19145 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
19146 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
19147 || optimize_insn_for_size_p ())
19149 op0 = gen_lowpart (V4SFmode, op0);
19150 emit_insn (gen_sse_storeups (op0, op1));
19154 m = adjust_address (op0, V2SFmode, 0);
19155 emit_insn (gen_sse_storelps (m, op1));
19156 m = adjust_address (op0, V2SFmode, 8);
19157 emit_insn (gen_sse_storehps (m, op1));
19162 gcc_unreachable ();
19165 /* Helper function of ix86_fixup_binary_operands to canonicalize
19166 operand order. Returns true if the operands should be swapped. */
/* CODE is the rtx operation, MODE its machine mode, and OPERANDS the
   dst/src1/src2 triple.  Only commutative operations may be swapped.
   NOTE(review): the return statements for each test below sit on lines
   elided from this listing.  */
19169 ix86_swap_binary_operands_p (enum rtx_code code, machine_mode mode,
19172 rtx dst = operands[0];
19173 rtx src1 = operands[1];
19174 rtx src2 = operands[2];
19176 /* If the operation is not commutative, we can't do anything. */
19177 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
19180 /* Highest priority is that src1 should match dst. */
19181 if (rtx_equal_p (dst, src1))
19183 if (rtx_equal_p (dst, src2))
19186 /* Next highest priority is that immediate constants come second. */
19187 if (immediate_operand (src2, mode))
19189 if (immediate_operand (src1, mode))
19192 /* Lowest priority is that memory references should come second. */
19202 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
19203 destination to use for the operation. If different from the true
19204 destination in operands[0], a copy operation will be required. */
/* Rewrites src1/src2 (forcing constants and non-matching memory into
   registers, canonicalizing commutative order) and stores them back
   into OPERANDS; the caller compares the returned dst against
   operands[0] to decide whether a final move is needed.  */
19207 ix86_fixup_binary_operands (enum rtx_code code, machine_mode mode,
19210 rtx dst = operands[0];
19211 rtx src1 = operands[1];
19212 rtx src2 = operands[2];
19214 /* Canonicalize operand order. */
19215 if (ix86_swap_binary_operands_p (code, mode, operands))
19217 /* It is invalid to swap operands of different modes. */
19218 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
19220 std::swap (src1, src2);
19223 /* Both source operands cannot be in memory. */
19224 if (MEM_P (src1) && MEM_P (src2))
19226 /* Optimization: Only read from memory once. */
19227 if (rtx_equal_p (src1, src2))
19229 src2 = force_reg (mode, src2);
19232 else if (rtx_equal_p (dst, src1))
19233 src2 = force_reg (mode, src2);
19235 src1 = force_reg (mode, src1);
19238 /* If the destination is memory, and we do not have matching source
19239 operands, do things in registers. */
19240 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
19241 dst = gen_reg_rtx (mode);
19243 /* Source 1 cannot be a constant. */
19244 if (CONSTANT_P (src1))
19245 src1 = force_reg (mode, src1);
19247 /* Source 1 cannot be a non-matching memory. */
19248 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
19249 src1 = force_reg (mode, src1);
19251 /* Improve address combine. */
/* Forcing src2 into a register for integer PLUS helps the address
   combination pass merge the operation into an address later.  */
19253 && GET_MODE_CLASS (mode) == MODE_INT
19255 src2 = force_reg (mode, src2);
19257 operands[1] = src1;
19258 operands[2] = src2;
19262 /* Similarly, but assume that the destination has already been
19263 set up properly. */
/* Asserts that fixing up the operands did not require a fresh
   destination register, i.e. operands[0] was already acceptable.  */
19266 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
19267 machine_mode mode, rtx operands[])
19269 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
19270 gcc_assert (dst == operands[0]);
19273 /* Attempt to expand a binary operator. Make the expansion closer to the
19274 actual machine, then just general_operand, which will allow 3 separate
19275 memory references (one output, two input) in a single insn. */
/* Emits dst = src1 CODE src2 with a FLAGS_REG clobber (except on the
   elided LEA path), then copies to operands[0] if a temporary
   destination was needed.  */
19278 ix86_expand_binary_operator (enum rtx_code code, machine_mode mode,
19281 rtx src1, src2, dst, op, clob;
19283 dst = ix86_fixup_binary_operands (code, mode, operands);
19284 src1 = operands[1];
19285 src2 = operands[2];
19287 /* Emit the instruction. */
19289 op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, mode, src1, src2));
19291 if (reload_completed
19293 && !rtx_equal_p (dst, src1))
19295 /* This is going to be an LEA; avoid splitting it later. */
19300 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
19301 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
19304 /* Fix up the destination if needed. */
19305 if (dst != operands[0])
19306 emit_move_insn (operands[0], dst);
19309 /* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
19310 the given OPERANDS. */
19313 ix86_expand_vector_logical_operator (enum rtx_code code, machine_mode mode,
19316 rtx op1 = NULL_RTX, op2 = NULL_RTX;
19317 if (SUBREG_P (operands[1]))
19322 else if (SUBREG_P (operands[2]))
19327 /* Optimize (__m128i) d | (__m128i) e and similar code
19328 when d and e are float vectors into float vector logical
19329 insn. In C/C++ without using intrinsics there is no other way
19330 to express vector logical operation on float vectors than
19331 to cast them temporarily to integer vectors. */
/* The guard requires op1 to be a zero-offset SUBREG of a float vector
   of the same size, and op2 either a matching SUBREG or a constant
   vector, so the logical op can be performed in the float mode.  */
19333 && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
19334 && (SUBREG_P (op2) || GET_CODE (op2) == CONST_VECTOR)
19335 && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT
19336 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode)
19337 && SUBREG_BYTE (op1) == 0
19338 && (GET_CODE (op2) == CONST_VECTOR
19339 || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2))
19340 && SUBREG_BYTE (op2) == 0))
19341 && can_create_pseudo_p ())
19344 switch (GET_MODE (SUBREG_REG (op1)))
19352 dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
19353 if (GET_CODE (op2) == CONST_VECTOR)
19355 op2 = gen_lowpart (GET_MODE (dst), op2);
19356 op2 = force_reg (GET_MODE (dst), op2);
19361 op2 = SUBREG_REG (operands[2]);
19362 if (!nonimmediate_operand (op2, GET_MODE (dst)))
19363 op2 = force_reg (GET_MODE (dst), op2);
19365 op1 = SUBREG_REG (op1);
19366 if (!nonimmediate_operand (op1, GET_MODE (dst)))
19367 op1 = force_reg (GET_MODE (dst), op1);
19368 emit_insn (gen_rtx_SET (dst,
19369 gen_rtx_fmt_ee (code, GET_MODE (dst),
19371 emit_move_insn (operands[0], gen_lowpart (mode, dst));
/* Fallback: perform the operation directly in MODE after forcing the
   operands into nonimmediate form.  */
19377 if (!nonimmediate_operand (operands[1], mode))
19378 operands[1] = force_reg (mode, operands[1]);
19379 if (!nonimmediate_operand (operands[2], mode))
19380 operands[2] = force_reg (mode, operands[2]);
19381 ix86_fixup_binary_operands_no_copy (code, mode, operands);
19382 emit_insn (gen_rtx_SET (operands[0],
19383 gen_rtx_fmt_ee (code, mode, operands[1],
19387 /* Return TRUE or FALSE depending on whether the binary operator meets the
19388 appropriate constraints. */
/* Predicate counterpart of ix86_fixup_binary_operands: checks the same
   conditions without rewriting OPERANDS.  NOTE(review): the bare
   return statements for several tests sit on elided lines.  */
19391 ix86_binary_operator_ok (enum rtx_code code, machine_mode mode,
19394 rtx dst = operands[0];
19395 rtx src1 = operands[1];
19396 rtx src2 = operands[2];
19398 /* Both source operands cannot be in memory. */
19399 if (MEM_P (src1) && MEM_P (src2))
19402 /* Canonicalize operand order for commutative operators. */
19403 if (ix86_swap_binary_operands_p (code, mode, operands))
19404 std::swap (src1, src2);
19406 /* If the destination is memory, we must have a matching source operand. */
19407 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
19410 /* Source 1 cannot be a constant. */
19411 if (CONSTANT_P (src1))
19414 /* Source 1 cannot be a non-matching memory. */
19415 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
19416 /* Support "andhi/andsi/anddi" as a zero-extending move. */
19417 return (code == AND
19420 || (TARGET_64BIT && mode == DImode))
19421 && satisfies_constraint_L (src2));
19426 /* Attempt to expand a unary operator. Make the expansion closer to the
19427 actual machine, then just general_operand, which will allow 2 separate
19428 memory references (one output, one input) in a single insn. */
/* Emits dst = CODE src, forcing non-matching memory sources into a
   register, clobbering FLAGS_REG on the elided non-NOT path, and
   copying back to operands[0] if a temporary destination was used.  */
19431 ix86_expand_unary_operator (enum rtx_code code, machine_mode mode,
19434 bool matching_memory = false;
19435 rtx src, dst, op, clob;
19440 /* If the destination is memory, and we do not have matching source
19441 operands, do things in registers. */
19444 if (rtx_equal_p (dst, src))
19445 matching_memory = true;
19447 dst = gen_reg_rtx (mode);
19450 /* When source operand is memory, destination must match. */
19451 if (MEM_P (src) && !matching_memory)
19452 src = force_reg (mode, src);
19454 /* Emit the instruction. */
19456 op = gen_rtx_SET (dst, gen_rtx_fmt_e (code, mode, src));
19462 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
19463 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
19466 /* Fix up the destination if needed. */
19467 if (dst != operands[0])
19468 emit_move_insn (operands[0], dst);
19471 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
19472 divisor are within the range [0-255]. */
/* operands[0]/operands[1] receive quotient/remainder; operands[2] and
   operands[3] are dividend and divisor.  A runtime test ORs the inputs
   and branches to a fast 8-bit unsigned divide when both fit in a
   byte; otherwise the full-width divmod is used.  */
19475 ix86_split_idivmod (machine_mode mode, rtx operands[],
19478 rtx_code_label *end_label, *qimode_label;
19479 rtx insn, div, mod;
19480 rtx scratch, tmp0, tmp1, tmp2;
19481 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
19482 rtx (*gen_zero_extend) (rtx, rtx);
19483 rtx (*gen_test_ccno_1) (rtx, rtx);
/* Pick the SImode or DImode expanders per MODE (the case labels sit on
   elided lines).  */
19488 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
19489 gen_test_ccno_1 = gen_testsi_ccno_1;
19490 gen_zero_extend = gen_zero_extendqisi2;
19493 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
19494 gen_test_ccno_1 = gen_testdi_ccno_1;
19495 gen_zero_extend = gen_zero_extendqidi2;
19498 gcc_unreachable ();
19501 end_label = gen_label_rtx ();
19502 qimode_label = gen_label_rtx ();
19504 scratch = gen_reg_rtx (mode);
19506 /* Use 8bit unsigned divimod if dividend and divisor are within
19507 the range [0-255]. */
19508 emit_move_insn (scratch, operands[2]);
19509 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
19510 scratch, 1, OPTAB_DIRECT);
/* test scratch, -0x100: ZF set iff no bit above bit 7 is set in
   either input.  */
19511 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
19512 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
19513 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
19514 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
19515 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
19517 insn = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp0));
19518 predict_jump (REG_BR_PROB_BASE * 50 / 100);
19519 JUMP_LABEL (insn) = qimode_label;
19521 /* Generate original signed/unsigned divimod. */
19522 div = gen_divmod4_1 (operands[0], operands[1],
19523 operands[2], operands[3]);
19526 /* Branch to the end. */
19527 emit_jump_insn (gen_jump (end_label));
19530 /* Generate 8bit unsigned divide. */
19531 emit_label (qimode_label);
19532 /* Don't use operands[0] for result of 8bit divide since not all
19533 registers support QImode ZERO_EXTRACT. */
19534 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
19535 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
19536 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
19537 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
19541 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
19542 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
19546 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
19547 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
19550 /* Extract remainder from AH. */
19551 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
19552 if (REG_P (operands[1]))
19553 insn = emit_move_insn (operands[1], tmp1);
19556 /* Need a new scratch register since the old one has result
19558 scratch = gen_reg_rtx (mode);
19559 emit_move_insn (scratch, tmp1);
19560 insn = emit_move_insn (operands[1], scratch);
19562 set_unique_reg_note (insn, REG_EQUAL, mod);
19564 /* Zero extend quotient from AL. */
19565 tmp1 = gen_lowpart (QImode, tmp0);
19566 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
19567 set_unique_reg_note (insn, REG_EQUAL, div);
19569 emit_label (end_label);
19572 #define LEA_MAX_STALL (3)
19573 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
19575 /* Increase given DISTANCE in half-cycles according to
19576 dependencies between PREV and NEXT instructions.
19577 Add 1 half-cycle if there is no dependency and
19578 go to next cycle if there is some dependecy. */
/* "(distance & 1) + 2" rounds DISTANCE up to the next even value and
   adds a full cycle (2 half-cycles); a missing insn or missing df info
   is treated conservatively.  */
19580 static unsigned int
19581 increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance)
19585 if (!prev || !next)
19586 return distance + (distance & 1) + 2;
19588 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
19589 return distance + 1;
/* A register defined by PREV and used by NEXT forms a dependency:
   charge a whole cycle.  */
19591 FOR_EACH_INSN_USE (use, next)
19592 FOR_EACH_INSN_DEF (def, prev)
19593 if (!DF_REF_IS_ARTIFICIAL (def)
19594 && DF_REF_REGNO (use) == DF_REF_REGNO (def))
19595 return distance + (distance & 1) + 2;
19597 return distance + 1;
19600 /* Function checks if instruction INSN defines register number
19601 REGNO1 or REGNO2. */
/* Scans the dataflow defs of INSN; artificial defs are ignored.  */
19604 insn_defines_reg (unsigned int regno1, unsigned int regno2,
19609 FOR_EACH_INSN_DEF (def, insn)
19610 if (DF_REF_REG_DEF_P (def)
19611 && !DF_REF_IS_ARTIFICIAL (def)
19612 && (regno1 == DF_REF_REGNO (def)
19613 || regno2 == DF_REF_REGNO (def)))
19619 /* Function checks if instruction INSN uses register number
19620 REGNO as a part of address expression. */
/* DF_REF_REG_MEM_P selects uses that occur inside a MEM address.  */
19623 insn_uses_reg_mem (unsigned int regno, rtx insn)
19627 FOR_EACH_INSN_USE (use, insn)
19628 if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use))
19634 /* Search backward for non-agu definition of register number REGNO1
19635 or register number REGNO2 in basic block starting from instruction
19636 START up to head of basic block or instruction INSN.
19638 Function puts true value into *FOUND var if definition was found
19639 and false otherwise.
19641 Distance in half-cycles between START and found instruction or head
19642 of BB is added to DISTANCE and returned. */
19645 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
19646 rtx_insn *insn, int distance,
19647 rtx_insn *start, bool *found)
19649 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
19650 rtx_insn *prev = start;
19651 rtx_insn *next = NULL;
/* Walk backwards, bounded by LEA_SEARCH_THRESHOLD half-cycles.  */
19657 && distance < LEA_SEARCH_THRESHOLD)
19659 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
19661 distance = increase_distance (prev, next, distance);
19662 if (insn_defines_reg (regno1, regno2, prev))
/* A defining insn that is not an LEA counts as a non-AGU
   definition (LEA executes on the AGU and is excluded).  */
19664 if (recog_memoized (prev) < 0
19665 || get_attr_type (prev) != TYPE_LEA)
19674 if (prev == BB_HEAD (bb))
19677 prev = PREV_INSN (prev);
19683 /* Search backward for non-agu definition of register number REGNO1
19684 or register number REGNO2 in INSN's basic block until
19685 1. Pass LEA_SEARCH_THRESHOLD instructions, or
19686 2. Reach neighbour BBs boundary, or
19687 3. Reach agu definition.
19688 Returns the distance between the non-agu definition point and INSN.
19689 If no definition point, returns -1. */
19692 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
19695 basic_block bb = BLOCK_FOR_INSN (insn);
19697 bool found = false;
/* First scan within INSN's own block.  */
19699 if (insn != BB_HEAD (bb))
19700 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
19701 distance, PREV_INSN (insn),
/* If nothing was found in-block and budget remains, extend the scan
   into predecessor blocks, keeping the shortest positive distance.  */
19704 if (!found && distance < LEA_SEARCH_THRESHOLD)
19708 bool simple_loop = false;
19710 FOR_EACH_EDGE (e, ei, bb->preds)
19713 simple_loop = true;
19718 distance = distance_non_agu_define_in_bb (regno1, regno2,
19720 BB_END (bb), &found);
19723 int shortest_dist = -1;
19724 bool found_in_bb = false;
19726 FOR_EACH_EDGE (e, ei, bb->preds)
19729 = distance_non_agu_define_in_bb (regno1, regno2,
19735 if (shortest_dist < 0)
19736 shortest_dist = bb_dist;
19737 else if (bb_dist > 0)
19738 shortest_dist = MIN (bb_dist, shortest_dist);
19744 distance = shortest_dist;
19748 /* get_attr_type may modify recog data. We want to make sure
19749 that recog data is valid for instruction INSN, on which
19750 distance_non_agu_define is called. INSN is unchanged here. */
19751 extract_insn_cached (insn);
/* Convert the half-cycle count to whole cycles.  */
19756 return distance >> 1;
19759 /* Return the distance in half-cycles between INSN and the next
19760 insn that uses register number REGNO in memory address added
19761 to DISTANCE. Return -1 if REGNO0 is set.
19763 Put true value into *FOUND if register usage was found and
19765 Put true value into *REDEFINED if register redefinition was
19766 found and false otherwise. */
19769 distance_agu_use_in_bb (unsigned int regno,
19770 rtx_insn *insn, int distance, rtx_insn *start,
19771 bool *found, bool *redefined)
19773 basic_block bb = NULL;
19774 rtx_insn *next = start;
19775 rtx_insn *prev = NULL;
19778 *redefined = false;
19780 if (start != NULL_RTX)
19782 bb = BLOCK_FOR_INSN (start);
19783 if (start != BB_HEAD (bb))
19784 /* If insn and start belong to the same bb, set prev to insn,
19785 so the call to increase_distance will increase the distance
19786 between insns by 1. */
/* Walk forwards, bounded by LEA_SEARCH_THRESHOLD half-cycles.  */
19792 && distance < LEA_SEARCH_THRESHOLD)
19794 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
19796 distance = increase_distance(prev, next, distance);
19797 if (insn_uses_reg_mem (regno, next))
19799 /* Return DISTANCE if OP0 is used in memory
19800 address in NEXT. */
/* A redefinition of REGNO kills the search before any address
   use is found.  */
19805 if (insn_defines_reg (regno, INVALID_REGNUM, next))
19807 /* Return -1 if OP0 is set in NEXT. */
19815 if (next == BB_END (bb))
19818 next = NEXT_INSN (next);
19824 /* Return the distance between INSN and the next insn that uses
19825 register number REGNO0 in memory address. Return -1 if no such
19826 a use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
19829 distance_agu_use (unsigned int regno0, rtx_insn *insn)
19831 basic_block bb = BLOCK_FOR_INSN (insn);
19833 bool found = false;
19834 bool redefined = false;
/* First scan forward within INSN's own block.  */
19836 if (insn != BB_END (bb))
19837 distance = distance_agu_use_in_bb (regno0, insn, distance,
19839 &found, &redefined);
/* Then, if budget remains, continue into successor blocks, keeping
   the shortest positive distance (mirrors distance_non_agu_define).  */
19841 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
19845 bool simple_loop = false;
19847 FOR_EACH_EDGE (e, ei, bb->succs)
19850 simple_loop = true;
19855 distance = distance_agu_use_in_bb (regno0, insn,
19856 distance, BB_HEAD (bb),
19857 &found, &redefined);
19860 int shortest_dist = -1;
19861 bool found_in_bb = false;
19862 bool redefined_in_bb = false;
19864 FOR_EACH_EDGE (e, ei, bb->succs)
19867 = distance_agu_use_in_bb (regno0, insn,
19868 distance, BB_HEAD (e->dest),
19869 &found_in_bb, &redefined_in_bb);
19872 if (shortest_dist < 0)
19873 shortest_dist = bb_dist;
19874 else if (bb_dist > 0)
19875 shortest_dist = MIN (bb_dist, shortest_dist);
19881 distance = shortest_dist;
19885 if (!found || redefined)
/* Convert the half-cycle count to whole cycles.  */
19888 return distance >> 1;
19891 /* Define this macro to tune LEA priority vs ADD, it take effect when
19892 there is a dilemma of choicing LEA or ADD
19893 Negative value: ADD is more preferred than LEA
19895 Positive value: LEA is more preferred than ADD*/
19896 #define IX86_LEA_PRIORITY 0
19898 /* Return true if usage of lea INSN has performance advantage
19899 over a sequence of instructions. Instructions sequence has
19900 SPLIT_COST cycles higher latency than lea latency. */
/* REGNO0 is the destination; REGNO1/REGNO2 are the source registers
   of the address.  HAS_SCALE is presumably set when the address uses
   a scaled index -- the line consuming it is elided from this
   listing, so confirm against the full source.  */
19903 ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
19904 unsigned int regno2, int split_cost, bool has_scale)
19906 int dist_define, dist_use;
19908 /* For Silvermont if using a 2-source or 3-source LEA for
19909 non-destructive destination purposes, or due to wanting
19910 ability to use SCALE, the use of LEA is justified. */
19911 if (TARGET_SILVERMONT || TARGET_INTEL)
19915 if (split_cost < 1)
19917 if (regno0 == regno1 || regno0 == regno2)
/* General case: weigh the backward distance to a non-AGU definition
   against the forward distance to an address use.  */
19922 dist_define = distance_non_agu_define (regno1, regno2, insn);
19923 dist_use = distance_agu_use (regno0, insn);
19925 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
19927 /* If there is no non AGU operand definition, no AGU
19928 operand usage and split cost is 0 then both lea
19929 and non lea variants have same priority. Currently
19930 we prefer lea for 64 bit code and non lea on 32 bit
19932 if (dist_use < 0 && split_cost == 0)
19933 return TARGET_64BIT || IX86_LEA_PRIORITY;
19938 /* With longer definitions distance lea is more preferable.
19939 Here we change it to take into account splitting cost and
19941 dist_define += split_cost + IX86_LEA_PRIORITY;
19943 /* If there is no use in memory addess then we just check
19944 that split cost exceeds AGU stall. */
19946 return dist_define > LEA_MAX_STALL;
19948 /* If this insn has both backward non-agu dependence and forward
19949 agu dependence, the one with short distance takes effect. */
19950 return dist_define >= dist_use;
19953 /* Return true if it is legal to clobber flags by INSN and
19954 false otherwise. */
/* Scans forward from INSN to the end of its basic block: if any insn
   uses or redefines FLAGS_REG the answers come from those tests;
   otherwise flags may be clobbered iff FLAGS_REG is not live out of
   the block.  */
19957 ix86_ok_to_clobber_flags (rtx_insn *insn)
19959 basic_block bb = BLOCK_FOR_INSN (insn);
19965 if (NONDEBUG_INSN_P (insn))
19967 FOR_EACH_INSN_USE (use, insn)
19968 if (DF_REF_REG_USE_P (use) && DF_REF_REGNO (use) == FLAGS_REG)
19971 if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
19975 if (insn == BB_END (bb))
19978 insn = NEXT_INSN (insn);
19981 live = df_get_live_out(bb);
19982 return !REGNO_REG_SET_P (live, FLAGS_REG);
19985 /* Return true if we need to split op0 = op1 + op2 into a sequence of
19986 move and add to avoid AGU stalls. */
/* Only applies when optimizing for speed on AGU-sensitive targets and
   flags may be clobbered at this point.  */
19989 ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[])
19991 unsigned int regno0, regno1, regno2;
19993 /* Check if we need to optimize. */
19994 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
19997 /* Check it is correct to split here. */
19998 if (!ix86_ok_to_clobber_flags(insn))
20001 regno0 = true_regnum (operands[0]);
20002 regno1 = true_regnum (operands[1]);
20003 regno2 = true_regnum (operands[2]);
20005 /* We need to split only adds with non destructive
20006 destination operand. */
20007 if (regno0 == regno1 || regno0 == regno2)
/* split_cost of 1 accounts for the extra mov the split requires.  */
20010 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
20013 /* Return true if we should emit lea instruction instead of mov
/* Only register-to-register moves are candidates; the LEA keeps the
   value on the AGU and so may avoid a stall at a later address use.  */
20017 ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[])
20019 unsigned int regno0, regno1;
20021 /* Check if we need to optimize. */
20022 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
20025 /* Use lea for reg to reg moves only. */
20026 if (!REG_P (operands[0]) || !REG_P (operands[1]))
20029 regno0 = true_regnum (operands[0]);
20030 regno1 = true_regnum (operands[1]);
20032 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
20035 /* Return true if we need to split lea into a sequence of
20036 instructions to avoid AGU stalls. */
/* Decomposes operands[1] into base/index/disp/scale, estimates the
   cycle cost of replacing the LEA with moves/adds/shifts, and asks
   ix86_lea_outperforms whether the split pays off.  */
20039 ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
20041 unsigned int regno0, regno1, regno2;
20043 struct ix86_address parts;
20046 /* Check we need to optimize. */
20047 if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
20050 /* The "at least two components" test below might not catch simple
20051 move or zero extension insns if parts.base is non-NULL and parts.disp
20052 is const0_rtx as the only components in the address, e.g. if the
20053 register is %rbp or %r13. As this test is much cheaper and moves or
20054 zero extensions are the common case, do this check first. */
20055 if (REG_P (operands[1])
20056 || (SImode_address_operand (operands[1], VOIDmode)
20057 && REG_P (XEXP (operands[1], 0))))
20060 /* Check if it is OK to split here. */
20061 if (!ix86_ok_to_clobber_flags (insn))
20064 ok = ix86_decompose_address (operands[1], &parts);
20067 /* There should be at least two components in the address. */
20068 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
20069 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
20072 /* We should not split into add if non legitimate pic
20073 operand is used as displacement. */
20074 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
20077 regno0 = true_regnum (operands[0]) ;
20078 regno1 = INVALID_REGNUM;
20079 regno2 = INVALID_REGNUM;
20082 regno1 = true_regnum (parts.base);
20084 regno2 = true_regnum (parts.index);
20088 /* Compute how many cycles we will add to execution time
20089 if split lea into a sequence of instructions. */
20090 if (parts.base || parts.index)
20092 /* Have to use mov instruction if non desctructive
20093 destination form is used. */
20094 if (regno1 != regno0 && regno2 != regno0)
20097 /* Have to add index to base if both exist. */
20098 if (parts.base && parts.index)
20101 /* Have to use shift and adds if scale is 2 or greater. */
20102 if (parts.scale > 1)
20104 if (regno0 != regno1)
20106 else if (regno2 == regno0)
20109 split_cost += parts.scale;
20112 /* Have to use add instruction with immediate if
20113 disp is non zero. */
20114 if (parts.disp && parts.disp != const0_rtx)
20117 /* Subtract the price of lea. */
20121 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
20125 /* Emit x86 binary operand CODE in mode MODE, where the first operand
20126 matches destination. RTX includes clobber of FLAGS_REG. */
/* Emits the two-address form dst = dst CODE src as a PARALLEL with the
   flags clobber, matching the backend's arithmetic insn patterns.  */
20129 ix86_emit_binop (enum rtx_code code, machine_mode mode,
20134 op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, mode, dst, src));
20135 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
20137 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
20140 /* Return true if regno1 def is nearest to the insn.
   Walks backward from INSN over the non-debug insns of its basic block
   and reports which of REGNO1/REGNO2 is defined closest before INSN.  */
20143 find_nearest_reg_def (rtx_insn *insn, int regno1, int regno2)
20145 rtx_insn *prev = insn;
20146 rtx_insn *start = BB_HEAD (BLOCK_FOR_INSN (insn));
/* Scan only within the current basic block, stopping at BB_HEAD.  */
20150 while (prev && prev != start)
/* Skip notes, debug insns and other non-insn RTL.  */
20152 if (!INSN_P (prev) || !NONDEBUG_INSN_P (prev))
20154 prev = PREV_INSN (prev);
/* First definition found while walking backward is the nearest one.  */
20157 if (insn_defines_reg (regno1, INVALID_REGNUM, prev))
20159 else if (insn_defines_reg (regno2, INVALID_REGNUM, prev))
20161 prev = PREV_INSN (prev);
20164 /* None of the regs is defined in the bb. */
20168 /* Split lea instructions into a sequence of instructions
20169 which are executed on ALU to avoid AGU stalls.
20170 It is assumed that it is allowed to clobber flags register
20171 at lea position. */
20174 ix86_split_lea_for_addr (rtx_insn *insn, rtx operands[], machine_mode mode)
20176 unsigned int regno0, regno1, regno2;
20177 struct ix86_address parts;
/* Decompose the LEA address into base + index*scale + disp parts.  */
20181 ok = ix86_decompose_address (operands[1], &parts);
20184 target = gen_lowpart (mode, operands[0]);
20186 regno0 = true_regnum (target);
20187 regno1 = INVALID_REGNUM;
20188 regno2 = INVALID_REGNUM;
/* Normalize the parts to MODE and record their hard register numbers.  */
20192 parts.base = gen_lowpart (mode, parts.base);
20193 regno1 = true_regnum (parts.base);
20198 parts.index = gen_lowpart (mode, parts.index);
20199 regno2 = true_regnum (parts.index);
20203 parts.disp = gen_lowpart (mode, parts.disp);
20205 if (parts.scale > 1)
20207 /* Case r1 = r1 + ... */
20208 if (regno1 == regno0)
20210 /* If we have a case r1 = r1 + C * r2 then we
20211 should use multiplication which is very
20212 expensive. Assume cost model is wrong if we
20213 have such case here. */
20214 gcc_assert (regno2 != regno0);
/* Replace index*scale by SCALE repeated additions of the index.  */
20216 for (adds = parts.scale; adds > 0; adds--)
20217 ix86_emit_binop (PLUS, mode, target, parts.index);
20221 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
20222 if (regno0 != regno2)
20223 emit_insn (gen_rtx_SET (target, parts.index));
20225 /* Use shift for scaling. */
20226 ix86_emit_binop (ASHIFT, mode, target,
20227 GEN_INT (exact_log2 (parts.scale)));
20230 ix86_emit_binop (PLUS, mode, target, parts.base);
20232 if (parts.disp && parts.disp != const0_rtx)
20233 ix86_emit_binop (PLUS, mode, target, parts.disp);
/* Address is just a displacement: a plain move suffices.  */
20236 else if (!parts.base && !parts.index)
20238 gcc_assert(parts.disp);
20239 emit_insn (gen_rtx_SET (target, parts.disp));
20245 if (regno0 != regno2)
20246 emit_insn (gen_rtx_SET (target, parts.index));
20248 else if (!parts.index)
20250 if (regno0 != regno1)
20251 emit_insn (gen_rtx_SET (target, parts.base));
/* base + index with no scaling: pick the operand order that avoids
   an extra move when the destination overlaps one of them.  */
20255 if (regno0 == regno1)
20257 else if (regno0 == regno2)
20263 /* Find better operand for SET instruction, depending
20264 on which definition is farther from the insn. */
20265 if (find_nearest_reg_def (insn, regno1, regno2))
20266 tmp = parts.index, tmp1 = parts.base;
20268 tmp = parts.base, tmp1 = parts.index;
20270 emit_insn (gen_rtx_SET (target, tmp));
20272 if (parts.disp && parts.disp != const0_rtx)
20273 ix86_emit_binop (PLUS, mode, target, parts.disp);
20275 ix86_emit_binop (PLUS, mode, target, tmp1);
20279 ix86_emit_binop (PLUS, mode, target, tmp);
20282 if (parts.disp && parts.disp != const0_rtx)
20283 ix86_emit_binop (PLUS, mode, target, parts.disp);
20287 /* Return true if it is ok to optimize an ADD operation to LEA
20288 operation to avoid flag register consumation. For most processors,
20289 ADD is faster than LEA. For the processors like BONNELL, if the
20290 destination register of LEA holds an actual address which will be
20291 used soon, LEA is better and otherwise ADD is better. */
20294 ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[])
20296 unsigned int regno0 = true_regnum (operands[0]);
20297 unsigned int regno1 = true_regnum (operands[1]);
20298 unsigned int regno2 = true_regnum (operands[2]);
20300 /* If a = b + c, (a!=b && a!=c), must use lea form. */
20301 if (regno0 != regno1 && regno0 != regno2)
/* Without AGU-stall tuning (or when optimizing for size) ADD→LEA
   conversion is unconditionally acceptable here.  */
20304 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
/* Otherwise defer to the LEA/ALU cost model (split_cost 0, no split).  */
20307 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
20310 /* Return true if destination reg of SET_BODY is shift count of
   USE_BODY.  Recurses through PARALLEL bodies on both sides.  */
20314 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
20320 /* Retrieve destination of SET_BODY. */
20321 switch (GET_CODE (set_body))
20324 set_dest = SET_DEST (set_body);
20325 if (!set_dest || !REG_P (set_dest))
/* PARALLEL: test each element of SET_BODY against USE_BODY.  */
20329 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
20330 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
20338 /* Retrieve shift count of USE_BODY. */
20339 switch (GET_CODE (use_body))
20342 shift_rtx = XEXP (use_body, 1);
/* PARALLEL: test SET_BODY against each element of USE_BODY.  */
20345 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
20346 if (ix86_dep_by_shift_count_body (set_body,
20347 XVECEXP (use_body, 0, i)))
/* Only shift/rotate codes consume a shift count operand.  */
20355 && (GET_CODE (shift_rtx) == ASHIFT
20356 || GET_CODE (shift_rtx) == LSHIFTRT
20357 || GET_CODE (shift_rtx) == ASHIFTRT
20358 || GET_CODE (shift_rtx) == ROTATE
20359 || GET_CODE (shift_rtx) == ROTATERT))
20361 rtx shift_count = XEXP (shift_rtx, 1);
20363 /* Return true if shift count is dest of SET_BODY. */
20364 if (REG_P (shift_count))
20366 /* Add check since it can be invoked before register
20367 allocation in pre-reload schedule. */
20368 if (reload_completed
20369 && true_regnum (set_dest) == true_regnum (shift_count))
20371 else if (REGNO(set_dest) == REGNO(shift_count))
20379 /* Return true if destination reg of SET_INSN is shift count of
   USE_INSN.  Thin wrapper that extracts the insn patterns and
   delegates to ix86_dep_by_shift_count_body.  */
20383 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
20385 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
20386 PATTERN (use_insn));
20389 /* Return TRUE or FALSE depending on whether the unary operator meets the
20390 appropriate constraints. */
20393 ix86_unary_operator_ok (enum rtx_code,
20397 /* If one of operands is memory, source and destination must match. */
20398 if ((MEM_P (operands[0])
20399 || MEM_P (operands[1]))
20400 && ! rtx_equal_p (operands[0], operands[1]))
20405 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
20406 are ok, keeping in mind the possible movddup alternative. */
20409 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
/* Memory destination: must match the source element actually stored
   (operand 1 for low, operand 2 for high).  */
20411 if (MEM_P (operands[0]))
20412 return rtx_equal_p (operands[0], operands[1 + high]);
/* Two memory sources are only OK via movddup (SSE3) on equal operands.  */
20413 if (MEM_P (operands[1]) && MEM_P (operands[2]))
20414 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
20418 /* Post-reload splitter for converting an SF or DFmode value in an
20419 SSE register into an unsigned SImode. */
20422 ix86_split_convert_uns_si_sse (rtx operands[])
20424 machine_mode vecmode;
20425 rtx value, large, zero_or_two31, input, two31, x;
20427 large = operands[1];
20428 zero_or_two31 = operands[2];
20429 input = operands[3];
20430 two31 = operands[4];
20431 vecmode = GET_MODE (large);
20432 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
20434 /* Load up the value into the low element. We must ensure that the other
20435 elements are valid floats -- zero is the easiest such value. */
20438 if (vecmode == V4SFmode)
20439 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
20441 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
/* Register input: zero the vector and movss/movsd the scalar in.  */
20445 input = gen_rtx_REG (vecmode, REGNO (input));
20446 emit_move_insn (value, CONST0_RTX (vecmode));
20447 if (vecmode == V4SFmode)
20448 emit_insn (gen_sse_movss (value, value, input));
20450 emit_insn (gen_sse2_movsd (value, value, input));
20453 emit_move_insn (large, two31)mask;
20454 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
/* large = (2^31 <= value) comparison mask; zero_or_two31 becomes
   2^31 where the value is large, else 0.  */
20456 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
20457 emit_insn (gen_rtx_SET (large, x));
20459 x = gen_rtx_AND (vecmode, zero_or_two31, large);
20460 emit_insn (gen_rtx_SET (zero_or_two31, x));
/* Bias large values down into signed range before truncation.  */
20462 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
20463 emit_insn (gen_rtx_SET (value, x));
/* Turn the mask into 0x80000000 per lane for the final xor fixup.  */
20465 large = gen_rtx_REG (V4SImode, REGNO (large));
20466 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
20468 x = gen_rtx_REG (V4SImode, REGNO (value));
20469 if (vecmode == V4SFmode)
20470 emit_insn (gen_fix_truncv4sfv4si2 (x, value));
20472 emit_insn (gen_sse2_cvttpd2dq (x, value));
/* Re-add the 2^31 bias by flipping the sign bit where it was removed.  */
20475 emit_insn (gen_xorv4si3 (value, value, large));
20478 /* Convert an unsigned DImode value into a DFmode, using only SSE.
20479 Expects the 64-bit DImode to be supplied in a pair of integral
20480 registers. Requires SSE2; will use SSE3 if available. For x86_32,
20481 -mfpmath=sse, !optimize_size only. */
20484 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
20486 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
20487 rtx int_xmm, fp_xmm;
20488 rtx biases, exponents;
/* Get the 64-bit integer into an XMM register, choosing the cheapest
   path the target supports.  */
20491 int_xmm = gen_reg_rtx (V4SImode);
20492 if (TARGET_INTER_UNIT_MOVES_TO_VEC)
20493 emit_insn (gen_movdi_to_sse (int_xmm, input));
20494 else if (TARGET_SSE_SPLIT_REGS)
20496 emit_clobber (int_xmm);
20497 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
20501 x = gen_reg_rtx (V2DImode);
20502 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
20503 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
/* Exponent words that, when juxtaposed with the integer halves,
   form biased double-precision values (see comment below).  */
20506 x = gen_rtx_CONST_VECTOR (V4SImode,
20507 gen_rtvec (4, GEN_INT (0x43300000UL),
20508 GEN_INT (0x45300000UL),
20509 const0_rtx, const0_rtx));
20510 exponents = validize_mem (force_const_mem (V4SImode, x));
20512 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
20513 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
20515 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
20516 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
20517 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
20518 (0x1.0p84 + double(fp_value_hi_xmm)).
20519 Note these exponents differ by 32. */
20521 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
20523 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
20524 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
20525 real_ldexp (&bias_lo_rvt, &dconst1, 52);
20526 real_ldexp (&bias_hi_rvt, &dconst1, 84);
20527 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
20528 x = const_double_from_real_value (bias_hi_rvt, DFmode);
20529 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
20530 biases = validize_mem (force_const_mem (V2DFmode, biases));
20531 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
20533 /* Add the upper and lower DFmode values together. */
/* SSE3 horizontal add does it in one insn; otherwise shuffle + add.  */
20535 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
20538 x = copy_to_mode_reg (V2DFmode, fp_xmm);
20539 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
20540 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
20543 ix86_expand_vector_extract (false, target, fp_xmm, 0);
20546 /* Not used, but eases macroization of patterns. */
20548 ix86_expand_convert_uns_sixf_sse (rtx, rtx)
20550 gcc_unreachable ();
20553 /* Convert an unsigned SImode value into a DFmode. Only currently used
20554 for SSE, but applicable anywhere. */
20557 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
20559 REAL_VALUE_TYPE TWO31r;
/* Shift the unsigned range [0,2^32) down to signed [-2^31,2^31) by
   adding -2^31 (written to avoid an INT_MIN literal), convert as a
   signed int, then add 2^31.0 back in the FP domain.  */
20562 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
20563 NULL, 1, OPTAB_DIRECT);
20565 fp = gen_reg_rtx (DFmode);
20566 emit_insn (gen_floatsidf2 (fp, x));
20568 real_ldexp (&TWO31r, &dconst1, 31);
20569 x = const_double_from_real_value (TWO31r, DFmode);
20571 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
20573 emit_move_insn (target, x);
20576 /* Convert a signed DImode value into a DFmode. Only used for SSE in
20577 32-bit mode; otherwise we have a direct convert instruction. */
20580 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
20582 REAL_VALUE_TYPE TWO32r;
20583 rtx fp_lo, fp_hi, x;
20585 fp_lo = gen_reg_rtx (DFmode);
20586 fp_hi = gen_reg_rtx (DFmode);
/* Convert the signed high half, then scale it by 2^32.  */
20588 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
20590 real_ldexp (&TWO32r, &dconst1, 32);
20591 x = const_double_from_real_value (TWO32r, DFmode);
20592 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
/* Low half is converted as an unsigned 32-bit quantity.  */
20594 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
20596 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
20599 emit_move_insn (target, x);
20602 /* Convert an unsigned SImode value into a SFmode, using only SSE.
20603 For x86_32, -mfpmath=sse, !optimize_size only. */
20605 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
20607 REAL_VALUE_TYPE ONE16r;
20608 rtx fp_hi, fp_lo, int_hi, int_lo, x;
/* Split INPUT into 16-bit halves so each half converts exactly via
   the signed SImode->SFmode path, then recombine: hi*2^16 + lo.  */
20610 real_ldexp (&ONE16r, &dconst1, 16);
20611 x = const_double_from_real_value (ONE16r, SFmode);
20612 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
20613 NULL, 0, OPTAB_DIRECT);
20614 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
20615 NULL, 0, OPTAB_DIRECT);
20616 fp_hi = gen_reg_rtx (SFmode);
20617 fp_lo = gen_reg_rtx (SFmode);
20618 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
20619 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
20620 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
20622 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
20624 if (!rtx_equal_p (target, fp_hi))
20625 emit_move_insn (target, fp_hi);
20628 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
20629 a vector of unsigned ints VAL to vector of floats TARGET. */
20632 ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
20635 REAL_VALUE_TYPE TWO16r;
20636 machine_mode intmode = GET_MODE (val);
20637 machine_mode fltmode = GET_MODE (target);
20638 rtx (*cvt) (rtx, rtx);
/* Pick the signed int->float converter for the vector width.  */
20640 if (intmode == V4SImode)
20641 cvt = gen_floatv4siv4sf2;
20643 cvt = gen_floatv8siv8sf2;
/* Same scheme as the scalar version: convert the low and high 16-bit
   halves separately (both fit the signed converter exactly), then
   combine as hi*2^16 + lo.  */
20644 tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
20645 tmp[0] = force_reg (intmode, tmp[0]);
20646 tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
20648 tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
20649 NULL_RTX, 1, OPTAB_DIRECT);
20650 tmp[3] = gen_reg_rtx (fltmode);
20651 emit_insn (cvt (tmp[3], tmp[1]));
20652 tmp[4] = gen_reg_rtx (fltmode);
20653 emit_insn (cvt (tmp[4], tmp[2]));
20654 real_ldexp (&TWO16r, &dconst1, 16);
20655 tmp[5] = const_double_from_real_value (TWO16r, SFmode);
20656 tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
20657 tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
20659 tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
20661 if (tmp[7] != target)
20662 emit_move_insn (target, tmp[7]);
20665 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
20666 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
20667 This is done by doing just signed conversion if < 0x1p31, and otherwise by
20668 subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
20671 ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
20673 REAL_VALUE_TYPE TWO31r;
20674 rtx two31r, tmp[4];
20675 machine_mode mode = GET_MODE (val);
20676 machine_mode scalarmode = GET_MODE_INNER (mode);
20677 machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
20678 rtx (*cmp) (rtx, rtx, rtx, rtx);
20681 for (i = 0; i < 3; i++)
20682 tmp[i] = gen_reg_rtx (mode);
/* Build the 2^31 constant broadcast across all vector lanes.  */
20683 real_ldexp (&TWO31r, &dconst1, 31);
20684 two31r = const_double_from_real_value (TWO31r, scalarmode);
20685 two31r = ix86_build_const_vector (mode, 1, two31r);
20686 two31r = force_reg (mode, two31r);
/* Select the masked-compare pattern matching MODE.  */
20689 case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
20690 case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
20691 case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
20692 case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
20693 default: gcc_unreachable ();
/* tmp[0] = all-ones mask in lanes where 2^31 <= val.  */
20695 tmp[3] = gen_rtx_LE (mode, two31r, val);
20696 emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
/* tmp[1] = 2^31 in the large lanes, 0 elsewhere.  */
20697 tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
/* *XORP gets 0x80000000 in lanes that were biased down, to be xored
   back after the signed truncation.  */
20699 if (intmode == V4SImode || TARGET_AVX2)
20700 *xorp = expand_simple_binop (intmode, ASHIFT,
20701 gen_lowpart (intmode, tmp[0]),
20702 GEN_INT (31), NULL_RTX, 0,
20706 rtx two31 = GEN_INT (HOST_WIDE_INT_1U << 31);
20707 two31 = ix86_build_const_vector (intmode, 1, two31);
20708 *xorp = expand_simple_binop (intmode, AND,
20709 gen_lowpart (intmode, tmp[0]),
20710 two31, NULL_RTX, 0,
/* Return VAL with 2^31 subtracted from the out-of-range lanes.  */
20713 return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
20717 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
20718 then replicate the value for all elements of the vector
   register; otherwise only element 0 is VALUE and the rest are zero.  */
20722 ix86_build_const_vector (machine_mode mode, bool vect, rtx value)
20726 machine_mode scalar_mode;
20749 n_elt = GET_MODE_NUNITS (mode);
20750 v = rtvec_alloc (n_elt);
20751 scalar_mode = GET_MODE_INNER (mode);
20753 RTVEC_ELT (v, 0) = value;
20755 for (i = 1; i < n_elt; ++i)
20756 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
20758 return gen_rtx_CONST_VECTOR (mode, v);
/* Unhandled mode class.  */
20761 gcc_unreachable ();
20765 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
20766 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
20767 for an SSE register. If VECT is true, then replicate the mask for
20768 all elements of the vector register. If INVERT is true, then create
20769 a mask excluding the sign bit. */
20772 ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert)
20774 machine_mode vec_mode, imode;
/* VOIDmode marks the scalar (non-vector) case below.  */
20802 vec_mode = VOIDmode;
20807 gcc_unreachable ();
/* Build a wide-int with only the sign bit of the inner mode set
   (or all bits but the sign bit when INVERT).  */
20810 machine_mode inner_mode = GET_MODE_INNER (mode);
20811 w = wi::set_bit_in_zero (GET_MODE_BITSIZE (inner_mode) - 1,
20812 GET_MODE_BITSIZE (inner_mode));
20814 w = wi::bit_not (w);
20816 /* Force this value into the low part of a fp vector constant. */
20817 mask = immed_wide_int_const (w, imode);
20818 mask = gen_lowpart (inner_mode, mask);
20820 if (vec_mode == VOIDmode)
20821 return force_reg (inner_mode, mask);
20823 v = ix86_build_const_vector (vec_mode, vect, mask);
20824 return force_reg (vec_mode, v);
20827 /* Generate code for floating point ABS or NEG.
   CODE is ABS or NEG; MODE may be scalar or vector FP.  */
20830 ix86_expand_fp_absneg_operator (enum rtx_code code, machine_mode mode,
20833 rtx mask, set, dst, src;
20834 bool use_sse = false;
20835 bool vector_mode = VECTOR_MODE_P (mode);
20836 machine_mode vmode = mode;
20840 else if (mode == TFmode)
20842 else if (TARGET_SSE_MATH)
20844 use_sse = SSE_FLOAT_MODE_P (mode);
20845 if (mode == SFmode)
20847 else if (mode == DFmode)
20851 /* NEG and ABS performed with SSE use bitwise mask operations.
20852 Create the appropriate mask now. */
/* ABS clears the sign bit (inverted mask); NEG flips it.  */
20854 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
20861 set = gen_rtx_fmt_e (code, mode, src);
20862 set = gen_rtx_SET (dst, set);
/* Attach the mask as a USE so later splitters can find it; the
   scalar x87-reachable case also clobbers the flags.  */
20869 use = gen_rtx_USE (VOIDmode, mask);
20871 par = gen_rtvec (2, set, use);
20874 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
20875 par = gen_rtvec (3, set, use, clob);
20877 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
20883 /* Expand a copysign operation. Special case operand 0 being a constant. */
20886 ix86_expand_copysign (rtx operands[])
20888 machine_mode mode, vmode;
20889 rtx dest, op0, op1, mask, nmask;
20891 dest = operands[0];
20895 mode = GET_MODE (dest);
20897 if (mode == SFmode)
20899 else if (mode == DFmode)
/* Constant magnitude: only a single sign mask is needed.  */
20904 if (CONST_DOUBLE_P (op0))
20906 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
/* Strip the sign of the constant; the sign comes from op1 anyway.  */
20908 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
20909 op0 = simplify_unary_operation (ABS, mode, op0, mode);
20911 if (mode == SFmode || mode == DFmode)
20913 if (op0 == CONST0_RTX (mode))
20914 op0 = CONST0_RTX (vmode)mask;
20917 rtx v = ix86_build_const_vector (vmode, false, op0);
20919 op0 = force_reg (vmode, v);
20922 else if (op0 != CONST0_RTX (mode))
20923 op0 = force_reg (mode, op0);
20925 mask = ix86_build_signbit_mask (vmode, 0, 0);
20927 if (mode == SFmode)
20928 copysign_insn = gen_copysignsf3_const;
20929 else if (mode == DFmode)
20930 copysign_insn = gen_copysigndf3_const;
20932 copysign_insn = gen_copysigntf3_const;
20934 emit_insn (copysign_insn (dest, op0, op1, mask));
/* Variable magnitude: needs both the sign mask and its complement.  */
20938 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
20940 nmask = ix86_build_signbit_mask (vmode, 0, 1);
20941 mask = ix86_build_signbit_mask (vmode, 0, 0);
20943 if (mode == SFmode)
20944 copysign_insn = gen_copysignsf3_var;
20945 else if (mode == DFmode)
20946 copysign_insn = gen_copysigndf3_var;
20948 copysign_insn = gen_copysigntf3_var;
20950 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
20954 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
20955 be a constant, and so has already been expanded into a vector constant. */
20958 ix86_split_copysign_const (rtx operands[])
20960 machine_mode mode, vmode;
20961 rtx dest, op0, mask, x;
20963 dest = operands[0];
20965 mask = operands[3];
20967 mode = GET_MODE (dest);
20968 vmode = GET_MODE (mask);
/* dest = (dest & sign-mask) | constant-magnitude.  */
20970 dest = simplify_gen_subreg (vmode, dest, mode, 0);
20971 x = gen_rtx_AND (vmode, dest, mask);
20972 emit_insn (gen_rtx_SET (dest, x));
/* OR in the magnitude only when it is nonzero.  */
20974 if (op0 != CONST0_RTX (vmode))
20976 x = gen_rtx_IOR (vmode, dest, op0);
20977 emit_insn (gen_rtx_SET (dest, x));
20981 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
20982 so we have to do two masks. */
20985 ix86_split_copysign_var (rtx operands[])
20987 machine_mode mode, vmode;
20988 rtx dest, scratch, op0, op1, mask, nmask, x;
20990 dest = operands[0];
20991 scratch = operands[1];
20994 nmask = operands[4];
20995 mask = operands[5];
20997 mode = GET_MODE (dest);
20998 vmode = GET_MODE (mask);
21000 if (rtx_equal_p (op0, op1))
21002 /* Shouldn't happen often (it's useless, obviously), but when it does
21003 we'd generate incorrect code if we continue below. */
21004 emit_move_insn (dest, op0);
/* The remaining cases are distinguished by which register overlaps
   the destination/scratch — matching the pattern's alternatives.  */
21008 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
21010 gcc_assert (REGNO (op1) == REGNO (scratch));
21012 x = gen_rtx_AND (vmode, scratch, mask);
21013 emit_insn (gen_rtx_SET (scratch, x));
21016 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
21017 x = gen_rtx_NOT (vmode, dest);
21018 x = gen_rtx_AND (vmode, x, op0);
21019 emit_insn (gen_rtx_SET (dest, x));
21023 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
21025 x = gen_rtx_AND (vmode, scratch, mask);
21027 else /* alternative 2,4 */
21029 gcc_assert (REGNO (mask) == REGNO (scratch));
21030 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
21031 x = gen_rtx_AND (vmode, scratch, op1);
21033 emit_insn (gen_rtx_SET (scratch, x));
21035 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
21037 dest = simplify_gen_subreg (vmode, op0, mode, 0);
21038 x = gen_rtx_AND (vmode, dest, nmask);
21040 else /* alternative 3,4 */
21042 gcc_assert (REGNO (nmask) == REGNO (dest));
21044 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
21045 x = gen_rtx_AND (vmode, dest, op0);
21047 emit_insn (gen_rtx_SET (dest, x));
/* Combine magnitude and sign: dest = dest | scratch.  */
21050 x = gen_rtx_IOR (vmode, dest, scratch);
21051 emit_insn (gen_rtx_SET (dest, x));
21054 /* Return TRUE or FALSE depending on whether the first SET in INSN
21055 has source and destination with matching CC modes, and that the
21056 CC mode is at least as constrained as REQ_MODE. */
21059 ix86_match_ccmode (rtx insn, machine_mode req_mode)
21062 machine_mode set_mode;
21064 set = PATTERN (insn);
21065 if (GET_CODE (set) == PARALLEL)
21066 set = XVECEXP (set, 0, 0);
21067 gcc_assert (GET_CODE (set) == SET);
21068 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
21070 set_mode = GET_MODE (SET_DEST (set));
/* Check REQ_MODE against SET_MODE, allowing the documented
   weakenings (e.g. CCNO/CCGOC usable where less is required).  */
21074 if (req_mode != CCNOmode
21075 && (req_mode != CCmode
21076 || XEXP (SET_SRC (set), 1) != const0_rtx))
21080 if (req_mode == CCGCmode)
21084 if (req_mode == CCGOCmode || req_mode == CCNOmode)
21088 if (req_mode == CCZmode)
21099 if (set_mode != req_mode)
/* Unknown CC mode.  */
21104 gcc_unreachable ();
21107 return GET_MODE (SET_SRC (set)) == set_mode;
21110 /* Generate insn patterns to do an integer compare of OPERANDS. */
21113 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
21115 machine_mode cmpmode;
/* Choose the narrowest CC mode that still captures CODE.  */
21118 cmpmode = SELECT_CC_MODE (code, op0, op1);
21119 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
21121 /* This is very simple, but making the interface the same as in the
21122 FP case makes the rest of the code easier. */
21123 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
21124 emit_insn (gen_rtx_SET (flags, tmp));
21126 /* Return the test that should be put into the flags user, i.e.
21127 the bcc, scc, or cmov instruction. */
21128 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
21131 /* Figure out whether to use ordered or unordered fp comparisons.
21132 Return the appropriate mode to use. */
21135 ix86_fp_compare_mode (enum rtx_code)
21137 /* ??? In order to make all comparisons reversible, we do all comparisons
21138 non-trapping when compiling for IEEE. Once gcc is able to distinguish
21139 all forms trapping and nontrapping comparisons, we can make inequality
21140 comparisons trapping again, since it results in better code when using
21141 FCOM based compares. */
21142 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
/* Return the CC mode that a comparison CODE of OP0 and OP1 needs —
   the narrowest flags subset that still decides the comparison.  */
21146 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
21148 machine_mode mode = GET_MODE (op0);
21150 if (SCALAR_FLOAT_MODE_P (mode))
21152 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
21153 return ix86_fp_compare_mode (code);
21158 /* Only zero flag is needed. */
21159 case EQ: /* ZF=0 */
21160 case NE: /* ZF!=0 */
21162 /* Codes needing carry flag. */
21163 case GEU: /* CF=0 */
21164 case LTU: /* CF=1 */
21165 /* Detect overflow checks. They need just the carry flag. */
21166 if (GET_CODE (op0) == PLUS
21167 && (rtx_equal_p (op1, XEXP (op0, 0))
21168 || rtx_equal_p (op1, XEXP (op0, 1))))
21172 case GTU: /* CF=0 & ZF=0 */
21173 case LEU: /* CF=1 | ZF=1 */
21175 /* Codes possibly doable only with sign flag when
21176 comparing against zero. */
21177 case GE: /* SF=OF or SF=0 */
21178 case LT: /* SF<>OF or SF=1 */
21179 if (op1 == const0_rtx)
21182 /* For other cases Carry flag is not required. */
21184 /* Codes doable only with sign flag when comparing
21185 against zero, but we miss jump instruction for it
21186 so we need to use relational tests against overflow
21187 that thus needs to be zero. */
21188 case GT: /* ZF=0 & SF=OF */
21189 case LE: /* ZF=1 | SF<>OF */
21190 if (op1 == const0_rtx)
21194 /* strcmp pattern do (use flags) and combine may ask us for proper
   mode for an already-computed flags value.  */
21199 gcc_unreachable ();
21203 /* Return the fixed registers used for condition codes.
   Implements TARGET_FIXED_CONDITION_CODE_REGS; stores the flags
   register number(s) through P1/P2.  */
21206 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
21213 /* If two condition code modes are compatible, return a condition code
21214 mode which is compatible with both. Otherwise, return
   VOIDmode (no common mode exists).  */
21217 static machine_mode
21218 ix86_cc_modes_compatible (machine_mode m1, machine_mode m2)
/* Non-CC modes never mix with CC modes.  */
21223 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
/* CCGC and CCGOC merge into the more constrained CCGCmode.  */
21226 if ((m1 == CCGCmode && m2 == CCGOCmode)
21227 || (m1 == CCGOCmode && m2 == CCGCmode))
/* CCZ is a subset of both CCGC and CCGOC; the wider mode wins.  */
21230 if (m1 == CCZmode && (m2 == CCGCmode || m2 == CCGOCmode))
21232 else if (m2 == CCZmode && (m1 == CCGCmode || m1 == CCGOCmode))
21238 gcc_unreachable ();
21270 /* These are only compatible with themselves, which we already
   checked above.  */
21277 /* Return a comparison we can do and that it is equivalent to
21278 swap_condition (code) apart possibly from orderedness.
21279 But, never change orderedness if TARGET_IEEE_FP, returning
21280 UNKNOWN in that case if necessary. */
21282 static enum rtx_code
21283 ix86_fp_swap_condition (enum rtx_code code)
/* For these four codes the swap flips ordered<->unordered, which is
   only legal when IEEE conformance is not required.  */
21287 case GT: /* GTU - CF=0 & ZF=0 */
21288 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
21289 case GE: /* GEU - CF=0 */
21290 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
21291 case UNLT: /* LTU - CF=1 */
21292 return TARGET_IEEE_FP ? UNKNOWN : GT;
21293 case UNLE: /* LEU - CF=1 | ZF=1 */
21294 return TARGET_IEEE_FP ? UNKNOWN : GE;
/* All other codes swap without changing orderedness.  */
21296 return swap_condition (code);
21300 /* Return cost of comparison CODE using the best strategy for performance.
21301 All following functions do use number of instructions as a cost metrics.
21302 In future this should be tweaked to compute bytes for optimize_size and
21303 take into account performance of various instructions on various CPUs. */
21306 ix86_fp_comparison_cost (enum rtx_code code)
21310 /* The cost of code using bit-twiddling on %ah. */
21327 arith_cost = TARGET_IEEE_FP ? 5 : 4;
21331 arith_cost = TARGET_IEEE_FP ? 6 : 4;
21334 gcc_unreachable ();
/* Cheaper strategies get cheaper costs; the arith cost caps them.  */
21337 switch (ix86_fp_comparison_strategy (code))
21339 case IX86_FPCMP_COMI:
21340 return arith_cost > 4 ? 3 : 2;
21341 case IX86_FPCMP_SAHF:
21342 return arith_cost > 4 ? 4 : 3;
21348 /* Return strategy to use for floating-point. We assume that fcomi is always
21349 preferrable where available, since that is also true when looking at size
21350 (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
21352 enum ix86_fpcmp_strategy
21353 ix86_fp_comparison_strategy (enum rtx_code)
21355 /* Do fcomi/sahf based test when profitable. */
21358 return IX86_FPCMP_COMI;
21360 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
21361 return IX86_FPCMP_SAHF;
/* Fall back to fnstsw + bit-twiddling on %ah.  */
21363 return IX86_FPCMP_ARITH;
21366 /* Swap, force into registers, or otherwise massage the two operands
21367 to a fp comparison. The operands are updated in place; the new
21368 comparison code is returned. */
21370 static enum rtx_code
21371 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
21373 machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
21374 rtx op0 = *pop0, op1 = *pop1;
21375 machine_mode op_mode = GET_MODE (op0);
21376 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
21378 /* All of the unordered compare instructions only work on registers.
21379 The same is true of the fcomi compare instructions. The XFmode
21380 compare instructions require registers except when comparing
21381 against zero or when converting operand 1 from fixed point to
   floating point.  */
21385 && (fpcmp_mode == CCFPUmode
21386 || (op_mode == XFmode
21387 && ! (standard_80387_constant_p (op0) == 1
21388 || standard_80387_constant_p (op1) == 1)
21389 && GET_CODE (op1) != FLOAT)
21390 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
21392 op0 = force_reg (op_mode, op0);
21393 op1 = force_reg (op_mode, op1);
21397 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
21398 things around if they appear profitable, otherwise force op0
21399 into a register. */
21401 if (standard_80387_constant_p (op0) == 0
21403 && ! (standard_80387_constant_p (op1) == 0
/* Swapping is only legal when an orderedness-preserving swapped
   code exists (see ix86_fp_swap_condition).  */
21406 enum rtx_code new_code = ix86_fp_swap_condition (code);
21407 if (new_code != UNKNOWN)
21409 std::swap (op0, op1);
21415 op0 = force_reg (op_mode, op0);
21417 if (CONSTANT_P (op1))
21419 int tmp = standard_80387_constant_p (op1);
/* Non-standard constants must live in memory; standard ones
   (fldz/fld1 etc.) can stay as operands.  */
21421 op1 = validize_mem (force_const_mem (op_mode, op1));
21425 op1 = force_reg (op_mode, op1);
21428 op1 = force_reg (op_mode, op1);
21432 /* Try to rearrange the comparison to make it cheaper. */
21433 if (ix86_fp_comparison_cost (code)
21434 > ix86_fp_comparison_cost (swap_condition (code))
21435 && (REG_P (op1) || can_create_pseudo_p ()))
21437 std::swap (op0, op1);
21438 code = swap_condition (code);
21440 op0 = force_reg (op_mode, op0);
21448 /* Convert comparison codes we use to represent FP comparison to integer
21449 code that will result in proper branch. Return UNKNOWN if no such code
   exists.  */
21453 ix86_fp_compare_code_to_integer (enum rtx_code code)
21482 /* Generate insn patterns to do a floating point compare of OPERANDS. */
/* Emits the comparison of OP0/OP1 under CODE and returns an rtx of the
   form (CODE (reg:CC FLAGS_REG) ...) for the eventual flags user.
   SCRATCH is an HImode scratch; note it is unconditionally replaced by a
   fresh pseudo in the SAHF/ARITH paths below.  Several interior lines
   are elided in this excerpt.  */
21485 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
21487 machine_mode fpcmp_mode, intcmp_mode;
21490 fpcmp_mode = ix86_fp_compare_mode (code);
21491 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
21493 /* Do fcomi/sahf based test when profitable. */
/* Three strategies, per the case labels: COMI (fcomi sets flags
   directly), SAHF (fnstsw + sahf), ARITH (fnstsw + bit twiddling on
   AH).  */
21494 switch (ix86_fp_comparison_strategy (code))
21496 case IX86_FPCMP_COMI:
21497 intcmp_mode = fpcmp_mode;
21498 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
21499 tmp = gen_rtx_SET (gen_rtx_REG (fpcmp_mode, FLAGS_REG), tmp);
21503 case IX86_FPCMP_SAHF:
21504 intcmp_mode = fpcmp_mode;
21505 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
21506 tmp = gen_rtx_SET (gen_rtx_REG (fpcmp_mode, FLAGS_REG), tmp);
/* Fresh scratch pseudo; the incoming SCRATCH is not reused here.  */
21509 scratch = gen_reg_rtx (HImode);
21510 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
21511 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
21514 case IX86_FPCMP_ARITH:
21515 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
21516 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
21517 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
21519 scratch = gen_reg_rtx (HImode);
21520 emit_insn (gen_rtx_SET (scratch, tmp2));
21522 /* In the unordered case, we have to check C2 for NaN's, which
21523 doesn't happen to work out to anything nice combination-wise.
21524 So do some bit twiddling on the value we've got in AH to come
21525 up with an appropriate set of condition codes. */
/* The masks below (0x45, 0x44, 0x40, 0x05, 0x04, 1) select FPU status
   bits as they land in AH after fnstsw — presumably C0/C2/C3; confirm
   against the x87 status-word layout.  */
21527 intcmp_mode = CCNOmode;
21532 if (code == GT || !TARGET_IEEE_FP)
21534 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
21539 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
21540 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
21541 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
21542 intcmp_mode = CCmode;
21548 if (code == LT && TARGET_IEEE_FP)
21550 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
21551 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
21552 intcmp_mode = CCmode;
21557 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
21563 if (code == GE || !TARGET_IEEE_FP)
21565 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
21570 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
21571 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
21577 if (code == LE && TARGET_IEEE_FP)
21579 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
21580 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
21581 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
21582 intcmp_mode = CCmode;
21587 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
21593 if (code == EQ && TARGET_IEEE_FP)
21595 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
21596 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
21597 intcmp_mode = CCmode;
21602 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
21608 if (code == NE && TARGET_IEEE_FP)
21610 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
21611 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
21617 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
21623 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
21627 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
21632 gcc_unreachable ();
21640 /* Return the test that should be put into the flags user, i.e.
21641 the bcc, scc, or cmov instruction. */
21642 return gen_rtx_fmt_ee (code, VOIDmode,
21643 gen_rtx_REG (intcmp_mode, FLAGS_REG),
/* Produce an rtx of the form (CODE flags 0) describing the comparison of
   OP0 and OP1, dispatching on OP0's mode: an existing CC value is used
   as-is, scalar FP goes through ix86_expand_fp_compare, and anything
   else through ix86_expand_int_compare.  */
21648 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
/* Operands already form a flags value; just wrap them under CODE.  */
21652 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
21653 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
21655 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
/* Decimal FP is asserted away — this path handles binary FP only.  */
21657 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
21658 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
21661 ret = ix86_expand_int_compare (code, op0, op1);
/* Emit a conditional branch to LABEL taken when (CODE op0 op1) holds.
   Double-word modes are split into word-sized compare+branch sequences;
   several interior lines (mode switch cases, braces) are elided in this
   excerpt.  */
21667 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
21669 machine_mode mode = GET_MODE (op0);
21681 tmp = ix86_expand_compare (code, op0, op1);
21682 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
21683 gen_rtx_LABEL_REF (VOIDmode, label),
21685 emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
21692 /* Expand DImode branch into multiple compare+branch. */
21695 rtx_code_label *label2;
21696 enum rtx_code code1, code2, code3;
21697 machine_mode submode;
/* Canonicalize: keep any constant as the second operand.  */
21699 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
21701 std::swap (op0, op1);
21702 code = swap_condition (code);
21705 split_double_mode (mode, &op0, 1, lo+0, hi+0);
21706 split_double_mode (mode, &op1, 1, lo+1, hi+1);
21708 submode = mode == DImode ? SImode : DImode;
21710 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
21711 avoid two branches. This costs one extra insn, so disable when
21712 optimizing for size. */
21714 if ((code == EQ || code == NE)
21715 && (!optimize_insn_for_size_p ()
21716 || hi[1] == const0_rtx || lo[1] == const0_rtx))
/* XOR against a zero half is skipped — the half compares equal to 0
   by itself.  */
21721 if (hi[1] != const0_rtx)
21722 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
21723 NULL_RTX, 0, OPTAB_WIDEN);
21726 if (lo[1] != const0_rtx)
21727 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
21728 NULL_RTX, 0, OPTAB_WIDEN);
21730 tmp = expand_binop (submode, ior_optab, xor1, xor0,
21731 NULL_RTX, 0, OPTAB_WIDEN);
/* Recurse on the word-sized OR result against zero.  */
21733 ix86_expand_branch (code, tmp, const0_rtx, label);
21737 /* Otherwise, if we are doing less-than or greater-or-equal-than,
21738 op1 is a constant and the low word is zero, then we can just
21739 examine the high word. Similarly for low word -1 and
21740 less-or-equal-than or greater-than. */
21742 if (CONST_INT_P (hi[1]))
21745 case LT: case LTU: case GE: case GEU:
21746 if (lo[1] == const0_rtx)
21748 ix86_expand_branch (code, hi[0], hi[1], label);
21752 case LE: case LEU: case GT: case GTU:
21753 if (lo[1] == constm1_rtx)
21755 ix86_expand_branch (code, hi[0], hi[1], label);
21763 /* Otherwise, we need two or three jumps. */
21765 label2 = gen_label_rtx ();
21768 code2 = swap_condition (code);
21769 code3 = unsigned_condition (code);
/* code1 branches to LABEL on the high words, code2 branches past the
   low-word test, code3 (unsigned) decides on the low words.  */
21773 case LT: case GT: case LTU: case GTU:
21776 case LE: code1 = LT; code2 = GT; break;
21777 case GE: code1 = GT; code2 = LT; break;
21778 case LEU: code1 = LTU; code2 = GTU; break;
21779 case GEU: code1 = GTU; code2 = LTU; break;
21781 case EQ: code1 = UNKNOWN; code2 = NE; break;
21782 case NE: code2 = UNKNOWN; break;
21785 gcc_unreachable ();
21790 * if (hi(a) < hi(b)) goto true;
21791 * if (hi(a) > hi(b)) goto false;
21792 * if (lo(a) < lo(b)) goto true;
21796 if (code1 != UNKNOWN)
21797 ix86_expand_branch (code1, hi[0], hi[1], label);
21798 if (code2 != UNKNOWN)
21799 ix86_expand_branch (code2, hi[0], hi[1], label2);
21801 ix86_expand_branch (code3, lo[0], lo[1], label);
21803 if (code2 != UNKNOWN)
21804 emit_label (label2);
21809 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
21814 /* Split branch based on floating point condition. */
/* TARGET1/TARGET2 are the taken/fall-through destinations; one of them
   is pc_rtx.  If the fall-through slot holds the real target, swap and
   reverse the condition (NaN-safely) so TARGET2 ends up as pc_rtx.  */
21816 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
21817 rtx target1, rtx target2, rtx tmp)
21822 if (target2 != pc_rtx)
21824 std::swap (target1, target2);
21825 code = reverse_condition_maybe_unordered (code);
21828 condition = ix86_expand_fp_compare (code, op1, op2,
21831 i = emit_jump_insn (gen_rtx_SET
21833 gen_rtx_IF_THEN_ELSE (VOIDmode,
21834 condition, target1, target2)));
/* Propagate the branch probability recorded by the splitter, if any.  */
21835 if (split_branch_probability >= 0)
21836 add_int_reg_note (i, REG_BR_PROB, split_branch_probability);
/* Expand a setcc: store (CODE op0 op1) as 0/1 into the QImode DEST.
   The comparison rtx is retagged to QImode so the set feeds DEST
   directly.  */
21840 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
21844 gcc_assert (GET_MODE (dest) == QImode);
21846 ret = ix86_expand_compare (code, op0, op1);
21847 PUT_MODE (ret, QImode);
21848 emit_insn (gen_rtx_SET (dest, ret));
21851 /* Expand comparison setting or clearing carry flag. Return true when
21852 successful and set pop for the operation. */
/* On success *POP is an LTU/GEU rtx over the flags register — the two
   codes that read only the carry flag (see the assert at the end).
   Some interior lines are elided in this excerpt.  */
21854 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
21856 machine_mode mode =
21857 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
21859 /* Do not handle double-mode compares that go through special path. */
21860 if (mode == (TARGET_64BIT ? TImode : DImode))
21863 if (SCALAR_FLOAT_MODE_P (mode))
21866 rtx_insn *compare_seq;
21868 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
21870 /* Shortcut: following common codes never translate
21871 into carry flag compares. */
21872 if (code == EQ || code == NE || code == UNEQ || code == LTGT
21873 || code == ORDERED || code == UNORDERED)
21876 /* These comparisons require zero flag; swap operands so they won't. */
21877 if ((code == GT || code == UNLE || code == LE || code == UNGT)
21878 && !TARGET_IEEE_FP)
21880 std::swap (op0, op1);
21881 code = swap_condition (code);
21884 /* Try to expand the comparison and verify that we end up with
21885 carry flag based comparison. This fails to be true only when
21886 we decide to expand comparison using arithmetic that is not
21887 too common scenario. */
/* Emit speculatively into a sequence; only commit (emit_insn below)
   once we know the result is carry-flag based.  */
21889 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
21890 compare_seq = get_insns ();
21893 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
21894 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
21895 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
21897 code = GET_CODE (compare_op);
21899 if (code != LTU && code != GEU)
21902 emit_insn (compare_seq);
21907 if (!INTEGRAL_MODE_P (mode))
/* Integer path: rewrite CODE/OP1 so the test becomes LTU/GEU.  */
21916 /* Convert a==0 into (unsigned)a<1. */
21919 if (op1 != const0_rtx)
21922 code = (code == EQ ? LTU : GEU);
21925 /* Convert a>b into b<a or a>=b-1. */
21928 if (CONST_INT_P (op1))
21930 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
21931 /* Bail out on overflow. We still can swap operands but that
21932 would force loading of the constant into register. */
21933 if (op1 == const0_rtx
21934 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
21936 code = (code == GTU ? GEU : LTU);
21940 std::swap (op0, op1);
21941 code = (code == GTU ? LTU : GEU);
21945 /* Convert a>=0 into (unsigned)a<0x80000000. */
21948 if (mode == DImode || op1 != const0_rtx)
21950 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
21951 code = (code == LT ? GEU : LTU);
21955 if (mode == DImode || op1 != constm1_rtx)
21957 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
21958 code = (code == LE ? GEU : LTU);
21964 /* Swapping operands may cause constant to appear as first operand. */
21965 if (!nonimmediate_operand (op0, VOIDmode))
21967 if (!can_create_pseudo_p ())
21969 op0 = force_reg (mode, op0);
21971 *pop = ix86_expand_compare (code, op0, op1);
21972 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
21977 ix86_expand_int_movcc (rtx operands[])
21979 enum rtx_code code = GET_CODE (operands[1]), compare_code;
21980 rtx_insn *compare_seq;
21982 machine_mode mode = GET_MODE (operands[0]);
21983 bool sign_bit_compare_p = false;
21984 rtx op0 = XEXP (operands[1], 0);
21985 rtx op1 = XEXP (operands[1], 1);
21987 if (GET_MODE (op0) == TImode
21988 || (GET_MODE (op0) == DImode
21993 compare_op = ix86_expand_compare (code, op0, op1);
21994 compare_seq = get_insns ();
21997 compare_code = GET_CODE (compare_op);
21999 if ((op1 == const0_rtx && (code == GE || code == LT))
22000 || (op1 == constm1_rtx && (code == GT || code == LE)))
22001 sign_bit_compare_p = true;
22003 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
22004 HImode insns, we'd be swallowed in word prefix ops. */
22006 if ((mode != HImode || TARGET_FAST_PREFIX)
22007 && (mode != (TARGET_64BIT ? TImode : DImode))
22008 && CONST_INT_P (operands[2])
22009 && CONST_INT_P (operands[3]))
22011 rtx out = operands[0];
22012 HOST_WIDE_INT ct = INTVAL (operands[2]);
22013 HOST_WIDE_INT cf = INTVAL (operands[3]);
22014 HOST_WIDE_INT diff;
22017 /* Sign bit compares are better done using shifts than we do by using
22019 if (sign_bit_compare_p
22020 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
22022 /* Detect overlap between destination and compare sources. */
22025 if (!sign_bit_compare_p)
22028 bool fpcmp = false;
22030 compare_code = GET_CODE (compare_op);
22032 flags = XEXP (compare_op, 0);
22034 if (GET_MODE (flags) == CCFPmode
22035 || GET_MODE (flags) == CCFPUmode)
22039 = ix86_fp_compare_code_to_integer (compare_code);
22042 /* To simplify rest of code, restrict to the GEU case. */
22043 if (compare_code == LTU)
22045 std::swap (ct, cf);
22046 compare_code = reverse_condition (compare_code);
22047 code = reverse_condition (code);
22052 PUT_CODE (compare_op,
22053 reverse_condition_maybe_unordered
22054 (GET_CODE (compare_op)));
22056 PUT_CODE (compare_op,
22057 reverse_condition (GET_CODE (compare_op)));
22061 if (reg_overlap_mentioned_p (out, op0)
22062 || reg_overlap_mentioned_p (out, op1))
22063 tmp = gen_reg_rtx (mode);
22065 if (mode == DImode)
22066 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
22068 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
22069 flags, compare_op));
22073 if (code == GT || code == GE)
22074 code = reverse_condition (code);
22077 std::swap (ct, cf);
22080 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
22093 tmp = expand_simple_binop (mode, PLUS,
22095 copy_rtx (tmp), 1, OPTAB_DIRECT);
22106 tmp = expand_simple_binop (mode, IOR,
22108 copy_rtx (tmp), 1, OPTAB_DIRECT);
22110 else if (diff == -1 && ct)
22120 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
22122 tmp = expand_simple_binop (mode, PLUS,
22123 copy_rtx (tmp), GEN_INT (cf),
22124 copy_rtx (tmp), 1, OPTAB_DIRECT);
22132 * andl cf - ct, dest
22142 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
22145 tmp = expand_simple_binop (mode, AND,
22147 gen_int_mode (cf - ct, mode),
22148 copy_rtx (tmp), 1, OPTAB_DIRECT);
22150 tmp = expand_simple_binop (mode, PLUS,
22151 copy_rtx (tmp), GEN_INT (ct),
22152 copy_rtx (tmp), 1, OPTAB_DIRECT);
22155 if (!rtx_equal_p (tmp, out))
22156 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
22163 machine_mode cmp_mode = GET_MODE (op0);
22164 enum rtx_code new_code;
22166 if (SCALAR_FLOAT_MODE_P (cmp_mode))
22168 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
22170 /* We may be reversing unordered compare to normal compare, that
22171 is not valid in general (we may convert non-trapping condition
22172 to trapping one), however on i386 we currently emit all
22173 comparisons unordered. */
22174 new_code = reverse_condition_maybe_unordered (code);
22177 new_code = ix86_reverse_condition (code, cmp_mode);
22178 if (new_code != UNKNOWN)
22180 std::swap (ct, cf);
22186 compare_code = UNKNOWN;
22187 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
22188 && CONST_INT_P (op1))
22190 if (op1 == const0_rtx
22191 && (code == LT || code == GE))
22192 compare_code = code;
22193 else if (op1 == constm1_rtx)
22197 else if (code == GT)
22202 /* Optimize dest = (op0 < 0) ? -1 : cf. */
22203 if (compare_code != UNKNOWN
22204 && GET_MODE (op0) == GET_MODE (out)
22205 && (cf == -1 || ct == -1))
22207 /* If lea code below could be used, only optimize
22208 if it results in a 2 insn sequence. */
22210 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
22211 || diff == 3 || diff == 5 || diff == 9)
22212 || (compare_code == LT && ct == -1)
22213 || (compare_code == GE && cf == -1))
22216 * notl op1 (if necessary)
22224 code = reverse_condition (code);
22227 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
22229 out = expand_simple_binop (mode, IOR,
22231 out, 1, OPTAB_DIRECT);
22232 if (out != operands[0])
22233 emit_move_insn (operands[0], out);
22240 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
22241 || diff == 3 || diff == 5 || diff == 9)
22242 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
22244 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
22250 * lea cf(dest*(ct-cf)),dest
22254 * This also catches the degenerate setcc-only case.
22260 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
22263 /* On x86_64 the lea instruction operates on Pmode, so we need
22264 to get arithmetics done in proper mode to match. */
22266 tmp = copy_rtx (out);
22270 out1 = copy_rtx (out);
22271 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
22275 tmp = gen_rtx_PLUS (mode, tmp, out1);
22281 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
22284 if (!rtx_equal_p (tmp, out))
22287 out = force_operand (tmp, copy_rtx (out));
22289 emit_insn (gen_rtx_SET (copy_rtx (out), copy_rtx (tmp)));
22291 if (!rtx_equal_p (out, operands[0]))
22292 emit_move_insn (operands[0], copy_rtx (out));
22298 * General case: Jumpful:
22299 * xorl dest,dest cmpl op1, op2
22300 * cmpl op1, op2 movl ct, dest
22301 * setcc dest jcc 1f
22302 * decl dest movl cf, dest
22303 * andl (cf-ct),dest 1:
22306 * Size 20. Size 14.
22308 * This is reasonably steep, but branch mispredict costs are
22309 * high on modern cpus, so consider failing only if optimizing
22313 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
22314 && BRANCH_COST (optimize_insn_for_speed_p (),
22319 machine_mode cmp_mode = GET_MODE (op0);
22320 enum rtx_code new_code;
22322 if (SCALAR_FLOAT_MODE_P (cmp_mode))
22324 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
22326 /* We may be reversing unordered compare to normal compare,
22327 that is not valid in general (we may convert non-trapping
22328 condition to trapping one), however on i386 we currently
22329 emit all comparisons unordered. */
22330 new_code = reverse_condition_maybe_unordered (code);
22334 new_code = ix86_reverse_condition (code, cmp_mode);
22335 if (compare_code != UNKNOWN && new_code != UNKNOWN)
22336 compare_code = reverse_condition (compare_code);
22339 if (new_code != UNKNOWN)
22347 if (compare_code != UNKNOWN)
22349 /* notl op1 (if needed)
22354 For x < 0 (resp. x <= -1) there will be no notl,
22355 so if possible swap the constants to get rid of the
22357 True/false will be -1/0 while code below (store flag
22358 followed by decrement) is 0/-1, so the constants need
22359 to be exchanged once more. */
22361 if (compare_code == GE || !cf)
22363 code = reverse_condition (code);
22367 std::swap (ct, cf);
22369 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
22373 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
22375 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
22377 copy_rtx (out), 1, OPTAB_DIRECT);
22380 out = expand_simple_binop (mode, AND, copy_rtx (out),
22381 gen_int_mode (cf - ct, mode),
22382 copy_rtx (out), 1, OPTAB_DIRECT);
22384 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
22385 copy_rtx (out), 1, OPTAB_DIRECT);
22386 if (!rtx_equal_p (out, operands[0]))
22387 emit_move_insn (operands[0], copy_rtx (out));
22393 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
22395 /* Try a few things more with specific constants and a variable. */
22398 rtx var, orig_out, out, tmp;
22400 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
22403 /* If one of the two operands is an interesting constant, load a
22404 constant with the above and mask it in with a logical operation. */
22406 if (CONST_INT_P (operands[2]))
22409 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
22410 operands[3] = constm1_rtx, op = and_optab;
22411 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
22412 operands[3] = const0_rtx, op = ior_optab;
22416 else if (CONST_INT_P (operands[3]))
22419 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
22420 operands[2] = constm1_rtx, op = and_optab;
22421 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
22422 operands[2] = const0_rtx, op = ior_optab;
22429 orig_out = operands[0];
22430 tmp = gen_reg_rtx (mode);
22433 /* Recurse to get the constant loaded. */
22434 if (!ix86_expand_int_movcc (operands))
22437 /* Mask in the interesting variable. */
22438 out = expand_binop (mode, op, var, tmp, orig_out, 0,
22440 if (!rtx_equal_p (out, orig_out))
22441 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
22447 * For comparison with above,
22457 if (! nonimmediate_operand (operands[2], mode))
22458 operands[2] = force_reg (mode, operands[2]);
22459 if (! nonimmediate_operand (operands[3], mode))
22460 operands[3] = force_reg (mode, operands[3]);
22462 if (! register_operand (operands[2], VOIDmode)
22464 || ! register_operand (operands[3], VOIDmode)))
22465 operands[2] = force_reg (mode, operands[2]);
22468 && ! register_operand (operands[3], VOIDmode))
22469 operands[3] = force_reg (mode, operands[3]);
22471 emit_insn (compare_seq);
22472 emit_insn (gen_rtx_SET (operands[0],
22473 gen_rtx_IF_THEN_ELSE (mode,
22474 compare_op, operands[2],
22479 /* Swap, force into registers, or otherwise massage the two operands
22480 to an sse comparison with a mask result. Thus we differ a bit from
22481 ix86_prepare_fp_compare_args which expects to produce a flags result.
22483 The DEST operand exists to help determine whether to commute commutative
22484 operators. The POP0/POP1 operands are updated in place. The new
22485 comparison code is returned, or UNKNOWN if not implementable. */
/* NOTE(review): the switch cases dispatching on CODE are elided in this
   excerpt; the comments below mark the case groups.  */
22487 static enum rtx_code
22488 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
22489 rtx *pop0, rtx *pop1)
22495 /* AVX supports all the needed comparisons. */
22498 /* We have no LTGT as an operator. We could implement it with
22499 NE & ORDERED, but this requires an extra temporary. It's
22500 not clear that it's worth it. */
22507 /* These are supported directly. */
22514 /* AVX has 3 operand comparisons, no need to swap anything. */
22517 /* For commutative operators, try to canonicalize the destination
22518 operand to be first in the comparison - this helps reload to
22519 avoid extra moves. */
22520 if (!dest || !rtx_equal_p (dest, *pop1))
22528 /* These are not supported directly before AVX, and furthermore
22529 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
22530 comparison operands to transform into something that is
22532 std::swap (*pop0, *pop1);
22533 code = swap_condition (code);
22537 gcc_unreachable ();
22543 /* Detect conditional moves that exactly match min/max operational
22544 semantics. Note that this is IEEE safe, as long as we don't
22545 interchange the operands.
22547 Returns FALSE if this conditional move doesn't match a MIN/MAX,
22548 and TRUE if the operation is successful and instructions are emitted. */
22551 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
22552 rtx cmp_op1, rtx if_true, rtx if_false)
/* UNGE is handled by swapping the arms so it reads like LT.  */
22560 else if (code == UNGE)
22561 std::swap (if_true, if_false);
/* The arms must be exactly the compared operands; their order decides
   min vs. max.  */
22565 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
22567 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
22572 mode = GET_MODE (dest);
22574 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
22575 but MODE may be a vector mode and thus not appropriate. */
22576 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
/* Strict FP semantics: use the IEEE min/max unspec patterns.  */
22578 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
22581 if_true = force_reg (mode, if_true);
22582 v = gen_rtvec (2, if_true, if_false);
22583 tmp = gen_rtx_UNSPEC (mode, v, u);
/* Relaxed FP semantics: plain SMIN/SMAX is enough.  */
22587 code = is_min ? SMIN : SMAX;
22588 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
22591 emit_insn (gen_rtx_SET (dest, tmp));
22595 /* Expand an sse vector comparison. Return the register with the result. */
/* DEST receives the mask; a fresh pseudo is substituted if DEST has the
   wrong mode or overlaps an arm.  For 64-byte operands the result is an
   AVX-512 integer mask whose mode has one bit per element.  */
22598 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
22599 rtx op_true, rtx op_false)
22601 machine_mode mode = GET_MODE (dest);
22602 machine_mode cmp_ops_mode = GET_MODE (cmp_op0);
22604 /* In general case result of comparison can differ from operands' type. */
22605 machine_mode cmp_mode;
22607 /* In AVX512F the result of comparison is an integer mask. */
22608 bool maskcmp = false;
22611 if (GET_MODE_SIZE (cmp_ops_mode) == 64)
/* One mask bit per vector element.  */
22613 cmp_mode = mode_for_size (GET_MODE_NUNITS (cmp_ops_mode), MODE_INT, 0);
22614 gcc_assert (cmp_mode != BLKmode);
22619 cmp_mode = cmp_ops_mode;
22622 cmp_op0 = force_reg (cmp_ops_mode, cmp_op0);
22623 if (!nonimmediate_operand (cmp_op1, cmp_ops_mode))
22624 cmp_op1 = force_reg (cmp_ops_mode, cmp_op1);
22627 || (op_true && reg_overlap_mentioned_p (dest, op_true))
22628 || (op_false && reg_overlap_mentioned_p (dest, op_false)))
22629 dest = gen_reg_rtx (maskcmp ? cmp_mode : mode);
22631 /* Compare patterns for int modes are unspec in AVX512F only. */
22632 if (maskcmp && (code == GT || code == EQ))
22634 rtx (*gen)(rtx, rtx, rtx);
22636 switch (cmp_ops_mode)
/* Byte/word element sizes need AVX-512BW.  */
22639 gcc_assert (TARGET_AVX512BW);
22640 gen = code == GT ? gen_avx512bw_gtv64qi3 : gen_avx512bw_eqv64qi3_1;
22643 gcc_assert (TARGET_AVX512BW);
22644 gen = code == GT ? gen_avx512bw_gtv32hi3 : gen_avx512bw_eqv32hi3_1;
22647 gen = code == GT ? gen_avx512f_gtv16si3 : gen_avx512f_eqv16si3_1;
22650 gen = code == GT ? gen_avx512f_gtv8di3 : gen_avx512f_eqv8di3_1;
22658 emit_insn (gen (dest, cmp_op0, cmp_op1));
/* Generic path: emit (CODE op0 op1) directly, converting the result
   mode if the destination differs.  */
22662 x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
22664 if (cmp_mode != mode && !maskcmp)
22666 x = force_reg (cmp_ops_mode, x);
22667 convert_move (dest, x, false);
22670 emit_insn (gen_rtx_SET (dest, x));
22675 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
22676 operations. This is used for both scalar and vector conditional moves. */
/* Strategy ladder, cheapest first: pass CMP through unchanged; single
   AND / ANDN / IOR when one arm is all-zeros or all-ones; XOP/SSE4.1/
   AVX/AVX-512 blend instructions; finally the generic
   (CMP & true) | (~CMP & false).  Some interior condition lines are
   elided in this excerpt.  */
22679 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
22681 machine_mode mode = GET_MODE (dest);
22682 machine_mode cmpmode = GET_MODE (cmp);
22684 /* In AVX512F the result of comparison is an integer mask. */
22685 bool maskcmp = (mode != cmpmode && TARGET_AVX512F);
22689 /* If we have an integer mask and FP value then we need
22690 to cast mask to FP mode. */
22691 if (mode != cmpmode && VECTOR_MODE_P (cmpmode))
22693 cmp = force_reg (cmpmode, cmp);
22694 cmp = gen_rtx_SUBREG (mode, cmp, 0);
/* true==-1, false==0: the mask IS the answer.  */
22697 if (vector_all_ones_operand (op_true, mode)
22698 && rtx_equal_p (op_false, CONST0_RTX (mode))
22701 emit_insn (gen_rtx_SET (dest, cmp));
/* false==0: dest = cmp & true.  */
22703 else if (op_false == CONST0_RTX (mode)
22706 op_true = force_reg (mode, op_true);
22707 x = gen_rtx_AND (mode, cmp, op_true);
22708 emit_insn (gen_rtx_SET (dest, x));
/* true==0: dest = ~cmp & false.  */
22710 else if (op_true == CONST0_RTX (mode)
22713 op_false = force_reg (mode, op_false);
22714 x = gen_rtx_NOT (mode, cmp);
22715 x = gen_rtx_AND (mode, x, op_false);
22716 emit_insn (gen_rtx_SET (dest, x));
/* true==-1 (integer): dest = cmp | false.  */
22718 else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode)
22721 op_false = force_reg (mode, op_false);
22722 x = gen_rtx_IOR (mode, cmp, op_false);
22723 emit_insn (gen_rtx_SET (dest, x));
/* XOP vpcmov: a real three-operand if_then_else.  */
22725 else if (TARGET_XOP
22728 op_true = force_reg (mode, op_true);
22730 if (!nonimmediate_operand (op_false, mode))
22731 op_false = force_reg (mode, op_false);
22733 emit_insn (gen_rtx_SET (dest, gen_rtx_IF_THEN_ELSE (mode, cmp,
/* Blend-instruction path; GEN is picked per mode below.  */
22739 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
22742 if (!nonimmediate_operand (op_true, mode))
22743 op_true = force_reg (mode, op_true);
22745 op_false = force_reg (mode, op_false);
22751 gen = gen_sse4_1_blendvps;
22755 gen = gen_sse4_1_blendvpd;
/* Other 128-bit int modes are blended bytewise via V16QI.  */
22763 gen = gen_sse4_1_pblendvb;
22764 if (mode != V16QImode)
22765 d = gen_reg_rtx (V16QImode);
22766 op_false = gen_lowpart (V16QImode, op_false);
22767 op_true = gen_lowpart (V16QImode, op_true);
22768 cmp = gen_lowpart (V16QImode, cmp);
22773 gen = gen_avx_blendvps256;
22777 gen = gen_avx_blendvpd256;
/* 256-bit int modes likewise go through V32QI.  */
22785 gen = gen_avx2_pblendvb;
22786 if (mode != V32QImode)
22787 d = gen_reg_rtx (V32QImode);
22788 op_false = gen_lowpart (V32QImode, op_false);
22789 op_true = gen_lowpart (V32QImode, op_true);
22790 cmp = gen_lowpart (V32QImode, cmp);
22795 gen = gen_avx512bw_blendmv64qi;
22798 gen = gen_avx512bw_blendmv32hi;
22801 gen = gen_avx512f_blendmv16si;
22804 gen = gen_avx512f_blendmv8di;
22807 gen = gen_avx512f_blendmv8df;
22810 gen = gen_avx512f_blendmv16sf;
/* Blend writes D (possibly a temp); copy back in DEST's mode.  */
22819 emit_insn (gen (d, op_false, op_true, cmp));
22821 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
/* Generic fallback: (true & cmp) | (~cmp & false) via two temps.  */
22825 op_true = force_reg (mode, op_true);
22827 t2 = gen_reg_rtx (mode);
22829 t3 = gen_reg_rtx (mode);
22833 x = gen_rtx_AND (mode, op_true, cmp);
22834 emit_insn (gen_rtx_SET (t2, x));
22836 x = gen_rtx_NOT (mode, cmp);
22837 x = gen_rtx_AND (mode, x, op_false);
22838 emit_insn (gen_rtx_SET (t3, x));
22840 x = gen_rtx_IOR (mode, t3, t2);
22841 emit_insn (gen_rtx_SET (dest, x));
22846 /* Expand a floating-point conditional move. Return true if successful. */
/* operands[0] = (operands[1] ? operands[2] : operands[3]).  SSE math
   uses compare + mask/blend; otherwise fcmov is used, with a setcc
   fallback when the condition is not fcmov-representable.  */
22849 ix86_expand_fp_movcc (rtx operands[])
22851 machine_mode mode = GET_MODE (operands[0]);
22852 enum rtx_code code = GET_CODE (operands[1]);
22853 rtx tmp, compare_op;
22854 rtx op0 = XEXP (operands[1], 0);
22855 rtx op1 = XEXP (operands[1], 1);
22857 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
22859 machine_mode cmode;
22861 /* Since we've no cmove for sse registers, don't force bad register
22862 allocation just to gain access to it. Deny movcc when the
22863 comparison mode doesn't match the move mode. */
22864 cmode = GET_MODE (op0);
22865 if (cmode == VOIDmode)
22866 cmode = GET_MODE (op1);
22870 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
22871 if (code == UNKNOWN)
/* Prefer a direct min/max when the movcc matches that shape.  */
22874 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
22875 operands[2], operands[3]))
22878 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
22879 operands[2], operands[3]);
22880 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
22884 if (GET_MODE (op0) == TImode
22885 || (GET_MODE (op0) == DImode
22889 /* The floating point conditional move instructions don't directly
22890 support conditions resulting from a signed integer comparison. */
22892 compare_op = ix86_expand_compare (code, op0, op1);
22893 if (!fcmov_comparison_operator (compare_op, VOIDmode))
/* Reduce to an fcmov-friendly NE-against-zero via setcc.  */
22895 tmp = gen_reg_rtx (QImode);
22896 ix86_expand_setcc (tmp, code, op0, op1);
22898 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
22901 emit_insn (gen_rtx_SET (operands[0],
22902 gen_rtx_IF_THEN_ELSE (mode, compare_op,
22903 operands[2], operands[3])));
22908 /* Helper for ix86_cmp_code_to_pcmp_immediate for int modes. */
22911 ix86_int_cmp_code_to_pcmp_immediate (enum rtx_code code)
/* NOTE(review): the switch over CODE is elided in this excerpt; only
   the unreachable default remains visible.  */
22932 gcc_unreachable ();
22936 /* Helper for ix86_cmp_code_to_pcmp_immediate for fp modes. */
22939 ix86_fp_cmp_code_to_pcmp_immediate (enum rtx_code code)
/* NOTE(review): the switch over CODE is elided in this excerpt; only
   the unreachable default remains visible.  */
22956 gcc_unreachable ();
22960 /* Return immediate value to be used in UNSPEC_PCMP
22961 for comparison CODE in MODE. */
/* Thin dispatcher: FP modes and integer modes use distinct predicate
   immediate encodings.  */
22964 ix86_cmp_code_to_pcmp_immediate (enum rtx_code code, machine_mode mode)
22966 if (FLOAT_MODE_P (mode))
22967 return ix86_fp_cmp_code_to_pcmp_immediate (code);
22968 return ix86_int_cmp_code_to_pcmp_immediate (code);
22971 /* Expand AVX-512 vector comparison. */
/* operands[0] (a mask mode) = operands[2] <CODE> operands[3], emitted
   as an UNSPEC_PCMP / UNSPEC_UNSIGNED_PCMP with the predicate
   immediate computed from CODE.  The unsigned/signed selection lines
   are partially elided in this excerpt.  */
22974 ix86_expand_mask_vec_cmp (rtx operands[])
22976 machine_mode mask_mode = GET_MODE (operands[0]);
22977 machine_mode cmp_mode = GET_MODE (operands[2]);
22978 enum rtx_code code = GET_CODE (operands[1]);
22979 rtx imm = GEN_INT (ix86_cmp_code_to_pcmp_immediate (code, cmp_mode));
22989 unspec_code = UNSPEC_UNSIGNED_PCMP;
22993 unspec_code = UNSPEC_PCMP;
22996 unspec = gen_rtx_UNSPEC (mask_mode, gen_rtvec (3, operands[2],
22999 emit_insn (gen_rtx_SET (operands[0], unspec));
23004 /* Expand fp vector comparison. */
/* LTGT/UNEQ have no single SSE predicate, so they are built from two
   compares combined with a logical op; everything else maps to one
   ix86_expand_sse_cmp call.  Some case labels are elided in this
   excerpt.  */
23007 ix86_expand_fp_vec_cmp (rtx operands[])
23009 enum rtx_code code = GET_CODE (operands[1]);
23012 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
23013 &operands[2], &operands[3]);
23014 if (code == UNKNOWN)
23017 switch (GET_CODE (operands[1]))
/* LTGT = ORDERED & NE.  */
23020 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[2],
23021 operands[3], NULL, NULL);
23022 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[2],
23023 operands[3], NULL, NULL);
/* UNEQ = UNORDERED | EQ.  */
23027 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[2],
23028 operands[3], NULL, NULL);
23029 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[2],
23030 operands[3], NULL, NULL);
23034 gcc_unreachable ();
23036 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
23040 cmp = ix86_expand_sse_cmp (operands[0], code, operands[2], operands[3],
23041 operands[1], operands[2]);
23043 if (operands[0] != cmp)
23044 emit_move_insn (operands[0], cmp);
/* Expand an integer vector comparison (CODE cop0 cop1) producing a mask
   for a movcc in DATA_MODE.  The comparison is first canonicalized to
   EQ/GT/GTU; unsigned compares are rewritten as signed ones by biasing
   or saturating subtraction.  *NEGATE tells the caller the produced
   mask is inverted.  Many interior lines are elided in this excerpt.  */
23050 ix86_expand_int_sse_cmp (rtx dest, enum rtx_code code, rtx cop0, rtx cop1,
23051 rtx op_true, rtx op_false, bool *negate)
23053 machine_mode data_mode = GET_MODE (dest);
23054 machine_mode mode = GET_MODE (cop0);
23059 /* XOP supports all of the comparisons on all 128-bit vector int types. */
23061 && (mode == V16QImode || mode == V8HImode
23062 || mode == V4SImode || mode == V2DImode))
23066 /* Canonicalize the comparison to EQ, GT, GTU. */
23077 code = reverse_condition (code);
23083 code = reverse_condition (code);
23089 std::swap (cop0, cop1);
23090 code = swap_condition (code);
23094 gcc_unreachable ();
23097 /* Only SSE4.1/SSE4.2 supports V2DImode. */
23098 if (mode == V2DImode)
23103 /* SSE4.1 supports EQ. */
23104 if (!TARGET_SSE4_1)
23110 /* SSE4.2 supports GT/GTU. */
23111 if (!TARGET_SSE4_2)
23116 gcc_unreachable ();
23120 /* Unsigned parallel compare is not supported by the hardware.
23121 Play some tricks to turn this into a signed comparison
23125 cop0 = force_reg (mode, cop0);
23137 rtx (*gen_sub3) (rtx, rtx, rtx);
23141 case V16SImode: gen_sub3 = gen_subv16si3; break;
23142 case V8DImode: gen_sub3 = gen_subv8di3; break;
23143 case V8SImode: gen_sub3 = gen_subv8si3; break;
23144 case V4DImode: gen_sub3 = gen_subv4di3; break;
23145 case V4SImode: gen_sub3 = gen_subv4si3; break;
23146 case V2DImode: gen_sub3 = gen_subv2di3; break;
23148 gcc_unreachable ();
23150 /* Subtract (-(INT MAX) - 1) from both operands to make
/* Biasing both sides by the sign-bit constant turns GTU into GT.  */
23152 mask = ix86_build_signbit_mask (mode, true, false);
23153 t1 = gen_reg_rtx (mode);
23154 emit_insn (gen_sub3 (t1, cop0, mask));
23156 t2 = gen_reg_rtx (mode);
23157 emit_insn (gen_sub3 (t2, cop1, mask));
23171 /* Perform a parallel unsigned saturating subtraction. */
23172 x = gen_reg_rtx (mode);
23173 emit_insn (gen_rtx_SET (x, gen_rtx_US_MINUS (mode, cop0,
/* (a -us b) == 0 iff a <= b; result is compared against zero and the
   NEGATE flag flipped.  */
23177 cop1 = CONST0_RTX (mode);
23179 *negate = !*negate;
23183 gcc_unreachable ();
/* A negated mask means the arms must swap.  */
23189 std::swap (op_true, op_false);
23191 /* Allow the comparison to be done in one mode, but the movcc to
23192 happen in another mode. */
23193 if (data_mode == mode)
23195 x = ix86_expand_sse_cmp (dest, code, cop0, cop1,
23196 op_true, op_false);
23200 gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
23201 x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1,
23202 op_true, op_false);
23203 if (GET_MODE (x) == mode)
23204 x = gen_lowpart (data_mode, x);
23210 /* Expand integer vector comparison. */
/* operands[1] is the comparison, operands[2]/[3] its arguments,
   operands[0] the destination mask.  When the helper reports the mask
   is negated, re-compare against zero with EQ to flip it (the elided
   condition between the two calls presumably tests NEGATE).  */
23213 ix86_expand_int_vec_cmp (rtx operands[])
23215 rtx_code code = GET_CODE (operands[1]);
23216 bool negate = false;
23217 rtx cmp = ix86_expand_int_sse_cmp (operands[0], code, operands[2],
23218 operands[3], NULL, NULL, &negate);
/* Invert the mask: (cmp == 0) yields the complement of cmp.  */
23224 cmp = ix86_expand_int_sse_cmp (operands[0], EQ, cmp,
23225 CONST0_RTX (GET_MODE (cmp)),
23226 NULL, NULL, &negate);
/* The EQ-with-zero expansion must never itself request negation.  */
23228 gcc_assert (!negate);
23230 if (operands[0] != cmp)
23231 emit_move_insn (operands[0], cmp);
23236 /* Expand a floating-point vector conditional move; a vcond operation
23237 rather than a movcc operation. */
/* operands[0] = operands[3](operands[4], operands[5]) ? operands[1]
   : operands[2].  Mirrors ix86_expand_fp_vec_cmp but feeds the mask
   into a movcc; lines are elided between the numbered rows.  */
23240 ix86_expand_fp_vcond (rtx operands[])
23242 enum rtx_code code = GET_CODE (operands[3]);
23245 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
23246 &operands[4], &operands[5]);
23247 if (code == UNKNOWN)
23250 switch (GET_CODE (operands[3]))
/* Decompose into ORDERED + NE (case label elided, presumably LTGT).  */
23253 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
23254 operands[5], operands[0], operands[0]);
23255 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
23256 operands[5], operands[1], operands[2]);
/* Decompose into UNORDERED + EQ (presumably the UNEQ case).  */
23260 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
23261 operands[5], operands[0], operands[0]);
23262 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
23263 operands[5], operands[1], operands[2]);
23267 gcc_unreachable ();
/* Combine the two partial masks, then emit the conditional move.  */
23269 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
23271 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
/* Fast path: the whole vcond may collapse to a min/max instruction.  */
23275 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
23276 operands[5], operands[1], operands[2]))
23279 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
23280 operands[1], operands[2]);
23281 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
23285 /* Expand a signed/unsigned integral vector conditional move. */
/* operands[0] = operands[3](operands[4], operands[5]) ? operands[1]
   : operands[2] for integer vectors.  Tries a shift-based shortcut for
   x<0 selects before falling back to compare + movcc.  */
23288 ix86_expand_int_vcond (rtx operands[])
23290 machine_mode data_mode = GET_MODE (operands[0]);
23291 machine_mode mode = GET_MODE (operands[4]);
23292 enum rtx_code code = GET_CODE (operands[3]);
23293 bool negate = false;
23296 cop0 = operands[4];
23297 cop1 = operands[5];
23299 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
23300 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
23301 if ((code == LT || code == GE)
23302 && data_mode == mode
23303 && cop1 == CONST0_RTX (mode)
23304 && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
23305 && GET_MODE_UNIT_SIZE (data_mode) > 1
23306 && GET_MODE_UNIT_SIZE (data_mode) <= 8
23307 && (GET_MODE_SIZE (data_mode) == 16
23308 || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
/* NEGOP is the arm selected when the condition holds (for LT) or
   fails (for GE); SHIFT isolates the sign bit.  */
23310 rtx negop = operands[2 - (code == LT)];
23311 int shift = GET_MODE_UNIT_BITSIZE (data_mode) - 1;
23312 if (negop == CONST1_RTX (data_mode))
/* x < 0 ? 1 : 0  ==>  logical shift right by (bits-1).  */
23314 rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
23315 operands[0], 1, OPTAB_DIRECT);
23316 if (res != operands[0])
23317 emit_move_insn (operands[0], res);
/* x < 0 ? -1 : 0  ==>  arithmetic shift right by (bits-1).
   Excluded for DImode elements (no variable 64-bit arithmetic
   shift before AVX-512 -- NOTE(review): inferred, confirm).  */
23320 else if (GET_MODE_INNER (data_mode) != DImode
23321 && vector_all_ones_operand (negop, data_mode))
23323 rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
23324 operands[0], 0, OPTAB_DIRECT);
23325 if (res != operands[0])
23326 emit_move_insn (operands[0], res);
/* General case: legitimize the operands, then compare + movcc.  */
23331 if (!nonimmediate_operand (cop1, mode))
23332 cop1 = force_reg (mode, cop1);
23333 if (!general_operand (operands[1], data_mode))
23334 operands[1] = force_reg (data_mode, operands[1]);
23335 if (!general_operand (operands[2], data_mode))
23336 operands[2] = force_reg (data_mode, operands[2]);
23338 x = ix86_expand_int_sse_cmp (operands[0], code, cop0, cop1,
23339 operands[1], operands[2], &negate);
/* If the mask came back negated, swap the movcc arms instead of
   inverting the mask.  */
23344 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
23345 operands[2-negate]);
23349 /* AVX512F does support 64-byte integer vector operations,
23350 thus the longest vector we are faced with is V64QImode. */
23351 #define MAX_VECT_LEN 64
/* Descriptor for a (possibly constant) vector permutation request,
   shared by the vec_perm expanders below.  */
23353 struct expand_vec_perm_d
23355 rtx target, op0, op1;
/* Element selector: perm[i] is the source lane for result lane i.  */
23356 unsigned char perm[MAX_VECT_LEN];
/* Vector mode of the operands/result.  */
23357 machine_mode vmode;
/* Number of elements (<= MAX_VECT_LEN).  */
23358 unsigned char nelt;
/* True when op0 == op1 (single-input shuffle).  */
23359 bool one_operand_p;
/* Try to expand a two-source variable permutation with an AVX-512
   vpermi2var instruction.  Returns whether it succeeded (return
   statements are elided in this listing).  The mode dispatch below
   selects the gen_* expander gated on the ISA feature that provides
   it; MASKMODE diverges from MODE for float vectors, whose selector
   must be an integer vector of equal width.  */
23364 ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1,
23365 struct expand_vec_perm_d *d)
23367 /* ix86_expand_vec_perm_vpermi2 is called from both const and non-const
23368 expander, so args are either in d, or in op0, op1 etc. */
23369 machine_mode mode = GET_MODE (d ? d->op0 : op0);
23370 machine_mode maskmode = mode;
23371 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
/* NOTE(review): the case labels of this mode switch are elided; each
   pair/triple of rows below corresponds to one vector mode.  */
23376 if (TARGET_AVX512VL && TARGET_AVX512BW)
23377 gen = gen_avx512vl_vpermi2varv8hi3;
23380 if (TARGET_AVX512VL && TARGET_AVX512BW)
23381 gen = gen_avx512vl_vpermi2varv16hi3;
23384 if (TARGET_AVX512VBMI)
23385 gen = gen_avx512bw_vpermi2varv64qi3;
23388 if (TARGET_AVX512BW)
23389 gen = gen_avx512bw_vpermi2varv32hi3;
23392 if (TARGET_AVX512VL)
23393 gen = gen_avx512vl_vpermi2varv4si3;
23396 if (TARGET_AVX512VL)
23397 gen = gen_avx512vl_vpermi2varv8si3;
23400 if (TARGET_AVX512F)
23401 gen = gen_avx512f_vpermi2varv16si3;
23404 if (TARGET_AVX512VL)
23406 gen = gen_avx512vl_vpermi2varv4sf3;
23407 maskmode = V4SImode;
23411 if (TARGET_AVX512VL)
23413 gen = gen_avx512vl_vpermi2varv8sf3;
23414 maskmode = V8SImode;
23418 if (TARGET_AVX512F)
23420 gen = gen_avx512f_vpermi2varv16sf3;
23421 maskmode = V16SImode;
23425 if (TARGET_AVX512VL)
23426 gen = gen_avx512vl_vpermi2varv2di3;
23429 if (TARGET_AVX512VL)
23430 gen = gen_avx512vl_vpermi2varv4di3;
23433 if (TARGET_AVX512F)
23434 gen = gen_avx512f_vpermi2varv8di3;
23437 if (TARGET_AVX512VL)
23439 gen = gen_avx512vl_vpermi2varv2df3;
23440 maskmode = V2DImode;
23444 if (TARGET_AVX512VL)
23446 gen = gen_avx512vl_vpermi2varv4df3;
23447 maskmode = V4DImode;
23451 if (TARGET_AVX512F)
23453 gen = gen_avx512f_vpermi2varv8df3;
23454 maskmode = V8DImode;
23464 /* ix86_expand_vec_perm_vpermi2 is called from both const and non-const
23465 expander, so args are either in d, or in op0, op1 etc. */
23469 target = d->target;
/* Constant case: materialize the selector from d->perm.  */
23472 for (int i = 0; i < d->nelt; ++i)
23473 vec[i] = GEN_INT (d->perm[i]);
23474 mask = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (d->nelt, vec));
23477 emit_insn (gen (target, op0, force_reg (maskmode, mask), op1));
23481 /* Expand a variable vector permutation. */
/* operands[0] = permutation of operands[1]/operands[2] selected by the
   variable mask operands[3].  Strategy ladder (best first): AVX-512
   vpermi2var; AVX2 vpermd/vpermps (with mask widening for V4DI/V4DF/
   V16HI); AVX2 double-pshufb lane dance for 256-bit byte shuffles;
   XOP vpperm; SSSE3 pshufb (+ vcond merge for two operands).
   NOTE(review): many lines are elided in this listing -- switch labels,
   braces and some emissions are not visible.  */
23484 ix86_expand_vec_perm (rtx operands[])
23486 rtx target = operands[0];
23487 rtx op0 = operands[1];
23488 rtx op1 = operands[2];
23489 rtx mask = operands[3];
23490 rtx t1, t2, t3, t4, t5, t6, t7, t8, vt, vt2, vec[32];
23491 machine_mode mode = GET_MODE (op0);
23492 machine_mode maskmode = GET_MODE (mask);
23494 bool one_operand_shuffle = rtx_equal_p (op0, op1);
23496 /* Number of elements in the vector. */
23497 w = GET_MODE_NUNITS (mode);
23498 e = GET_MODE_UNIT_SIZE (mode);
23499 gcc_assert (w <= 64);
/* Best case: a single vpermi2var handles everything.  */
23501 if (ix86_expand_vec_perm_vpermi2 (target, op0, mask, op1, NULL))
23506 if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
23508 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
23509 an constant shuffle operand. With a tiny bit of effort we can
23510 use VPERMD instead. A re-interpretation stall for V4DFmode is
23511 unfortunate but there's no avoiding it.
23512 Similarly for V16HImode we don't have instructions for variable
23513 shuffling, while for V32QImode we can use after preparing suitable
23514 masks vpshufb; vpshufb; vpermq; vpor. */
23516 if (mode == V16HImode)
23518 maskmode = mode = V32QImode;
23524 maskmode = mode = V8SImode;
23528 t1 = gen_reg_rtx (maskmode);
23530 /* Replicate the low bits of the V4DImode mask into V8SImode:
23532 t1 = { A A B B C C D D }. */
23533 for (i = 0; i < w / 2; ++i)
23534 vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
23535 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
23536 vt = force_reg (maskmode, vt);
23537 mask = gen_lowpart (maskmode, mask);
23538 if (maskmode == V8SImode)
23539 emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
23541 emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));
23543 /* Multiply the shuffle indicies by two. */
23544 t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
23547 /* Add one to the odd shuffle indicies:
23548 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
23549 for (i = 0; i < w / 2; ++i)
23551 vec[i * 2] = const0_rtx;
23552 vec[i * 2 + 1] = const1_rtx;
23554 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
23555 vt = validize_mem (force_const_mem (maskmode, vt));
23556 t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
23559 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
23560 operands[3] = mask = t1;
23561 target = gen_reg_rtx (mode);
23562 op0 = gen_lowpart (mode, op0);
23563 op1 = gen_lowpart (mode, op1);
/* V8SImode arm (switch label elided): vpermd directly.  */
23569 /* The VPERMD and VPERMPS instructions already properly ignore
23570 the high bits of the shuffle elements. No need for us to
23571 perform an AND ourselves. */
23572 if (one_operand_shuffle)
23574 emit_insn (gen_avx2_permvarv8si (target, op0, mask));
23575 if (target != operands[0])
23576 emit_move_insn (operands[0],
23577 gen_lowpart (GET_MODE (operands[0]), target));
/* Two-operand V8SI: permute each input, merge later (merge code
   elided between the numbered rows).  */
23581 t1 = gen_reg_rtx (V8SImode);
23582 t2 = gen_reg_rtx (V8SImode);
23583 emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
23584 emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
/* V8SFmode arm: same shape using vpermps.  */
23590 mask = gen_lowpart (V8SImode, mask);
23591 if (one_operand_shuffle)
23592 emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
23595 t1 = gen_reg_rtx (V8SFmode);
23596 t2 = gen_reg_rtx (V8SFmode);
23597 emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
23598 emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
/* V4SI with AVX2 (label elided): widen the two 128-bit inputs into
   one 256-bit vector so vpermd covers both at once.  */
23604 /* By combining the two 128-bit input vectors into one 256-bit
23605 input vector, we can use VPERMD and VPERMPS for the full
23606 two-operand shuffle. */
23607 t1 = gen_reg_rtx (V8SImode);
23608 t2 = gen_reg_rtx (V8SImode);
23609 emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
23610 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
23611 emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
23612 emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
/* V4SF variant of the same widening trick.  */
23616 t1 = gen_reg_rtx (V8SFmode);
23617 t2 = gen_reg_rtx (V8SImode);
23618 mask = gen_lowpart (V4SImode, mask);
23619 emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
23620 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
23621 emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
23622 emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
/* V32QImode arm: pshufb only shuffles within 128-bit lanes, so build
   two adjusted masks and combine cross-lane results with vpermq/vpor
   (described in detail by the block comment below).  */
23626 t1 = gen_reg_rtx (V32QImode);
23627 t2 = gen_reg_rtx (V32QImode);
23628 t3 = gen_reg_rtx (V32QImode);
23629 vt2 = GEN_INT (-128);
23630 for (i = 0; i < 32; i++)
23632 vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
23633 vt = force_reg (V32QImode, vt);
23634 for (i = 0; i < 32; i++)
23635 vec[i] = i < 16 ? vt2 : const0_rtx;
23636 vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
23637 vt2 = force_reg (V32QImode, vt2);
23638 /* From mask create two adjusted masks, which contain the same
23639 bits as mask in the low 7 bits of each vector element.
23640 The first mask will have the most significant bit clear
23641 if it requests element from the same 128-bit lane
23642 and MSB set if it requests element from the other 128-bit lane.
23643 The second mask will have the opposite values of the MSB,
23644 and additionally will have its 128-bit lanes swapped.
23645 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
23646 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
23647 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
23648 stands for other 12 bytes. */
23649 /* The bit whether element is from the same lane or the other
23650 lane is bit 4, so shift it up by 3 to the MSB position. */
23651 t5 = gen_reg_rtx (V4DImode);
23652 emit_insn (gen_ashlv4di3 (t5, gen_lowpart (V4DImode, mask),
23654 /* Clear MSB bits from the mask just in case it had them set. */
23655 emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
23656 /* After this t1 will have MSB set for elements from other lane. */
23657 emit_insn (gen_xorv32qi3 (t1, gen_lowpart (V32QImode, t5), vt2));
23658 /* Clear bits other than MSB. */
23659 emit_insn (gen_andv32qi3 (t1, t1, vt));
23660 /* Or in the lower bits from mask into t3. */
23661 emit_insn (gen_iorv32qi3 (t3, t1, t2));
23662 /* And invert MSB bits in t1, so MSB is set for elements from the same
23664 emit_insn (gen_xorv32qi3 (t1, t1, vt));
23665 /* Swap 128-bit lanes in t3. */
23666 t6 = gen_reg_rtx (V4DImode);
23667 emit_insn (gen_avx2_permv4di_1 (t6, gen_lowpart (V4DImode, t3),
23668 const2_rtx, GEN_INT (3),
23669 const0_rtx, const1_rtx));
23670 /* And or in the lower bits from mask into t1. */
23671 emit_insn (gen_iorv32qi3 (t1, t1, t2));
23672 if (one_operand_shuffle)
23674 /* Each of these shuffles will put 0s in places where
23675 element from the other 128-bit lane is needed, otherwise
23676 will shuffle in the requested value. */
23677 emit_insn (gen_avx2_pshufbv32qi3 (t3, op0,
23678 gen_lowpart (V32QImode, t6)));
23679 emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
23680 /* For t3 the 128-bit lanes are swapped again. */
23681 t7 = gen_reg_rtx (V4DImode);
23682 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t3),
23683 const2_rtx, GEN_INT (3),
23684 const0_rtx, const1_rtx));
23685 /* And oring both together leads to the result. */
23686 emit_insn (gen_iorv32qi3 (target, t1,
23687 gen_lowpart (V32QImode, t7)));
23688 if (target != operands[0])
23689 emit_move_insn (operands[0],
23690 gen_lowpart (GET_MODE (operands[0]), target));
23694 t4 = gen_reg_rtx (V32QImode);
23695 /* Similarly to the above one_operand_shuffle code,
23696 just for repeated twice for each operand. merge_two:
23697 code will merge the two results together. */
23698 emit_insn (gen_avx2_pshufbv32qi3 (t4, op0,
23699 gen_lowpart (V32QImode, t6)));
23700 emit_insn (gen_avx2_pshufbv32qi3 (t3, op1,
23701 gen_lowpart (V32QImode, t6)));
23702 emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
23703 emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
23704 t7 = gen_reg_rtx (V4DImode);
23705 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t4),
23706 const2_rtx, GEN_INT (3),
23707 const0_rtx, const1_rtx));
23708 t8 = gen_reg_rtx (V4DImode);
23709 emit_insn (gen_avx2_permv4di_1 (t8, gen_lowpart (V4DImode, t3),
23710 const2_rtx, GEN_INT (3),
23711 const0_rtx, const1_rtx));
23712 emit_insn (gen_iorv32qi3 (t4, t2, gen_lowpart (V32QImode, t7)));
23713 emit_insn (gen_iorv32qi3 (t3, t1, gen_lowpart (V32QImode, t8)));
/* From here on: 128-bit-or-smaller fallback via pshufb/vpperm.  */
23719 gcc_assert (GET_MODE_SIZE (mode) <= 16);
23726 /* The XOP VPPERM insn supports three inputs. By ignoring the
23727 one_operand_shuffle special case, we avoid creating another
23728 set of constant vectors in memory. */
23729 one_operand_shuffle = false;
23731 /* mask = mask & {2*w-1, ...} */
23732 vt = GEN_INT (2*w - 1);
23736 /* mask = mask & {w-1, ...} */
23737 vt = GEN_INT (w - 1);
23740 for (i = 0; i < w; i++)
23742 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
23743 mask = expand_simple_binop (maskmode, AND, mask, vt,
23744 NULL_RTX, 0, OPTAB_DIRECT);
23746 /* For non-QImode operations, convert the word permutation control
23747 into a byte permutation control. */
23748 if (mode != V16QImode)
23750 mask = expand_simple_binop (maskmode, ASHIFT, mask,
23751 GEN_INT (exact_log2 (e)),
23752 NULL_RTX, 0, OPTAB_DIRECT);
23754 /* Convert mask to vector of chars. */
23755 mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));
23757 /* Replicate each of the input bytes into byte positions:
23758 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
23759 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
23760 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
23761 for (i = 0; i < 16; ++i)
23762 vec[i] = GEN_INT (i/e * e);
23763 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
23764 vt = validize_mem (force_const_mem (V16QImode, vt));
23766 emit_insn (gen_xop_pperm (mask, mask, mask, vt));
23768 emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));
23770 /* Convert it into the byte positions by doing
23771 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */
23772 for (i = 0; i < 16; ++i)
23773 vec[i] = GEN_INT (i % e);
23774 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
23775 vt = validize_mem (force_const_mem (V16QImode, vt));
23776 emit_insn (gen_addv16qi3 (mask, mask, vt));
23779 /* The actual shuffle operations all operate on V16QImode. */
23780 op0 = gen_lowpart (V16QImode, op0);
23781 op1 = gen_lowpart (V16QImode, op1);
/* XOP path: one vpperm does the two-source byte shuffle.  */
23785 if (GET_MODE (target) != V16QImode)
23786 target = gen_reg_rtx (V16QImode);
23787 emit_insn (gen_xop_pperm (target, op0, op1, mask));
23788 if (target != operands[0])
23789 emit_move_insn (operands[0],
23790 gen_lowpart (GET_MODE (operands[0]), target));
23792 else if (one_operand_shuffle)
23794 if (GET_MODE (target) != V16QImode)
23795 target = gen_reg_rtx (V16QImode);
23796 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
23797 if (target != operands[0])
23798 emit_move_insn (operands[0],
23799 gen_lowpart (GET_MODE (operands[0]), target));
23806 /* Shuffle the two input vectors independently. */
23807 t1 = gen_reg_rtx (V16QImode);
23808 t2 = gen_reg_rtx (V16QImode);
23809 emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
23810 emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));
23813 /* Then merge them together. The key is whether any given control
23814 element contained a bit set that indicates the second word. */
23815 mask = operands[3];
23817 if (maskmode == V2DImode && !TARGET_SSE4_1)
23819 /* Without SSE4.1, we don't have V2DImode EQ. Perform one
23820 more shuffle to convert the V2DI input mask into a V4SI
23821 input mask. At which point the masking that expand_int_vcond
23822 will work as desired. */
23823 rtx t3 = gen_reg_rtx (V4SImode);
23824 emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
23825 const0_rtx, const0_rtx,
23826 const2_rtx, const2_rtx));
23828 maskmode = V4SImode;
/* Isolate the "second source" selector bit, then let the integer
   vcond expander build the final blend of t1/t2.  */
23832 for (i = 0; i < w; i++)
23834 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
23835 vt = force_reg (maskmode, vt);
23836 mask = expand_simple_binop (maskmode, AND, mask, vt,
23837 NULL_RTX, 0, OPTAB_DIRECT);
23839 if (GET_MODE (target) != mode)
23840 target = gen_reg_rtx (mode);
23842 xops[1] = gen_lowpart (mode, t2);
23843 xops[2] = gen_lowpart (mode, t1);
23844 xops[3] = gen_rtx_EQ (maskmode, mask, vt);
23847 ok = ix86_expand_int_vcond (xops);
23849 if (target != operands[0])
23850 emit_move_insn (operands[0],
23851 gen_lowpart (GET_MODE (operands[0]), target));
23855 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
23856 true if we should do zero extension, else sign extension. HIGH_P is
23857 true if we want the N/2 high elements, else the low elements. */
/* Two strategies: with SSE4.1+/AVX extension instructions, extract or
   shift the wanted half and extend it; otherwise interleave with zeros
   (unsigned) or with a computed sign mask (signed).  Mode-switch case
   labels and some branch conditions are elided in this listing.  */
23860 ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
23862 machine_mode imode = GET_MODE (src);
23867 rtx (*unpack)(rtx, rtx);
23868 rtx (*extract)(rtx, rtx) = NULL;
23869 machine_mode halfmode = BLKmode;
/* Mode dispatch: pick the matching zero/sign-extend expander and,
   for 256/512-bit sources, the half-vector extractor.  */
23875 unpack = gen_avx512bw_zero_extendv32qiv32hi2;
23877 unpack = gen_avx512bw_sign_extendv32qiv32hi2;
23878 halfmode = V32QImode;
23880 = high_p ? gen_vec_extract_hi_v64qi : gen_vec_extract_lo_v64qi;
23884 unpack = gen_avx2_zero_extendv16qiv16hi2;
23886 unpack = gen_avx2_sign_extendv16qiv16hi2;
23887 halfmode = V16QImode;
23889 = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
23893 unpack = gen_avx512f_zero_extendv16hiv16si2;
23895 unpack = gen_avx512f_sign_extendv16hiv16si2;
23896 halfmode = V16HImode;
23898 = high_p ? gen_vec_extract_hi_v32hi : gen_vec_extract_lo_v32hi;
23902 unpack = gen_avx2_zero_extendv8hiv8si2;
23904 unpack = gen_avx2_sign_extendv8hiv8si2;
23905 halfmode = V8HImode;
23907 = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
23911 unpack = gen_avx512f_zero_extendv8siv8di2;
23913 unpack = gen_avx512f_sign_extendv8siv8di2;
23914 halfmode = V8SImode;
23916 = high_p ? gen_vec_extract_hi_v16si : gen_vec_extract_lo_v16si;
23920 unpack = gen_avx2_zero_extendv4siv4di2;
23922 unpack = gen_avx2_sign_extendv4siv4di2;
23923 halfmode = V4SImode;
23925 = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
23929 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
23931 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
23935 unpack = gen_sse4_1_zero_extendv4hiv4si2;
23937 unpack = gen_sse4_1_sign_extendv4hiv4si2;
23941 unpack = gen_sse4_1_zero_extendv2siv2di2;
23943 unpack = gen_sse4_1_sign_extendv2siv2di2;
23946 gcc_unreachable ();
23949 if (GET_MODE_SIZE (imode) >= 32)
/* Wide source: extract the requested half, then extend it.  */
23951 tmp = gen_reg_rtx (halfmode);
23952 emit_insn (extract (tmp, src));
/* 128-bit source, high half requested (condition elided): move the
   high 8 bytes down before extending.  */
23956 /* Shift higher 8 bytes to lower 8 bytes. */
23957 tmp = gen_reg_rtx (V1TImode);
23958 emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, src),
23960 tmp = gen_lowpart (imode, tmp);
23965 emit_insn (unpack (dest, tmp));
/* Pre-SSE4.1 fallback: widen via interleave instructions.  */
23969 rtx (*unpack)(rtx, rtx, rtx);
23975 unpack = gen_vec_interleave_highv16qi;
23977 unpack = gen_vec_interleave_lowv16qi;
23981 unpack = gen_vec_interleave_highv8hi;
23983 unpack = gen_vec_interleave_lowv8hi;
23987 unpack = gen_vec_interleave_highv4si;
23989 unpack = gen_vec_interleave_lowv4si;
23992 gcc_unreachable ();
/* Unsigned: interleave with zero.  Signed: interleave with a mask of
   0 > src, i.e. each element's sign replicated.  */
23996 tmp = force_reg (imode, CONST0_RTX (imode));
23998 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
23999 src, pc_rtx, pc_rtx);
24001 rtx tmp2 = gen_reg_rtx (imode);
24002 emit_insn (unpack (tmp2, src, tmp));
24003 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), tmp2));
24007 /* Expand conditional increment or decrement using adb/sbb instructions.
24008 The default case using setcc followed by the conditional move can be
24009 done by generic code. */
/* operands[0] = operands[2] +/- (operands[1] ? 1 : 0) with
   operands[3] in {1, -1}; implemented as adc/sbb against the carry
   flag produced by the compare.  Return statements and some branch
   framing are elided in this listing.  */
24011 ix86_expand_int_addcc (rtx operands[])
24013 enum rtx_code code = GET_CODE (operands[1]);
24015 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
24017 rtx val = const0_rtx;
24018 bool fpcmp = false;
24020 rtx op0 = XEXP (operands[1], 0);
24021 rtx op1 = XEXP (operands[1], 1);
/* Only +1/-1 adjustments are handled here; anything else bails out
   to the generic expansion.  */
24023 if (operands[3] != const1_rtx
24024 && operands[3] != constm1_rtx)
/* The comparison must be expressible through the carry flag.  */
24026 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
24028 code = GET_CODE (compare_op);
24030 flags = XEXP (compare_op, 0);
24032 if (GET_MODE (flags) == CCFPmode
24033 || GET_MODE (flags) == CCFPUmode)
24036 code = ix86_fp_compare_code_to_integer (code);
/* Invert the condition when needed; FP compares need the
   unordered-aware reversal.  */
24043 PUT_CODE (compare_op,
24044 reverse_condition_maybe_unordered
24045 (GET_CODE (compare_op)));
24047 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
24050 mode = GET_MODE (operands[0]);
24052 /* Construct either adc or sbb insn. */
24053 if ((code == LTU) == (operands[3] == constm1_rtx))
24058 insn = gen_subqi3_carry;
24061 insn = gen_subhi3_carry;
24064 insn = gen_subsi3_carry;
24067 insn = gen_subdi3_carry;
24070 gcc_unreachable ();
24078 insn = gen_addqi3_carry;
24081 insn = gen_addhi3_carry;
24084 insn = gen_addsi3_carry;
24087 insn = gen_adddi3_carry;
24090 gcc_unreachable ();
24093 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
24099 /* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
24100 but works for floating pointer parameters and nonoffsetable memories.
24101 For pushes, it returns just stack offsets; the values will be saved
24102 in the right order. Maximally three parts are generated. */
/* Fills PARTS[] with word-sized pieces of OPERAND and returns the part
   count (return statements are elided in this listing).  First branch
   group is the 32-bit target layout (SImode parts), the later group
   the 64-bit layout (DImode parts); the guarding TARGET_64BIT tests
   are among the elided lines.  */
24105 ix86_split_to_parts (rtx operand, rtx *parts, machine_mode mode)
24110 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
24112 size = (GET_MODE_SIZE (mode) + 4) / 8;
/* MMX registers cannot be split into scalar parts.  */
24114 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
24115 gcc_assert (size >= 2 && size <= 4);
24117 /* Optimize constant pool reference to immediates. This is used by fp
24118 moves, that force all constants to memory to allow combining. */
24119 if (MEM_P (operand) && MEM_READONLY_P (operand))
24121 rtx tmp = maybe_get_pool_constant (operand);
24126 if (MEM_P (operand) && !offsettable_memref_p (operand))
24128 /* The only non-offsetable memories we handle are pushes. */
24129 int ok = push_operand (operand, VOIDmode);
/* Pushes: every part is the same pre-decrement address; the caller
   emits them in the right order.  */
24133 operand = copy_rtx (operand);
24134 PUT_MODE (operand, word_mode);
24135 parts[0] = parts[1] = parts[2] = parts[3] = operand;
24139 if (GET_CODE (operand) == CONST_VECTOR)
24141 machine_mode imode = int_mode_for_mode (mode);
24142 /* Caution: if we looked through a constant pool memory above,
24143 the operand may actually have a different mode now. That's
24144 ok, since we want to pun this all the way back to an integer. */
24145 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
24146 gcc_assert (operand != NULL);
/* --- 32-bit target: split into SImode parts. --- */
24152 if (mode == DImode)
24153 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
24158 if (REG_P (operand))
/* Hard-register splitting is only valid after reload.  */
24160 gcc_assert (reload_completed);
24161 for (i = 0; i < size; i++)
24162 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
24164 else if (offsettable_memref_p (operand))
24166 operand = adjust_address (operand, SImode, 0);
24167 parts[0] = operand;
24168 for (i = 1; i < size; i++)
24169 parts[i] = adjust_address (operand, SImode, 4 * i);
24171 else if (CONST_DOUBLE_P (operand))
24173 const REAL_VALUE_TYPE *r;
24176 r = CONST_DOUBLE_REAL_VALUE (operand);
/* Mode-dependent arms (case labels elided): TFmode gives four
   32-bit words, XFmode three, DFmode two.  */
24180 real_to_target (l, r, mode);
24181 parts[3] = gen_int_mode (l[3], SImode);
24182 parts[2] = gen_int_mode (l[2], SImode);
24185 /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
24186 long double may not be 80-bit. */
24187 real_to_target (l, r, mode);
24188 parts[2] = gen_int_mode (l[2], SImode);
24191 REAL_VALUE_TO_TARGET_DOUBLE (*r, l);
24194 gcc_unreachable ();
24196 parts[1] = gen_int_mode (l[1], SImode);
24197 parts[0] = gen_int_mode (l[0], SImode);
24200 gcc_unreachable ();
/* --- 64-bit target: split into DImode (plus tail) parts. --- */
24205 if (mode == TImode)
24206 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
24207 if (mode == XFmode || mode == TFmode)
/* XFmode's upper part is only 32 bits; TFmode's is a full word.  */
24209 machine_mode upper_mode = mode==XFmode ? SImode : DImode;
24210 if (REG_P (operand))
24212 gcc_assert (reload_completed);
24213 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
24214 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
24216 else if (offsettable_memref_p (operand))
24218 operand = adjust_address (operand, DImode, 0);
24219 parts[0] = operand;
24220 parts[1] = adjust_address (operand, upper_mode, 8);
24222 else if (CONST_DOUBLE_P (operand))
24226 real_to_target (l, CONST_DOUBLE_REAL_VALUE (operand), mode);
24228 /* real_to_target puts 32-bit pieces in each long. */
/* Reassemble two 32-bit pieces into each 64-bit immediate part.  */
24231 ((l[0] & (HOST_WIDE_INT) 0xffffffff)
24232 | ((l[1] & (HOST_WIDE_INT) 0xffffffff) << 32),
24235 if (upper_mode == SImode)
24236 parts[1] = gen_int_mode (l[2], SImode);
24240 ((l[2] & (HOST_WIDE_INT) 0xffffffff)
24241 | ((l[3] & (HOST_WIDE_INT) 0xffffffff) << 32),
24245 gcc_unreachable ();
24252 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
24253 Return false when normal moves are needed; true when all required
24254 insns have been emitted. Operands 2-4 contain the input values
24255 int the correct order; operands 5-7 contain the output values. */
24258 ix86_split_long_move (rtx operands[])
24263 int collisions = 0;
24264 machine_mode mode = GET_MODE (operands[0]);
24265 bool collisionparts[4];
24267 /* The DFmode expanders may ask us to move double.
24268 For 64bit target this is single move. By hiding the fact
24269 here we simplify i386.md splitters. */
24270 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
24272 /* Optimize constant pool reference to immediates. This is used by
24273 fp moves, that force all constants to memory to allow combining. */
24275 if (MEM_P (operands[1])
24276 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
24277 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
24278 operands[1] = get_pool_constant (XEXP (operands[1], 0));
24279 if (push_operand (operands[0], VOIDmode))
24281 operands[0] = copy_rtx (operands[0]);
24282 PUT_MODE (operands[0], word_mode);
24285 operands[0] = gen_lowpart (DImode, operands[0]);
24286 operands[1] = gen_lowpart (DImode, operands[1]);
24287 emit_move_insn (operands[0], operands[1]);
24291 /* The only non-offsettable memory we handle is push. */
24292 if (push_operand (operands[0], VOIDmode))
24295 gcc_assert (!MEM_P (operands[0])
24296 || offsettable_memref_p (operands[0]));
24298 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
24299 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
24301 /* When emitting push, take care for source operands on the stack. */
24302 if (push && MEM_P (operands[1])
24303 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
24305 rtx src_base = XEXP (part[1][nparts - 1], 0);
24307 /* Compensate for the stack decrement by 4. */
24308 if (!TARGET_64BIT && nparts == 3
24309 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
24310 src_base = plus_constant (Pmode, src_base, 4);
24312 /* src_base refers to the stack pointer and is
24313 automatically decreased by emitted push. */
24314 for (i = 0; i < nparts; i++)
24315 part[1][i] = change_address (part[1][i],
24316 GET_MODE (part[1][i]), src_base);
24319 /* We need to do copy in the right order in case an address register
24320 of the source overlaps the destination. */
24321 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
24325 for (i = 0; i < nparts; i++)
24328 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
24329 if (collisionparts[i])
24333 /* Collision in the middle part can be handled by reordering. */
24334 if (collisions == 1 && nparts == 3 && collisionparts [1])
24336 std::swap (part[0][1], part[0][2]);
24337 std::swap (part[1][1], part[1][2]);
24339 else if (collisions == 1
24341 && (collisionparts [1] || collisionparts [2]))
24343 if (collisionparts [1])
24345 std::swap (part[0][1], part[0][2]);
24346 std::swap (part[1][1], part[1][2]);
24350 std::swap (part[0][2], part[0][3]);
24351 std::swap (part[1][2], part[1][3]);
24355 /* If there are more collisions, we can't handle it by reordering.
24356 Do an lea to the last part and use only one colliding move. */
24357 else if (collisions > 1)
24359 rtx base, addr, tls_base = NULL_RTX;
24363 base = part[0][nparts - 1];
24365 /* Handle the case when the last part isn't valid for lea.
24366 Happens in 64-bit mode storing the 12-byte XFmode. */
24367 if (GET_MODE (base) != Pmode)
24368 base = gen_rtx_REG (Pmode, REGNO (base));
24370 addr = XEXP (part[1][0], 0);
24371 if (TARGET_TLS_DIRECT_SEG_REFS)
24373 struct ix86_address parts;
24374 int ok = ix86_decompose_address (addr, &parts);
24376 if (parts.seg == DEFAULT_TLS_SEG_REG)
24378 /* It is not valid to use %gs: or %fs: in
24379 lea though, so we need to remove it from the
24380 address used for lea and add it to each individual
24381 memory loads instead. */
24382 addr = copy_rtx (addr);
24384 while (GET_CODE (*x) == PLUS)
24386 for (i = 0; i < 2; i++)
24388 rtx u = XEXP (*x, i);
24389 if (GET_CODE (u) == ZERO_EXTEND)
24391 if (GET_CODE (u) == UNSPEC
24392 && XINT (u, 1) == UNSPEC_TP)
24394 tls_base = XEXP (*x, i);
24395 *x = XEXP (*x, 1 - i);
24403 gcc_assert (tls_base);
24406 emit_insn (gen_rtx_SET (base, addr));
24408 base = gen_rtx_PLUS (GET_MODE (base), base, tls_base);
24409 part[1][0] = replace_equiv_address (part[1][0], base);
24410 for (i = 1; i < nparts; i++)
24413 base = copy_rtx (base);
24414 tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i);
24415 part[1][i] = replace_equiv_address (part[1][i], tmp);
24426 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
24427 emit_insn (ix86_gen_add3 (stack_pointer_rtx,
24428 stack_pointer_rtx, GEN_INT (-4)));
24429 emit_move_insn (part[0][2], part[1][2]);
24431 else if (nparts == 4)
24433 emit_move_insn (part[0][3], part[1][3]);
24434 emit_move_insn (part[0][2], part[1][2]);
24439 /* In 64bit mode we don't have 32bit push available. In case this is
24440 register, it is OK - we will just use larger counterpart. We also
24441 retype memory - these comes from attempt to avoid REX prefix on
24442 moving of second half of TFmode value. */
24443 if (GET_MODE (part[1][1]) == SImode)
24445 switch (GET_CODE (part[1][1]))
24448 part[1][1] = adjust_address (part[1][1], DImode, 0);
24452 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
24456 gcc_unreachable ();
24459 if (GET_MODE (part[1][0]) == SImode)
24460 part[1][0] = part[1][1];
24463 emit_move_insn (part[0][1], part[1][1]);
24464 emit_move_insn (part[0][0], part[1][0]);
24468 /* Choose correct order to not overwrite the source before it is copied. */
24469 if ((REG_P (part[0][0])
24470 && REG_P (part[1][1])
24471 && (REGNO (part[0][0]) == REGNO (part[1][1])
24473 && REGNO (part[0][0]) == REGNO (part[1][2]))
24475 && REGNO (part[0][0]) == REGNO (part[1][3]))))
24477 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
24479 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
24481 operands[2 + i] = part[0][j];
24482 operands[6 + i] = part[1][j];
24487 for (i = 0; i < nparts; i++)
24489 operands[2 + i] = part[0][i];
24490 operands[6 + i] = part[1][i];
24494 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
24495 if (optimize_insn_for_size_p ())
24497 for (j = 0; j < nparts - 1; j++)
24498 if (CONST_INT_P (operands[6 + j])
24499 && operands[6 + j] != const0_rtx
24500 && REG_P (operands[2 + j]))
24501 for (i = j; i < nparts - 1; i++)
24502 if (CONST_INT_P (operands[7 + i])
24503 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
24504 operands[7 + i] = operands[2 + j];
24507 for (i = 0; i < nparts; i++)
24508 emit_move_insn (operands[2 + i], operands[6 + i]);
24513 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
24514 left shift by a constant, either using a single shift or
24515 a sequence of add instructions. */
24518 ix86_expand_ashl_const (rtx operand, int count, machine_mode mode)
24520 rtx (*insn)(rtx, rtx, rtx);
24523 || (count * ix86_cost->add <= ix86_cost->shift_const
24524 && !optimize_insn_for_size_p ()))
24526 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
24527 while (count-- > 0)
24528 emit_insn (insn (operand, operand, operand));
24532 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
24533 emit_insn (insn (operand, operand, GEN_INT (count)));
/* Split a double-word left shift OPERANDS[0] = OPERANDS[1] << OPERANDS[2]
   into half-word operations.  MODE is the double-word mode being split
   (the half-word generators are chosen by "mode == DImode ? *si* : *di*"
   below).  SCRATCH, if non-NULL and cmov is available, is used to avoid
   a branch when fixing up variable shift counts >= half width.
   NOTE(review): this chunk is an extraction with interleaved original
   line numbers and some elided structural lines; comments describe the
   visible statements only.  */
24538 ix86_split_ashl (rtx *operands, rtx scratch, machine_mode mode)
24540 rtx (*gen_ashl3)(rtx, rtx, rtx);
24541 rtx (*gen_shld)(rtx, rtx, rtx);
24542 int half_width = GET_MODE_BITSIZE (mode) >> 1;
24544 rtx low[2], high[2];
/* Constant shift count: emit a straight-line sequence.  */
24547 if (CONST_INT_P (operands[2]))
24549 split_double_mode (mode, operands, 2, low, high);
/* Reduce the count modulo the operand width, as the hardware does.  */
24550 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
/* Count >= half width: low half becomes zero, old low half moves to
   the high half (shifted further if count > half_width).  */
24552 if (count >= half_width)
24554 emit_move_insn (high[0], low[1]);
24555 emit_move_insn (low[0], const0_rtx);
24557 if (count > half_width)
24558 ix86_expand_ashl_const (high[0], count - half_width, mode);
/* Count < half width: use shld to shift bits from the low half into
   the high half, then shift the low half itself.  */
24562 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
24564 if (!rtx_equal_p (operands[0], operands[1]))
24565 emit_move_insn (operands[0], operands[1]);
24567 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
24568 ix86_expand_ashl_const (low[0], count, mode);
/* Variable shift count from here on.  */
24573 split_double_mode (mode, operands, 1, low, high);
24575 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
24577 if (operands[1] == const1_rtx)
24579 /* Assuming we've chosen a QImode capable registers, then 1 << N
24580 can be done with two 32/64-bit shifts, no branches, no cmoves. */
24581 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
24583 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
/* Materialize sete/setne of the "count >= half width" test into the
   QImode lowparts of the two halves, then shift both.  */
24585 ix86_expand_clear (low[0]);
24586 ix86_expand_clear (high[0]);
24587 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
24589 d = gen_lowpart (QImode, low[0]);
24590 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
24591 s = gen_rtx_EQ (QImode, flags, const0_rtx);
24592 emit_insn (gen_rtx_SET (d, s));
24594 d = gen_lowpart (QImode, high[0]);
24595 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
24596 s = gen_rtx_NE (QImode, flags, const0_rtx);
24597 emit_insn (gen_rtx_SET (d, s));
24600 /* Otherwise, we can get the same results by manually performing
24601 a bit extract operation on bit 5/6, and then performing the two
24602 shifts. The two methods of getting 0/1 into low/high are exactly
24603 the same size. Avoiding the shift in the bit extract case helps
24604 pentium4 a bit; no one else seems to care much either way. */
24607 machine_mode half_mode;
24608 rtx (*gen_lshr3)(rtx, rtx, rtx);
24609 rtx (*gen_and3)(rtx, rtx, rtx);
24610 rtx (*gen_xor3)(rtx, rtx, rtx);
24611 HOST_WIDE_INT bits;
24614 if (mode == DImode)
24616 half_mode = SImode;
24617 gen_lshr3 = gen_lshrsi3;
24618 gen_and3 = gen_andsi3;
24619 gen_xor3 = gen_xorsi3;
24624 half_mode = DImode;
24625 gen_lshr3 = gen_lshrdi3;
24626 gen_and3 = gen_anddi3;
24627 gen_xor3 = gen_xordi3;
/* Avoid a partial-register write of the count when that stalls.  */
24631 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
24632 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
24634 x = gen_lowpart (half_mode, operands[2]);
24635 emit_insn (gen_rtx_SET (high[0], x));
/* Extract the "count >= half width" bit into HIGH, and its complement
   into LOW; the subsequent half-mode shifts then place the single set
   bit correctly.  */
24637 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
24638 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
24639 emit_move_insn (low[0], high[0]);
24640 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
24643 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
24644 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
24648 if (operands[1] == constm1_rtx)
24650 /* For -1 << N, we can avoid the shld instruction, because we
24651 know that we're shifting 0...31/63 ones into a -1. */
24652 emit_move_insn (low[0], constm1_rtx)
24653 if (optimize_insn_for_size_p ())
24654 emit_move_insn (high[0], low[0]);
24656 emit_move_insn (high[0], constm1_rtx);
/* General variable-count case: shld plus a shift of the low half.  */
24660 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
24662 if (!rtx_equal_p (operands[0], operands[1]))
24663 emit_move_insn (operands[0], operands[1]);
24665 split_double_mode (mode, operands, 1, low, high);
24666 emit_insn (gen_shld (high[0], low[0], operands[2]));
24669 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
/* Fix up the result when the runtime count turned out to be >= half
   width: with cmov+scratch no branch is needed, otherwise the adj_2
   pattern handles it.  */
24671 if (TARGET_CMOVE && scratch)
24673 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
24674 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
24676 ix86_expand_clear (scratch);
24677 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
24681 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
24682 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
24684 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
/* Split a double-word arithmetic right shift
   OPERANDS[0] = OPERANDS[1] >> OPERANDS[2] (sign-propagating) into
   half-word operations.  MODE is the double-word mode being split;
   SCRATCH optionally holds the sign-extension word for the branch-free
   cmov fixup.  NOTE(review): extraction with interleaved line numbers
   and elided structural lines; comments reflect visible code only.  */
24689 ix86_split_ashr (rtx *operands, rtx scratch, machine_mode mode)
24691 rtx (*gen_ashr3)(rtx, rtx, rtx)
24692 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
24693 rtx (*gen_shrd)(rtx, rtx, rtx);
24694 int half_width = GET_MODE_BITSIZE (mode) >> 1;
24696 rtx low[2], high[2];
24699 if (CONST_INT_P (operands[2]))
24701 split_double_mode (mode, operands, 2, low, high);
24702 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
/* Full-width-minus-one shift: both halves become the sign mask.  */
24704 if (count == GET_MODE_BITSIZE (mode) - 1)
24706 emit_move_insn (high[0], high[1]);
24707 emit_insn (gen_ashr3 (high[0], high[0],
24708 GEN_INT (half_width - 1)));
24709 emit_move_insn (low[0], high[0]);
/* Count >= half width: low result is the old high half (shifted
   further if needed); high result is the sign replicated.  */
24712 else if (count >= half_width)
24714 emit_move_insn (low[0], high[1]);
24715 emit_move_insn (high[0], low[0]);
24716 emit_insn (gen_ashr3 (high[0], high[0],
24717 GEN_INT (half_width - 1)));
24719 if (count > half_width)
24720 emit_insn (gen_ashr3 (low[0], low[0],
24721 GEN_INT (count - half_width)));
/* Count < half width: shrd moves bits from the high half into the
   low half; the high half gets an arithmetic shift.  */
24725 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
24727 if (!rtx_equal_p (operands[0], operands[1]))
24728 emit_move_insn (operands[0], operands[1]);
24730 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
24731 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
/* Variable shift count.  */
24736 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
24738 if (!rtx_equal_p (operands[0], operands[1]))
24739 emit_move_insn (operands[0], operands[1]);
24741 split_double_mode (mode, operands, 1, low, high);
24743 emit_insn (gen_shrd (low[0], high[0], operands[2]));
24744 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
/* Fix up for runtime counts >= half width; SCRATCH holds the sign word
   for the cmov-based adjustment.  */
24746 if (TARGET_CMOVE && scratch)
24748 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
24749 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
24751 emit_move_insn (scratch, high[0]);
24752 emit_insn (gen_ashr3 (scratch, scratch,
24753 GEN_INT (half_width - 1)));
24754 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
24759 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
24760 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
24762 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
/* Split a double-word logical right shift
   OPERANDS[0] = OPERANDS[1] >> OPERANDS[2] (zero-filling) into
   half-word operations.  MODE is the double-word mode being split;
   SCRATCH, if available with cmov, enables a branch-free fixup for
   variable counts >= half width.  NOTE(review): extraction with
   interleaved line numbers and elided structural lines.  */
24768 ix86_split_lshr (rtx *operands, rtx scratch, machine_mode mode)
24770 rtx (*gen_lshr3)(rtx, rtx, rtx)
24771 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
24772 rtx (*gen_shrd)(rtx, rtx, rtx);
24773 int half_width = GET_MODE_BITSIZE (mode) >> 1;
24775 rtx low[2], high[2];
24778 if (CONST_INT_P (operands[2]))
24780 split_double_mode (mode, operands, 2, low, high);
24781 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
/* Count >= half width: high half becomes zero, old high half moves
   to the low half (shifted further if needed).  */
24783 if (count >= half_width)
24785 emit_move_insn (low[0], high[1]);
24786 ix86_expand_clear (high[0]);
24788 if (count > half_width)
24789 emit_insn (gen_lshr3 (low[0], low[0],
24790 GEN_INT (count - half_width)));
/* Count < half width: shrd moves bits from the high half into the
   low half; the high half gets a logical shift.  */
24794 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
24796 if (!rtx_equal_p (operands[0], operands[1]))
24797 emit_move_insn (operands[0], operands[1]);
24799 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
24800 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
/* Variable shift count.  */
24805 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
24807 if (!rtx_equal_p (operands[0], operands[1]))
24808 emit_move_insn (operands[0], operands[1]);
24810 split_double_mode (mode, operands, 1, low, high);
24812 emit_insn (gen_shrd (low[0], high[0], operands[2]));
24813 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
/* Fix up for runtime counts >= half width; zero fill comes from the
   cleared SCRATCH register when cmov is usable.  */
24815 if (TARGET_CMOVE && scratch)
24817 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
24818 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
24820 ix86_expand_clear (scratch);
24821 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
24826 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
24827 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
24829 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
24834 /* Predict just emitted jump instruction to be taken with probability PROB. */
24836 predict_jump (int prob)
24838 rtx insn = get_last_insn ();
24839 gcc_assert (JUMP_P (insn));
24840 add_int_reg_note (insn, REG_BR_PROB, prob);
24843 /* Helper function for the string operations below. Dest VARIABLE whether
24844 it is aligned to VALUE bytes. If true, jump to the label. */
24845 static rtx_code_label *
24846 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
24848 rtx_code_label *label = gen_label_rtx ();
24849 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
24850 if (GET_MODE (variable) == DImode)
24851 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
24853 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
24854 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
24857 predict_jump (REG_BR_PROB_BASE * 50 / 100);
24859 predict_jump (REG_BR_PROB_BASE * 90 / 100);
24863 /* Adjust COUNTER by the VALUE. */
24865 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
24867 rtx (*gen_add)(rtx, rtx, rtx)
24868 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
24870 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
24873 /* Zero extend possibly SImode EXP to Pmode register. */
24875 ix86_zero_extend_to_Pmode (rtx exp)
24877 return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
24880 /* Divide COUNTREG by SCALE. */
24882 scale_counter (rtx countreg, int scale)
24888 if (CONST_INT_P (countreg))
24889 return GEN_INT (INTVAL (countreg) / scale);
24890 gcc_assert (REG_P (countreg));
24892 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
24893 GEN_INT (exact_log2 (scale)),
24894 NULL, 1, OPTAB_DIRECT);
24898 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
24899 DImode for constant loop counts. */
24901 static machine_mode
24902 counter_mode (rtx count_exp)
24904 if (GET_MODE (count_exp) != VOIDmode)
24905 return GET_MODE (count_exp);
24906 if (!CONST_INT_P (count_exp))
24908 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
24913 /* Copy the address to a Pmode register. This is used for x32 to
24914 truncate DImode TLS address to a SImode register. */
24917 ix86_copy_addr_to_reg (rtx addr)
24920 if (GET_MODE (addr) == Pmode || GET_MODE (addr) == VOIDmode)
24922 reg = copy_addr_to_reg (addr);
24923 REG_POINTER (reg) = 1;
24928 gcc_assert (GET_MODE (addr) == DImode && Pmode == SImode);
24929 reg = copy_to_mode_reg (DImode, addr);
24930 REG_POINTER (reg) = 1;
24931 return gen_rtx_SUBREG (SImode, reg, 0);
24935 /* When ISSETMEM is FALSE, output simple loop to move memory pointer to SRCPTR
24936 to DESTPTR via chunks of MODE unrolled UNROLL times, overall size is COUNT
24937 specified in bytes. When ISSETMEM is TRUE, output the equivalent loop to set
24938 memory by VALUE (supposed to be in MODE).
24940 The size is rounded down to whole number of chunk size moved at once.
24941 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
/* NOTE(review): extraction with interleaved original line numbers and
   some elided structural lines; added comments reflect only visible
   statements.  EXPECTED_SIZE of -1 means "unknown" and selects a
   default branch probability.  */
24945 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
24946 rtx destptr, rtx srcptr, rtx value,
24947 rtx count, machine_mode mode, int unroll,
24948 int expected_size, bool issetmem)
24950 rtx_code_label *out_label, *top_label;
24952 machine_mode iter_mode = counter_mode (count);
/* Bytes processed per loop iteration; must be a power of two (asserted
   below) so the iteration byte offset can be fed to offset_address.  */
24953 int piece_size_n = GET_MODE_SIZE (mode) * unroll;
24954 rtx piece_size = GEN_INT (piece_size_n);
24955 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
24959 top_label = gen_label_rtx ();
24960 out_label = gen_label_rtx ();
24961 iter = gen_reg_rtx (iter_mode);
/* SIZE = COUNT rounded down to a multiple of the chunk size.  */
24963 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
24964 NULL, 1, OPTAB_DIRECT);
24965 /* Those two should combine. */
24966 if (piece_size == const1_rtx)
/* Skip the loop entirely when no whole chunk fits.  */
24968 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
24970 predict_jump (REG_BR_PROB_BASE * 10 / 100);
24972 emit_move_insn (iter, const0_rtx);
24974 emit_label (top_label);
24976 tmp = convert_modes (Pmode, iter_mode, iter, true);
24978 /* This assert could be relaxed - in this case we'll need to compute
24979 smallest power of two, containing in PIECE_SIZE_N and pass it to
24981 gcc_assert ((piece_size_n & (piece_size_n - 1)) == 0);
24982 destmem = offset_address (destmem, tmp, piece_size_n);
24983 destmem = adjust_address (destmem, mode, 0);
24987 srcmem = offset_address (srcmem, copy_rtx (tmp), piece_size_n);
24988 srcmem = adjust_address (srcmem, mode, 0);
24990 /* When unrolling for chips that reorder memory reads and writes,
24991 we can save registers by using single temporary.
24992 Also using 4 temporaries is overkill in 32bit mode. */
/* "&& 0": this single-temporary variant is intentionally disabled.  */
24993 if (!TARGET_64BIT && 0)
24995 for (i = 0; i < unroll; i++)
25000 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
25002 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
25004 emit_move_insn (destmem, srcmem);
/* Enabled copy path: load all UNROLL pieces into registers first,
   then store them — tolerates hardware reordering of loads/stores.  */
25010 gcc_assert (unroll <= 4);
25011 for (i = 0; i < unroll; i++)
25013 tmpreg[i] = gen_reg_rtx (mode);
25017 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
25019 emit_move_insn (tmpreg[i], srcmem);
25021 for (i = 0; i < unroll; i++)
25026 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
25028 emit_move_insn (destmem, tmpreg[i]);
/* Setmem path: store VALUE UNROLL times.  */
25033 for (i = 0; i < unroll; i++)
25037 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
25038 emit_move_insn (destmem, value);
/* Advance the iterator and loop while ITER < SIZE.  */
25041 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
25042 true, OPTAB_LIB_WIDEN);
25044 emit_move_insn (iter, tmp);
25046 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
/* Derive the backedge probability from the expected trip count.  */
25048 if (expected_size != -1)
25050 expected_size /= GET_MODE_SIZE (mode) * unroll;
25051 if (expected_size == 0)
25053 else if (expected_size > REG_BR_PROB_BASE)
25054 predict_jump (REG_BR_PROB_BASE - 1);
25056 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
25059 predict_jump (REG_BR_PROB_BASE * 80 / 100);
/* Finally bump the caller's pointers past the copied/stored region.  */
25060 iter = ix86_zero_extend_to_Pmode (iter);
25061 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
25062 true, OPTAB_LIB_WIDEN);
25063 if (tmp != destptr)
25064 emit_move_insn (destptr, tmp);
25067 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
25068 true, OPTAB_LIB_WIDEN);
25070 emit_move_insn (srcptr, tmp);
25072 emit_label (out_label);
25075 /* Output "rep; mov" or "rep; stos" instruction depending on ISSETMEM argument.
25076 When ISSETMEM is true, arguments SRCMEM and SRCPTR are ignored.
25077 When ISSETMEM is false, arguments VALUE and ORIG_VALUE are ignored.
25078 For setmem case, VALUE is a promoted to a wider size ORIG_VALUE.
25079 ORIG_VALUE is the original value passed to memset to fill the memory with.
25080 Other arguments have same meaning as for previous function. */
/* NOTE(review): extraction with interleaved original line numbers and
   elided structural lines; added comments describe visible code only.  */
25083 expand_set_or_movmem_via_rep (rtx destmem, rtx srcmem,
25084 rtx destptr, rtx srcptr, rtx value, rtx orig_value,
25086 machine_mode mode, bool issetmem)
25091 HOST_WIDE_INT rounded_count;
25093 /* If possible, it is shorter to use rep movs.
25094 TODO: Maybe it is better to move this logic to decide_alg. */
/* A QImode count that is a multiple of 4 (and a zero fill, for
   setmem) can be widened to SImode chunks here.  */
25095 if (mode == QImode && CONST_INT_P (count) && !(INTVAL (count) & 3)
25096 && (!issetmem || orig_value == const0_rtx))
/* Normalize DESTMEM to a BLKmode reference based at DESTPTR.  */
25099 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
25100 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
/* rep counts elements of GET_MODE_SIZE (mode) bytes, not bytes.  */
25102 countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
25103 GET_MODE_SIZE (mode)));
/* DESTEXP expresses the final value of the destination pointer, used
   by the rep patterns to describe the pointer auto-increment.  */
25104 if (mode != QImode)
25106 destexp = gen_rtx_ASHIFT (Pmode, countreg,
25107 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
25108 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
25111 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
/* Keep MEM_SIZE accurate: set it to the rounded byte count when known,
   otherwise clear a stale value.  */
25112 if ((!issetmem || orig_value == const0_rtx) && CONST_INT_P (count))
25115 = ROUND_DOWN (INTVAL (count), (HOST_WIDE_INT) GET_MODE_SIZE (mode));
25116 destmem = shallow_copy_rtx (destmem);
25117 set_mem_size (destmem, rounded_count);
25119 else if (MEM_SIZE_KNOWN_P (destmem))
25120 clear_mem_size (destmem);
/* Setmem: emit rep stos with VALUE narrowed to MODE.  */
25124 value = force_reg (mode, gen_lowpart (mode, value));
25125 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
/* Movmem: mirror the destination handling for the source.  */
25129 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
25130 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
25131 if (mode != QImode)
25133 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
25134 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
25135 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
25138 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
25139 if (CONST_INT_P (count))
25142 = ROUND_DOWN (INTVAL (count), (HOST_WIDE_INT) GET_MODE_SIZE (mode));
25143 srcmem = shallow_copy_rtx (srcmem);
25144 set_mem_size (srcmem, rounded_count);
25148 if (MEM_SIZE_KNOWN_P (srcmem))
25149 clear_mem_size (srcmem);
25151 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
25156 /* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to
25158 SRC is passed by pointer to be updated on return.
25159 Return value is updated DST. */
/* NOTE(review): extraction with interleaved original line numbers and
   elided structural lines.  DESTPTR/SRCPTR are the Pmode address
   registers and are advanced as a side effect.  */
25161 emit_memmov (rtx destmem, rtx *srcmem, rtx destptr, rtx srcptr,
25162 HOST_WIDE_INT size_to_move)
25164 rtx dst = destmem, src = *srcmem, adjust, tempreg;
25165 enum insn_code code;
25166 machine_mode move_mode;
25169 /* Find the widest mode in which we could perform moves.
25170 Start with the biggest power of 2 less than SIZE_TO_MOVE and half
25171 it until move of such size is supported. */
25172 piece_size = 1 << floor_log2 (size_to_move);
25173 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
25174 code = optab_handler (mov_optab, move_mode);
25175 while (code == CODE_FOR_nothing && piece_size > 1)
25178 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
25179 code = optab_handler (mov_optab, move_mode);
25182 /* Find the corresponding vector mode with the same size as MOVE_MODE.
25183 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
25184 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
25186 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
25187 move_mode = mode_for_vector (word_mode, nunits);
25188 code = optab_handler (mov_optab, move_mode);
/* No vector move available: fall back to word-sized pieces.  */
25189 if (code == CODE_FOR_nothing)
25191 move_mode = word_mode;
25192 piece_size = GET_MODE_SIZE (move_mode);
25193 code = optab_handler (mov_optab, move_mode);
25196 gcc_assert (code != CODE_FOR_nothing);
25198 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
25199 src = adjust_automodify_address_nv (src, move_mode, srcptr, 0);
25201 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZES moves. */
25202 gcc_assert (size_to_move % piece_size == 0);
25203 adjust = GEN_INT (piece_size);
25204 for (i = 0; i < size_to_move; i += piece_size)
25206 /* We move from memory to memory, so we'll need to do it via
25207 a temporary register. */
25208 tempreg = gen_reg_rtx (move_mode);
25209 emit_insn (GEN_FCN (code) (tempreg, src));
25210 emit_insn (GEN_FCN (code) (dst, tempreg));
/* Advance both address registers past the piece just copied.  */
25212 emit_move_insn (destptr,
25213 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
25214 emit_move_insn (srcptr,
25215 gen_rtx_PLUS (Pmode, copy_rtx (srcptr), adjust));
25217 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
25219 src = adjust_automodify_address_nv (src, move_mode, srcptr,
25223 /* Update DST and SRC rtx. */
25228 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
/* NOTE(review): extraction with interleaved original line numbers and
   elided structural lines; comments describe visible code only.  */
25230 expand_movmem_epilogue (rtx destmem, rtx srcmem,
25231 rtx destptr, rtx srcptr, rtx count, int max_size)
/* Constant COUNT: the residue is known, so copy it piecewise with the
   largest possible power-of-two moves and no branches.  */
25234 if (CONST_INT_P (count))
25236 HOST_WIDE_INT countval = INTVAL (count);
25237 HOST_WIDE_INT epilogue_size = countval % max_size;
25240 /* For now MAX_SIZE should be a power of 2. This assert could be
25241 relaxed, but it'll require a bit more complicated epilogue
25243 gcc_assert ((max_size & (max_size - 1)) == 0);
25244 for (i = max_size; i >= 1; i >>= 1)
25246 if (epilogue_size & i)
25247 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
/* Large variable residue: fall back to a byte copy loop.  */
25253 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
25254 count, 1, OPTAB_DIRECT);
25255 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
25256 count, QImode, 1, 4, false);
25260 /* When there are stringops, we can cheaply increase dest and src pointers.
25261 Otherwise we save code size by maintaining offset (zero is readily
25262 available from preceding rep operation) and using x86 addressing modes.
/* Variable residue <= 4 bits worth of size: test each bit of COUNT
   (4, 2, 1 bytes) and copy conditionally.  With stringops the movs
   instruction advances the pointers for us.  */
25264 if (TARGET_SINGLE_STRINGOP)
25268 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
25269 src = change_address (srcmem, SImode, srcptr);
25270 dest = change_address (destmem, SImode, destptr);
25271 emit_insn (gen_strmov (destptr, dest, srcptr, src));
25272 emit_label (label);
25273 LABEL_NUSES (label) = 1;
25277 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
25278 src = change_address (srcmem, HImode, srcptr);
25279 dest = change_address (destmem, HImode, destptr);
25280 emit_insn (gen_strmov (destptr, dest, srcptr, src));
25281 emit_label (label);
25282 LABEL_NUSES (label) = 1;
25286 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
25287 src = change_address (srcmem, QImode, srcptr);
25288 dest = change_address (destmem, QImode, destptr);
25289 emit_insn (gen_strmov (destptr, dest, srcptr, src));
25290 emit_label (label);
25291 LABEL_NUSES (label) = 1;
/* No stringops: keep a running OFFSET register instead of bumping the
   pointers, and address via base+offset.  */
25296 rtx offset = force_reg (Pmode, const0_rtx);
25301 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
25302 src = change_address (srcmem, SImode, srcptr);
25303 dest = change_address (destmem, SImode, destptr);
25304 emit_move_insn (dest, src);
25305 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
25306 true, OPTAB_LIB_WIDEN);
25308 emit_move_insn (offset, tmp);
25309 emit_label (label);
25310 LABEL_NUSES (label) = 1;
25314 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
25315 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
25316 src = change_address (srcmem, HImode, tmp);
25317 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
25318 dest = change_address (destmem, HImode, tmp);
25319 emit_move_insn (dest, src);
25320 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
25321 true, OPTAB_LIB_WIDEN);
25323 emit_move_insn (offset, tmp);
25324 emit_label (label);
25325 LABEL_NUSES (label) = 1;
25329 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
25330 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
25331 src = change_address (srcmem, QImode, tmp);
25332 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
25333 dest = change_address (destmem, QImode, tmp);
25334 emit_move_insn (dest, src);
25335 emit_label (label);
25336 LABEL_NUSES (label) = 1;
25341 /* This function emits moves to fill SIZE_TO_MOVE bytes starting from DESTMEM
25342 with value PROMOTED_VAL.
25343 SRC is passed by pointer to be updated on return.
25344 Return value is updated DST. */
/* NOTE(review): extraction with interleaved original line numbers and
   elided structural lines.  DESTPTR is advanced as a side effect.  */
25346 emit_memset (rtx destmem, rtx destptr, rtx promoted_val,
25347 HOST_WIDE_INT size_to_move)
25349 rtx dst = destmem, adjust;
25350 enum insn_code code;
25351 machine_mode move_mode;
25354 /* Find the widest mode in which we could perform moves.
25355 Start with the biggest power of 2 less than SIZE_TO_MOVE and half
25356 it until move of such size is supported. */
25357 move_mode = GET_MODE (promoted_val);
/* A VOIDmode value (e.g. a CONST_INT) is stored bytewise.  */
25358 if (move_mode == VOIDmode)
25359 move_mode = QImode;
/* Narrow the promoted value if fewer bytes than its width remain.  */
25360 if (size_to_move < GET_MODE_SIZE (move_mode))
25362 move_mode = mode_for_size (size_to_move * BITS_PER_UNIT, MODE_INT, 0);
25363 promoted_val = gen_lowpart (move_mode, promoted_val);
25365 piece_size = GET_MODE_SIZE (move_mode);
25366 code = optab_handler (mov_optab, move_mode);
25367 gcc_assert (code != CODE_FOR_nothing && promoted_val != NULL_RTX);
25369 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
25371 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZES moves. */
25372 gcc_assert (size_to_move % piece_size == 0);
25373 adjust = GEN_INT (piece_size);
25374 for (i = 0; i < size_to_move; i += piece_size)
/* Word-sized (or smaller) pieces use the strset pattern, which also
   advances DESTPTR; wider (vector) pieces store directly and bump
   DESTPTR by hand.  */
25376 if (piece_size <= GET_MODE_SIZE (word_mode))
25378 emit_insn (gen_strset (destptr, dst, promoted_val));
25379 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
25384 emit_insn (GEN_FCN (code) (dst, promoted_val));
25386 emit_move_insn (destptr,
25387 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
25389 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
25393 /* Update DST rtx. */
25396 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
25398 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
25399 rtx count, int max_size)
25402 expand_simple_binop (counter_mode (count), AND, count,
25403 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
25404 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
25405 gen_lowpart (QImode, value), count, QImode,
25406 1, max_size / 2, true);
25409 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
/* NOTE(review): extraction with interleaved original line numbers and
   elided structural lines.  VEC_VALUE, when non-NULL, is a wider
   (vector) promotion of VALUE used for the larger pieces.  */
25411 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value,
25412 rtx count, int max_size)
/* Constant COUNT: store the residue with the largest possible
   power-of-two pieces, no branches.  */
25416 if (CONST_INT_P (count))
25418 HOST_WIDE_INT countval = INTVAL (count);
25419 HOST_WIDE_INT epilogue_size = countval % max_size;
25422 /* For now MAX_SIZE should be a power of 2. This assert could be
25423 relaxed, but it'll require a bit more complicated epilogue
25425 gcc_assert ((max_size & (max_size - 1)) == 0);
25426 for (i = max_size; i >= 1; i >>= 1)
25428 if (epilogue_size & i)
/* Prefer the wide vector value for pieces larger than VALUE.  */
25430 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
25431 destmem = emit_memset (destmem, destptr, vec_value, i);
25433 destmem = emit_memset (destmem, destptr, value, i);
/* Large variable residue: use the byte loop instead of a chain of
   conditional stores.  */
25440 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
/* Variable residue: test each size bit of COUNT (16, 8, 4, 2, 1)
   and store conditionally; strset advances DESTPTR.  */
25445 rtx_code_label *label = ix86_expand_aligntest (count, 16, true);
25448 dest = change_address (destmem, DImode, destptr);
25449 emit_insn (gen_strset (destptr, dest, value));
25450 dest = adjust_automodify_address_nv (dest, DImode, destptr, 8);
25451 emit_insn (gen_strset (destptr, dest, value));
/* Without 64-bit stores, a 16-byte piece takes four SImode stores.  */
25455 dest = change_address (destmem, SImode, destptr);
25456 emit_insn (gen_strset (destptr, dest, value));
25457 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
25458 emit_insn (gen_strset (destptr, dest, value));
25459 dest = adjust_automodify_address_nv (dest, SImode, destptr, 8);
25460 emit_insn (gen_strset (destptr, dest, value));
25461 dest = adjust_automodify_address_nv (dest, SImode, destptr, 12);
25462 emit_insn (gen_strset (destptr, dest, value));
25464 emit_label (label);
25465 LABEL_NUSES (label) = 1;
25469 rtx_code_label *label = ix86_expand_aligntest (count, 8, true);
25472 dest = change_address (destmem, DImode, destptr);
25473 emit_insn (gen_strset (destptr, dest, value));
25477 dest = change_address (destmem, SImode, destptr);
25478 emit_insn (gen_strset (destptr, dest, value));
25479 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
25480 emit_insn (gen_strset (destptr, dest, value));
25482 emit_label (label);
25483 LABEL_NUSES (label) = 1;
25487 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
25488 dest = change_address (destmem, SImode, destptr);
25489 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
25490 emit_label (label);
25491 LABEL_NUSES (label) = 1;
25495 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
25496 dest = change_address (destmem, HImode, destptr);
25497 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
25498 emit_label (label);
25499 LABEL_NUSES (label) = 1;
25503 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
25504 dest = change_address (destmem, QImode, destptr);
25505 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
25506 emit_label (label);
25507 LABEL_NUSES (label) = 1;
25511 /* Depending on ISSETMEM, copy enough from SRCMEM to DESTMEM or set enough to
25512 DESTMEM to align it to DESIRED_ALIGNMENT. Original alignment is ALIGN.
25513 Depending on ISSETMEM, either arguments SRCMEM/SRCPTR or VALUE/VEC_VALUE are
25515 Return value is updated DESTMEM. */
/* NOTE(review): extraction with interleaved original line numbers and
   elided structural lines; comments describe visible code only.  */
25517 expand_set_or_movmem_prologue (rtx destmem, rtx srcmem,
25518 rtx destptr, rtx srcptr, rtx value,
25519 rtx vec_value, rtx count, int align,
25520 int desired_alignment, bool issetmem)
/* For each alignment step (1, 2, 4, ... up to DESIRED_ALIGNMENT),
   conditionally process I bytes when DESTPTR has bit I set, i.e. is
   not yet aligned to 2*I.  */
25523 for (i = 1; i < desired_alignment; i <<= 1)
25527 rtx_code_label *label = ix86_expand_aligntest (destptr, i, false);
25530 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
25531 destmem = emit_memset (destmem, destptr, vec_value, i);
25533 destmem = emit_memset (destmem, destptr, value, i);
25536 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
/* The bytes handled here reduce the remaining COUNT.  */
25537 ix86_adjust_counter (count, i);
25538 emit_label (label);
25539 LABEL_NUSES (label) = 1;
/* After this step the destination is known aligned to 2*I bytes.  */
25540 set_mem_align (destmem, i * 2 * BITS_PER_UNIT);
25546 /* Test if COUNT&SIZE is nonzero and if so, expand movmem
25547 or setmem sequence that is valid for SIZE..2*SIZE-1 bytes
25548 and jump to DONE_LABEL. */
25550 expand_small_movmem_or_setmem (rtx destmem, rtx srcmem,
25551 rtx destptr, rtx srcptr,
25552 rtx value, rtx vec_value,
25553 rtx count, int size,
25554 rtx done_label, bool issetmem)
/* Strategy: do one pass of SIZE bytes from the start of the block and a
   second, possibly overlapping pass of SIZE bytes ending at DESTPTR+COUNT,
   which together cover any length in SIZE..2*SIZE-1.
   NOTE(review): interior lines are elided in this excerpt; the issetmem
   branches below lack their visible if/else framing.  */
25556 rtx_code_label *label = ix86_expand_aligntest (count, size, false);
25557 machine_mode mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 1);
25561 /* If we do not have vector value to copy, we must reduce size. */
25566 if (GET_MODE (value) == VOIDmode && size > 8)
25568 else if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (GET_MODE (value)))
25569 mode = GET_MODE (value);
25572 mode = GET_MODE (vec_value), value = vec_value;
25576 /* Choose appropriate vector mode. */
25578 mode = TARGET_AVX ? V32QImode : TARGET_SSE ? V16QImode : DImode;
25579 else if (size >= 16)
25580 mode = TARGET_SSE ? V16QImode : DImode;
25581 srcmem = change_address (srcmem, mode, srcptr);
25583 destmem = change_address (destmem, mode, destptr);
25584 modesize = GEN_INT (GET_MODE_SIZE (mode));
25585 gcc_assert (GET_MODE_SIZE (mode) <= size);
/* First pass: SIZE bytes starting at the head of the block.  */
25586 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
25589 emit_move_insn (destmem, gen_lowpart (mode, value));
25592 emit_move_insn (destmem, srcmem);
25593 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
25595 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
/* Rebase addresses to DESTPTR + COUNT - 2*SIZE; adding back the SIZE
   bytes of the first pass leaves the second pass ending exactly at
   DESTPTR + COUNT.  */
25598 destmem = offset_address (destmem, count, 1);
25599 destmem = offset_address (destmem, GEN_INT (-2 * size),
25600 GET_MODE_SIZE (mode));
25603 srcmem = offset_address (srcmem, count, 1);
25604 srcmem = offset_address (srcmem, GEN_INT (-2 * size),
25605 GET_MODE_SIZE (mode));
/* Second pass: last SIZE bytes of the block (may overlap the first).  */
25607 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
25610 emit_move_insn (destmem, gen_lowpart (mode, value));
25613 emit_move_insn (destmem, srcmem);
25614 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
25616 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
25618 emit_jump_insn (gen_jump (done_label));
/* Fall-through target when COUNT & SIZE was zero.  */
25621 emit_label (label);
25622 LABEL_NUSES (label) = 1;
25625 /* Handle small memcpy (up to SIZE that is supposed to be small power of 2.
25626 and get ready for the main memcpy loop by copying initial DESIRED_ALIGN-ALIGN
25627 bytes and last SIZE bytes adjusting DESTPTR/SRCPTR/COUNT in a way we can
25628 proceed with a loop copying SIZE bytes at once. Do moves in MODE.
25629 DONE_LABEL is a label after the whole copying sequence. The label is created
25630 on demand if *DONE_LABEL is NULL.
25631 MIN_SIZE is minimal size of block copied. This value gets adjusted for new
25632 bounds after the initial copies.
25634 DESTMEM/SRCMEM are memory expressions pointing to the copied block,
25635 DESTPTR/SRCPTR are pointers to the block. DYNAMIC_CHECK indicate whether
25636 we will dispatch to a library call for large blocks.
25638 In pseudocode we do:
25642 Assume that SIZE is 4. Bigger sizes are handled analogously
25645 copy 4 bytes from SRCPTR to DESTPTR
25646 copy 4 bytes from SRCPTR + COUNT - 4 to DESTPTR + COUNT - 4
25651 copy 1 byte from SRCPTR to DESTPTR
25654 copy 2 bytes from SRCPTR to DESTPTR
25655 copy 2 bytes from SRCPTR + COUNT - 2 to DESTPTR + COUNT - 2
25660 copy at least DESIRED_ALIGN-ALIGN bytes from SRCPTR to DESTPTR
25661 copy SIZE bytes from SRCPTR + COUNT - SIZE to DESTPTR + COUNT -SIZE
25663 OLD_DESTPTR = DESTPTR;
25664 Align DESTPTR up to DESIRED_ALIGN
25665 SRCPTR += DESTPTR - OLD_DESTPTR
25666 COUNT -= DEST_PTR - OLD_DESTPTR
25668 Round COUNT down to multiple of SIZE
25669 << optional caller supplied zero size guard is here >>
25670 << optional caller supplied dynamic check is here >>
25671 << caller supplied main copy loop is here >>
/* NOTE(review): this excerpt elides many interior lines (braces, else
   arms, some parameters of the signature below).  Comments added here
   describe only what the visible statements establish.  */
25676 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves (rtx destmem, rtx srcmem,
25677 rtx *destptr, rtx *srcptr,
25679 rtx value, rtx vec_value,
25681 rtx_code_label **done_label,
25685 unsigned HOST_WIDE_INT *min_size,
25686 bool dynamic_check,
25689 rtx_code_label *loop_label = NULL, *label;
25692 int prolog_size = 0;
25695 /* Choose proper value to copy. */
25696 if (issetmem && VECTOR_MODE_P (mode))
25697 mode_value = vec_value;
25699 mode_value = value;
25700 gcc_assert (GET_MODE_SIZE (mode) <= size);
25702 /* See if block is big or small, handle small blocks. */
25703 if (!CONST_INT_P (*count) && *min_size < (unsigned HOST_WIDE_INT)size)
25706 loop_label = gen_label_rtx ();
25709 *done_label = gen_label_rtx ();
/* Dispatch blocks of at least SIZE2 bytes to the main loop below.  */
25711 emit_cmp_and_jump_insns (*count, GEN_INT (size2), GE, 0, GET_MODE (*count),
25715 /* Handle sizes > 3. */
25716 for (;size2 > 2; size2 >>= 1)
25717 expand_small_movmem_or_setmem (destmem, srcmem,
25721 size2, *done_label, issetmem);
25722 /* Nothing to copy? Jump to DONE_LABEL if so */
25723 emit_cmp_and_jump_insns (*count, const0_rtx, EQ, 0, GET_MODE (*count),
25726 /* Do a byte copy. */
25727 destmem = change_address (destmem, QImode, *destptr);
25729 emit_move_insn (destmem, gen_lowpart (QImode, value));
25732 srcmem = change_address (srcmem, QImode, *srcptr);
25733 emit_move_insn (destmem, srcmem);
25736 /* Handle sizes 2 and 3. */
25737 label = ix86_expand_aligntest (*count, 2, false)

;
25738 destmem = change_address (destmem, HImode, *destptr);
/* Address the last two bytes: DESTPTR + COUNT - 2.  */
25739 destmem = offset_address (destmem, *count, 1);
25740 destmem = offset_address (destmem, GEN_INT (-2), 2);
25742 emit_move_insn (destmem, gen_lowpart (HImode, value));
25745 srcmem = change_address (srcmem, HImode, *srcptr);
25746 srcmem = offset_address (srcmem, *count, 1);
25747 srcmem = offset_address (srcmem, GEN_INT (-2), 2);
25748 emit_move_insn (destmem, srcmem);
25751 emit_label (label);
25752 LABEL_NUSES (label) = 1;
25753 emit_jump_insn (gen_jump (*done_label));
/* Large-block path: at this point COUNT >= SIZE is guaranteed either
   statically or by the branch above.  */
25757 gcc_assert (*min_size >= (unsigned HOST_WIDE_INT)size
25758 || UINTVAL (*count) >= (unsigned HOST_WIDE_INT)size);
25760 /* Start memcpy for COUNT >= SIZE. */
25763 emit_label (loop_label);
25764 LABEL_NUSES (loop_label) = 1;
25767 /* Copy first desired_align bytes. */
25769 srcmem = change_address (srcmem, mode, *srcptr);
25770 destmem = change_address (destmem, mode, *destptr);
25771 modesize = GEN_INT (GET_MODE_SIZE (mode));
25772 for (n = 0; prolog_size < desired_align - align; n++)
25775 emit_move_insn (destmem, mode_value);
25778 emit_move_insn (destmem, srcmem);
25779 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
25781 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
25782 prolog_size += GET_MODE_SIZE (mode);
25786 /* Copy last SIZE bytes. */
25787 destmem = offset_address (destmem, *count, 1);
25788 destmem = offset_address (destmem,
25789 GEN_INT (-size - prolog_size),
25792 emit_move_insn (destmem, mode_value);
25795 srcmem = offset_address (srcmem, *count, 1);
25796 srcmem = offset_address (srcmem,
25797 GEN_INT (-size - prolog_size),
25799 emit_move_insn (destmem, srcmem);
25801 for (n = 1; n * GET_MODE_SIZE (mode) < size; n++)
25803 destmem = offset_address (destmem, modesize, 1);
25805 emit_move_insn (destmem, mode_value);
25808 srcmem = offset_address (srcmem, modesize, 1);
25809 emit_move_insn (destmem, srcmem);
25813 /* Align destination. */
25814 if (desired_align > 1 && desired_align > align)
25816 rtx saveddest = *destptr;
25818 gcc_assert (desired_align <= size);
25819 /* Align destptr up, place it to new register. */
25820 *destptr = expand_simple_binop (GET_MODE (*destptr), PLUS, *destptr,
25821 GEN_INT (prolog_size),
25822 NULL_RTX, 1, OPTAB_DIRECT);
/* Preserve the REG_POINTER property across the new pseudo.  */
25823 if (REG_P (*destptr) && REG_P (saveddest) && REG_POINTER (saveddest))
25824 REG_POINTER (*destptr) = 1;
25825 *destptr = expand_simple_binop (GET_MODE (*destptr), AND, *destptr,
25826 GEN_INT (-desired_align),
25827 *destptr, 1, OPTAB_DIRECT);
25828 /* See how many bytes we skipped. */
25829 saveddest = expand_simple_binop (GET_MODE (*destptr), MINUS, saveddest,
25831 saveddest, 1, OPTAB_DIRECT);
25832 /* Adjust srcptr and count. */
25834 *srcptr = expand_simple_binop (GET_MODE (*srcptr), MINUS, *srcptr,
25835 saveddest, *srcptr, 1, OPTAB_DIRECT);
25836 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
25837 saveddest, *count, 1, OPTAB_DIRECT);
25838 /* We copied at most size + prolog_size. */
25839 if (*min_size > (unsigned HOST_WIDE_INT)(size + prolog_size))
25841 = ROUND_DOWN (*min_size - size, (unsigned HOST_WIDE_INT)size);
25845 /* Our loops always round down the block size, but for dispatch to library
25846 we need precise value. */
25848 *count = expand_simple_binop (GET_MODE (*count), AND, *count,
25849 GEN_INT (-size), *count, 1, OPTAB_DIRECT);
25853 gcc_assert (prolog_size == 0);
25854 /* Decrease count, so we won't end up copying last word twice. */
25855 if (!CONST_INT_P (*count))
25856 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
25857 constm1_rtx, *count, 1, OPTAB_DIRECT);
25859 *count = GEN_INT (ROUND_DOWN (UINTVAL (*count) - 1,
25860 (unsigned HOST_WIDE_INT)size));
25862 *min_size = ROUND_DOWN (*min_size - 1, (unsigned HOST_WIDE_INT)size);
25867 /* This function is like the previous one, except here we know how many bytes
25868 need to be copied. That allows us to update alignment not only of DST, which
25869 is returned, but also of SRC, which is passed as a pointer for that
25872 expand_set_or_movmem_constant_prologue (rtx dst, rtx *srcp, rtx destreg,
25873 rtx srcreg, rtx value, rtx vec_value,
25874 int desired_align, int align_bytes,
/* NOTE(review): interior lines (braces, the issetmem parameter and
   src initialization) are elided in this excerpt.  */
25878 rtx orig_dst = dst;
25879 rtx orig_src = NULL;
25880 int piece_size = 1;
25881 int copied_bytes = 0;
25885 gcc_assert (srcp != NULL);
/* Emit copies/sets for each set bit of ALIGN_BYTES, from the smallest
   piece up, until the destination reaches DESIRED_ALIGN.  */
25890 for (piece_size = 1;
25891 piece_size <= desired_align && copied_bytes < align_bytes;
25894 if (align_bytes & piece_size)
25898 if (vec_value && piece_size > GET_MODE_SIZE (GET_MODE (value)))
25899 dst = emit_memset (dst, destreg, vec_value, piece_size);
25901 dst = emit_memset (dst, destreg, value, piece_size);
25904 dst = emit_memmov (dst, &src, destreg, srcreg, piece_size);
25905 copied_bytes += piece_size;
/* Record the improved alignment and reduced remaining size on DST.  */
25908 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
25909 set_mem_align (dst, desired_align * BITS_PER_UNIT);
25910 if (MEM_SIZE_KNOWN_P (orig_dst))
25911 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
/* Derive what alignment the source gained from the same prologue copies
   and update SRC accordingly.  */
25915 int src_align_bytes = get_mem_align_offset (src, desired_align
25917 if (src_align_bytes >= 0)
25918 src_align_bytes = desired_align - src_align_bytes;
25919 if (src_align_bytes >= 0)
25921 unsigned int src_align;
25922 for (src_align = desired_align; src_align >= 2; src_align >>= 1)
25924 if ((src_align_bytes & (src_align - 1))
25925 == (align_bytes & (src_align - 1)))
25928 if (src_align > (unsigned int) desired_align)
25929 src_align = desired_align;
25930 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
25931 set_mem_align (src, src_align * BITS_PER_UNIT);
25933 if (MEM_SIZE_KNOWN_P (orig_src))
25934 set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
25941 /* Return true if ALG can be used in current context.
25942 Assume we expand memset if MEMSET is true. */
25944 alg_usable_p (enum stringop_alg alg, bool memset, bool have_as)
/* NOTE(review): the return statements for the no_stringop and rep-prefix
   cases are elided in this excerpt.  */
25946 if (alg == no_stringop)
/* vector_loop requires SSE or AVX vector moves.  */
25948 if (alg == vector_loop)
25949 return TARGET_SSE || TARGET_AVX;
25950 /* Algorithms using the rep prefix want at least edi and ecx;
25951 additionally, memset wants eax and memcpy wants esi. Don't
25952 consider such algorithms if the user has appropriated those
25953 registers for their own purposes, or if we have a non-default
25954 address space, since some string insns cannot override the segment. */
25955 if (alg == rep_prefix_1_byte
25956 || alg == rep_prefix_4_byte
25957 || alg == rep_prefix_8_byte)
25961 if (fixed_regs[CX_REG]
25962 || fixed_regs[DI_REG]
25963 || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG]))
25969 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
25970 static enum stringop_alg
25971 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
25972 unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size,
25973 bool memset, bool zero_memset, bool have_as,
25974 int *dynamic_check, bool *noalign)
/* NOTE(review): many interior lines (braces, some returns) are elided in
   this excerpt; comments below describe only the visible logic.  */
25976 const struct stringop_algs * algs;
25977 bool optimize_for_speed;
25979 const struct processor_costs *cost;
25981 bool any_alg_usable_p = false;
/* -1 means "no runtime size check / library dispatch".  */
25984 *dynamic_check = -1;
25986 /* Even if the string operation call is cold, we still might spend a lot
25987 of time processing large blocks. */
25988 if (optimize_function_for_size_p (cfun)
25989 || (optimize_insn_for_size_p ()
25991 || (expected_size != -1 && expected_size < 256))))
25992 optimize_for_speed = false;
25994 optimize_for_speed = true;
/* Pick the per-CPU cost tables; memset and memcpy have separate entries
   for 32-bit and 64-bit modes.  */
25996 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
25998 algs = &cost->memset[TARGET_64BIT != 0];
26000 algs = &cost->memcpy[TARGET_64BIT != 0];
26002 /* See maximal size for user defined algorithm. */
26003 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
26005 enum stringop_alg candidate = algs->size[i].alg;
26006 bool usable = alg_usable_p (candidate, memset, have_as);
26007 any_alg_usable_p |= usable;
26009 if (candidate != libcall && candidate && usable)
26010 max = algs->size[i].max;
26013 /* If expected size is not known but max size is small enough
26014 so inline version is a win, set expected size into
26016 if (((max > 1 && (unsigned HOST_WIDE_INT) max >= max_size) || max == -1)
26017 && expected_size == -1)
26018 expected_size = min_size / 2 + max_size / 2;
26020 /* If user specified the algorithm, honor it if possible. */
26021 if (ix86_stringop_alg != no_stringop
26022 && alg_usable_p (ix86_stringop_alg, memset, have_as))
26023 return ix86_stringop_alg;
26024 /* rep; movq or rep; movl is the smallest variant. */
26025 else if (!optimize_for_speed)
/* Byte variant when count is unknown, not a multiple of 4, or the
   memset value is not a known zero.  */
26028 if (!count || (count & 3) || (memset && !zero_memset))
26029 return alg_usable_p (rep_prefix_1_byte, memset, have_as)
26030 ? rep_prefix_1_byte : loop_1_byte;
26032 return alg_usable_p (rep_prefix_4_byte, memset, have_as)
26033 ? rep_prefix_4_byte : loop;
26035 /* Very tiny blocks are best handled via the loop, REP is expensive to
26037 else if (expected_size != -1 && expected_size < 4)
26038 return loop_1_byte;
26039 else if (expected_size != -1)
26041 enum stringop_alg alg = libcall;
26042 bool alg_noalign = false;
26043 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
26045 /* We get here if the algorithms that were not libcall-based
26046 were rep-prefix based and we are unable to use rep prefixes
26047 based on global register usage. Break out of the loop and
26048 use the heuristic below. */
26049 if (algs->size[i].max == 0)
26051 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
26053 enum stringop_alg candidate = algs->size[i].alg;
26055 if (candidate != libcall
26056 && alg_usable_p (candidate, memset, have_as))
26059 alg_noalign = algs->size[i].noalign;
26061 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
26062 last non-libcall inline algorithm. */
26063 if (TARGET_INLINE_ALL_STRINGOPS)
26065 /* When the current size is best to be copied by a libcall,
26066 but we are still forced to inline, run the heuristic below
26067 that will pick code for medium sized blocks. */
26068 if (alg != libcall)
26070 *noalign = alg_noalign;
26073 else if (!any_alg_usable_p)
26076 else if (alg_usable_p (candidate, memset, have_as))
26078 *noalign = algs->size[i].noalign;
26084 /* When asked to inline the call anyway, try to pick meaningful choice.
26085 We look for maximal size of block that is faster to copy by hand and
26086 take blocks of at most of that size guessing that average size will
26087 be roughly half of the block.
26089 If this turns out to be bad, we might simply specify the preferred
26090 choice in ix86_costs. */
26091 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
26092 && (algs->unknown_size == libcall
26093 || !alg_usable_p (algs->unknown_size, memset, have_as)))
26095 enum stringop_alg alg;
26097 /* If there aren't any usable algorithms, then recursing on
26098 smaller sizes isn't going to find anything. Just return the
26099 simple byte-at-a-time copy loop. */
26100 if (!any_alg_usable_p)
26102 /* Pick something reasonable. */
26103 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
26104 *dynamic_check = 128;
26105 return loop_1_byte;
/* Recurse with expected size max/2 to choose the inline algorithm;
   the recursive call must not itself request a dynamic check.  */
26109 alg = decide_alg (count, max / 2, min_size, max_size, memset,
26110 zero_memset, have_as, dynamic_check, noalign);
26111 gcc_assert (*dynamic_check == -1);
26112 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
26113 *dynamic_check = max;
26115 gcc_assert (alg != libcall);
26118 return (alg_usable_p (algs->unknown_size, memset, have_as)
26119 ? algs->unknown_size : libcall);
26122 /* Decide on alignment. We know that the operand is already aligned to ALIGN
26123 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
26125 decide_alignment (int align,
26126 enum stringop_alg alg,
26128 machine_mode move_mode)
/* NOTE(review): the early returns for libcall/VOIDmode are elided in
   this excerpt.  */
26130 int desired_align = 0;
26132 gcc_assert (alg != no_stringop);
26134 if (alg == libcall)
26136 if (move_mode == VOIDmode)
/* By default align to the size of the move unit.  */
26139 desired_align = GET_MODE_SIZE (move_mode);
26140 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
26141 copying whole cacheline at once. */
26142 if (TARGET_PENTIUMPRO
26143 && (alg == rep_prefix_4_byte || alg == rep_prefix_1_byte))
/* Never request less alignment than we already have, and don't bother
   aligning blocks expected to be tiny.  */
26148 if (desired_align < align)
26149 desired_align = align;
26150 if (expected_size != -1 && expected_size < 4)
26151 desired_align = align;
26153 return desired_align;
26157 /* Helper function for memcpy. For QImode value 0xXY produce
26158 0xXYXYXYXY of width specified by MODE. This is essentially
26159 a * 0x10101010, but we can do slightly better than
26160 synth_mult by unwinding the sequence by hand on CPUs with
26163 promote_duplicated_reg (machine_mode mode, rtx val)
26165 machine_mode valmode = GET_MODE (val);
/* Number of shift+or steps needed: 3 for DImode (8->16->32->64),
   2 for SImode.  */
26167 int nops = mode == DImode ? 3 : 2;
26169 gcc_assert (mode == SImode || mode == DImode || val == const0_rtx);
26170 if (val == const0_rtx)
26171 return copy_to_mode_reg (mode, CONST0_RTX (mode));
/* Constant byte: replicate it at compile time.  */
26172 if (CONST_INT_P (val))
26174 HOST_WIDE_INT v = INTVAL (val) & 255;
26178 if (mode == DImode)
26179 v |= (v << 16) << 16;
26180 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
26183 if (valmode == VOIDmode)
26185 if (valmode != QImode)
26186 val = gen_lowpart (QImode, val);
26187 if (mode == QImode)
26189 if (!TARGET_PARTIAL_REG_STALL)
/* If a multiply by the replicated-ones constant is cheaper than the
   shift/or sequence, use MULT instead.  */
26191 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
26192 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
26193 <= (ix86_cost->shift_const + ix86_cost->add) * nops
26194 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
26196 rtx reg = convert_modes (mode, QImode, val, true);
26197 tmp = promote_duplicated_reg (mode, const1_rtx);
26198 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
26203 rtx reg = convert_modes (mode, QImode, val, true);
/* Duplicate the low byte into the next byte via insv when partial
   register stalls are not a concern.  */
26205 if (!TARGET_PARTIAL_REG_STALL)
26206 if (mode == SImode)
26207 emit_insn (gen_insvsi_1 (reg, reg));
26209 emit_insn (gen_insvdi_1 (reg, reg));
26212 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
26213 NULL, 1, OPTAB_DIRECT);
26215 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
/* Widen the replicated pattern: 16-bit -> 32-bit (-> 64-bit).  */
26217 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
26218 NULL, 1, OPTAB_DIRECT);
26219 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
26220 if (mode == SImode)
26222 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
26223 NULL, 1, OPTAB_DIRECT);
26224 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
26229 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
26230 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
26231 alignment from ALIGN to DESIRED_ALIGN. */
26233 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
/* Pick the widest mode (DI > SI > HI > none) that either the main loop
   chunk size or the alignment prologue will require.  NOTE(review): the
   TARGET_64BIT condition guarding the DImode case is elided in this
   excerpt.  */
26239 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
26240 promoted_val = promote_duplicated_reg (DImode, val);
26241 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
26242 promoted_val = promote_duplicated_reg (SImode, val);
26243 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
26244 promoted_val = promote_duplicated_reg (HImode, val);
26246 promoted_val = val;
26248 return promoted_val;
26251 /* Expand string move (memcpy) or store (memset) operation. Use i386 string
26252 operations when profitable. The code depends upon architecture, block size
26253 and alignment, but always has one of the following overall structures:
26255 Aligned move sequence:
26257 1) Prologue guard: Conditional that jumps up to epilogues for small
26258 blocks that can be handled by epilogue alone. This is faster
26259 but also needed for correctness, since prologue assume the block
26260 is larger than the desired alignment.
26262 Optional dynamic check for size and libcall for large
26263 blocks is emitted here too, with -minline-stringops-dynamically.
26265 2) Prologue: copy first few bytes in order to get destination
26266 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
26267 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
26268 copied. We emit either a jump tree on power of two sized
26269 blocks, or a byte loop.
26271 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
26272 with specified algorithm.
26274 4) Epilogue: code copying tail of the block that is too small to be
26275 handled by main body (or up to size guarded by prologue guard).
26277 Misaligned move sequence
26279 1) misaligned move prologue/epilogue containing:
26280 a) Prologue handling small memory blocks and jumping to done_label
26281 (skipped if blocks are known to be large enough)
26282 b) Single move copying first DESIRED_ALIGN-ALIGN bytes if alignment is
26283 needed by single possibly misaligned move
26284 (skipped if alignment is not needed)
26285 c) Copy of last SIZE_NEEDED bytes by possibly misaligned moves
26287 2) Zero size guard dispatching to done_label, if needed
26289 3) dispatch to library call, if needed,
26291 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
26292 with specified algorithm. */
26294 ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
26295 rtx align_exp, rtx expected_align_exp,
26296 rtx expected_size_exp, rtx min_size_exp,
26297 rtx max_size_exp, rtx probable_max_size_exp,
26302 rtx_code_label *label = NULL;
26304 rtx_code_label *jump_around_label = NULL;
26305 HOST_WIDE_INT align = 1;
26306 unsigned HOST_WIDE_INT count = 0;
26307 HOST_WIDE_INT expected_size = -1;
26308 int size_needed = 0, epilogue_size_needed;
26309 int desired_align = 0, align_bytes = 0;
26310 enum stringop_alg alg;
26311 rtx promoted_val = NULL;
26312 rtx vec_promoted_val = NULL;
26313 bool force_loopy_epilogue = false;
26315 bool need_zero_guard = false;
26317 machine_mode move_mode = VOIDmode;
26318 int unroll_factor = 1;
26319 /* TODO: Once value ranges are available, fill in proper data. */
26320 unsigned HOST_WIDE_INT min_size = 0;
26321 unsigned HOST_WIDE_INT max_size = -1;
26322 unsigned HOST_WIDE_INT probable_max_size = -1;
26323 bool misaligned_prologue_used = false;
26326 if (CONST_INT_P (align_exp))
26327 align = INTVAL (align_exp);
26328 /* i386 can do misaligned access on reasonably increased cost. */
26329 if (CONST_INT_P (expected_align_exp)
26330 && INTVAL (expected_align_exp) > align)
26331 align = INTVAL (expected_align_exp);
26332 /* ALIGN is the minimum of destination and source alignment, but we care here
26333 just about destination alignment. */
26335 && MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
26336 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
26338 if (CONST_INT_P (count_exp))
26340 min_size = max_size = probable_max_size = count = expected_size
26341 = INTVAL (count_exp);
26342 /* When COUNT is 0, there is nothing to do. */
26349 min_size = INTVAL (min_size_exp);
26351 max_size = INTVAL (max_size_exp);
26352 if (probable_max_size_exp)
26353 probable_max_size = INTVAL (probable_max_size_exp);
26354 if (CONST_INT_P (expected_size_exp))
26355 expected_size = INTVAL (expected_size_exp);
26358 /* Make sure we don't need to care about overflow later on. */
26359 if (count > (HOST_WIDE_INT_1U << 30))
26362 have_as = !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (dst));
26364 have_as |= !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (src));
26366 /* Step 0: Decide on preferred algorithm, desired alignment and
26367 size of chunks to be copied by main loop. */
26368 alg = decide_alg (count, expected_size, min_size, probable_max_size,
26370 issetmem && val_exp == const0_rtx, have_as,
26371 &dynamic_check, &noalign);
26372 if (alg == libcall)
26374 gcc_assert (alg != no_stringop);
26376 /* For now vector-version of memset is generated only for memory zeroing, as
26377 creating of promoted vector value is very cheap in this case. */
26378 if (issetmem && alg == vector_loop && val_exp != const0_rtx)
26379 alg = unrolled_loop;
26382 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
26383 destreg = ix86_copy_addr_to_reg (XEXP (dst, 0));
26385 srcreg = ix86_copy_addr_to_reg (XEXP (src, 0));
26388 move_mode = word_mode;
26394 gcc_unreachable ();
26396 need_zero_guard = true;
26397 move_mode = QImode;
26400 need_zero_guard = true;
26402 case unrolled_loop:
26403 need_zero_guard = true;
26404 unroll_factor = (TARGET_64BIT ? 4 : 2);
26407 need_zero_guard = true;
26409 /* Find the widest supported mode. */
26410 move_mode = word_mode;
26411 while (optab_handler (mov_optab, GET_MODE_WIDER_MODE (move_mode))
26412 != CODE_FOR_nothing)
26413 move_mode = GET_MODE_WIDER_MODE (move_mode);
26415 /* Find the corresponding vector mode with the same size as MOVE_MODE.
26416 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
26417 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
26419 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
26420 move_mode = mode_for_vector (word_mode, nunits);
26421 if (optab_handler (mov_optab, move_mode) == CODE_FOR_nothing)
26422 move_mode = word_mode;
26424 gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing);
26426 case rep_prefix_8_byte:
26427 move_mode = DImode;
26429 case rep_prefix_4_byte:
26430 move_mode = SImode;
26432 case rep_prefix_1_byte:
26433 move_mode = QImode;
26436 size_needed = GET_MODE_SIZE (move_mode) * unroll_factor;
26437 epilogue_size_needed = size_needed;
26439 desired_align = decide_alignment (align, alg, expected_size, move_mode);
26440 if (!TARGET_ALIGN_STRINGOPS || noalign)
26441 align = desired_align;
26443 /* Step 1: Prologue guard. */
26445 /* Alignment code needs count to be in register. */
26446 if (CONST_INT_P (count_exp) && desired_align > align)
26448 if (INTVAL (count_exp) > desired_align
26449 && INTVAL (count_exp) > size_needed)
26452 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
26453 if (align_bytes <= 0)
26456 align_bytes = desired_align - align_bytes;
26458 if (align_bytes == 0)
26459 count_exp = force_reg (counter_mode (count_exp), count_exp);
26461 gcc_assert (desired_align >= 1 && align >= 1);
26463 /* Misaligned move sequences handle both prologue and epilogue at once.
26464 Default code generation results in a smaller code for large alignments
26465 and also avoids redundant job when sizes are known precisely. */
26466 misaligned_prologue_used
26467 = (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES
26468 && MAX (desired_align, epilogue_size_needed) <= 32
26469 && desired_align <= epilogue_size_needed
26470 && ((desired_align > align && !align_bytes)
26471 || (!count && epilogue_size_needed > 1)));
26473 /* Do the cheap promotion to allow better CSE across the
26474 main loop and epilogue (ie one load of the big constant in the
26476 For now the misaligned move sequences do not have fast path
26477 without broadcasting. */
26478 if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used)))
26480 if (alg == vector_loop)
26482 gcc_assert (val_exp == const0_rtx);
26483 vec_promoted_val = promote_duplicated_reg (move_mode, val_exp);
26484 promoted_val = promote_duplicated_reg_to_size (val_exp,
26485 GET_MODE_SIZE (word_mode),
26486 desired_align, align);
26490 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
26491 desired_align, align);
26494 /* Misaligned move sequences handles both prologues and epilogues at once.
26495 Default code generation results in smaller code for large alignments and
26496 also avoids redundant job when sizes are known precisely. */
26497 if (misaligned_prologue_used)
26499 /* Misaligned move prologue handled small blocks by itself. */
26500 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves
26501 (dst, src, &destreg, &srcreg,
26502 move_mode, promoted_val, vec_promoted_val,
26504 &jump_around_label,
26505 desired_align < align
26506 ? MAX (desired_align, epilogue_size_needed) : epilogue_size_needed,
26507 desired_align, align, &min_size, dynamic_check, issetmem);
26509 src = change_address (src, BLKmode, srcreg);
26510 dst = change_address (dst, BLKmode, destreg);
26511 set_mem_align (dst, desired_align * BITS_PER_UNIT);
26512 epilogue_size_needed = 0;
26513 if (need_zero_guard
26514 && min_size < (unsigned HOST_WIDE_INT) size_needed)
26516 /* It is possible that we copied enough so the main loop will not
26518 gcc_assert (size_needed > 1);
26519 if (jump_around_label == NULL_RTX)
26520 jump_around_label = gen_label_rtx ();
26521 emit_cmp_and_jump_insns (count_exp,
26522 GEN_INT (size_needed),
26523 LTU, 0, counter_mode (count_exp), 1, jump_around_label);
26524 if (expected_size == -1
26525 || expected_size < (desired_align - align) / 2 + size_needed)
26526 predict_jump (REG_BR_PROB_BASE * 20 / 100);
26528 predict_jump (REG_BR_PROB_BASE * 60 / 100);
26531 /* Ensure that alignment prologue won't copy past end of block. */
26532 else if (size_needed > 1 || (desired_align > 1 && desired_align > align))
26534 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
26535 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
26536 Make sure it is power of 2. */
26537 epilogue_size_needed = 1 << (floor_log2 (epilogue_size_needed) + 1);
26539 /* To improve performance of small blocks, we jump around the VAL
26540 promoting mode. This mean that if the promoted VAL is not constant,
26541 we might not use it in the epilogue and have to use byte
26543 if (issetmem && epilogue_size_needed > 2 && !promoted_val)
26544 force_loopy_epilogue = true;
26545 if ((count && count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
26546 || max_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
26548 /* If main algorithm works on QImode, no epilogue is needed.
26549 For small sizes just don't align anything. */
26550 if (size_needed == 1)
26551 desired_align = align;
26556 && min_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
26558 label = gen_label_rtx ();
26559 emit_cmp_and_jump_insns (count_exp,
26560 GEN_INT (epilogue_size_needed),
26561 LTU, 0, counter_mode (count_exp), 1, label);
26562 if (expected_size == -1 || expected_size < epilogue_size_needed)
26563 predict_jump (REG_BR_PROB_BASE * 60 / 100);
26565 predict_jump (REG_BR_PROB_BASE * 20 / 100);
26569 /* Emit code to decide on runtime whether library call or inline should be
26571 if (dynamic_check != -1)
26573 if (!issetmem && CONST_INT_P (count_exp))
26575 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
26577 emit_block_move_via_libcall (dst, src, count_exp, false);
26578 count_exp = const0_rtx;
26584 rtx_code_label *hot_label = gen_label_rtx ();
26585 if (jump_around_label == NULL_RTX)
26586 jump_around_label = gen_label_rtx ();
26587 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
26588 LEU, 0, counter_mode (count_exp),
26590 predict_jump (REG_BR_PROB_BASE * 90 / 100);
26592 set_storage_via_libcall (dst, count_exp, val_exp, false);
26594 emit_block_move_via_libcall (dst, src, count_exp, false);
26595 emit_jump (jump_around_label);
26596 emit_label (hot_label);
26600 /* Step 2: Alignment prologue. */
26601 /* Do the expensive promotion once we branched off the small blocks. */
26602 if (issetmem && !promoted_val)
26603 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
26604 desired_align, align);
26606 if (desired_align > align && !misaligned_prologue_used)
26608 if (align_bytes == 0)
26610 /* Except for the first move in prologue, we no longer know
26611 constant offset in aliasing info. It don't seems to worth
26612 the pain to maintain it for the first move, so throw away
26614 dst = change_address (dst, BLKmode, destreg);
26616 src = change_address (src, BLKmode, srcreg);
26617 dst = expand_set_or_movmem_prologue (dst, src, destreg, srcreg,
26618 promoted_val, vec_promoted_val,
26619 count_exp, align, desired_align,
26621 /* At most desired_align - align bytes are copied. */
26622 if (min_size < (unsigned)(desired_align - align))
26625 min_size -= desired_align - align;
26629 /* If we know how many bytes need to be stored before dst is
26630 sufficiently aligned, maintain aliasing info accurately. */
26631 dst = expand_set_or_movmem_constant_prologue (dst, &src, destreg,
26639 count_exp = plus_constant (counter_mode (count_exp),
26640 count_exp, -align_bytes);
26641 count -= align_bytes;
26642 min_size -= align_bytes;
26643 max_size -= align_bytes;
26645 if (need_zero_guard
26646 && min_size < (unsigned HOST_WIDE_INT) size_needed
26647 && (count < (unsigned HOST_WIDE_INT) size_needed
26648 || (align_bytes == 0
26649 && count < ((unsigned HOST_WIDE_INT) size_needed
26650 + desired_align - align))))
26652 /* It is possible that we copied enough so the main loop will not
26654 gcc_assert (size_needed > 1);
26655 if (label == NULL_RTX)
26656 label = gen_label_rtx ();
26657 emit_cmp_and_jump_insns (count_exp,
26658 GEN_INT (size_needed),
26659 LTU, 0, counter_mode (count_exp), 1, label);
26660 if (expected_size == -1
26661 || expected_size < (desired_align - align) / 2 + size_needed)
26662 predict_jump (REG_BR_PROB_BASE * 20 / 100);
26664 predict_jump (REG_BR_PROB_BASE * 60 / 100);
26667 if (label && size_needed == 1)
26669 emit_label (label);
26670 LABEL_NUSES (label) = 1;
26672 epilogue_size_needed = 1;
26674 promoted_val = val_exp;
26676 else if (label == NULL_RTX && !misaligned_prologue_used)
26677 epilogue_size_needed = size_needed;
26679 /* Step 3: Main loop. */
26686 gcc_unreachable ();
26689 case unrolled_loop:
26690 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, promoted_val,
26691 count_exp, move_mode, unroll_factor,
26692 expected_size, issetmem);
26695 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg,
26696 vec_promoted_val, count_exp, move_mode,
26697 unroll_factor, expected_size, issetmem);
26699 case rep_prefix_8_byte:
26700 case rep_prefix_4_byte:
26701 case rep_prefix_1_byte:
26702 expand_set_or_movmem_via_rep (dst, src, destreg, srcreg, promoted_val,
26703 val_exp, count_exp, move_mode, issetmem);
26706 /* Adjust properly the offset of src and dest memory for aliasing. */
26707 if (CONST_INT_P (count_exp))
26710 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
26711 (count / size_needed) * size_needed);
26712 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
26713 (count / size_needed) * size_needed);
26718 src = change_address (src, BLKmode, srcreg);
26719 dst = change_address (dst, BLKmode, destreg);
26722 /* Step 4: Epilogue to copy the remaining bytes. */
26726 /* When the main loop is done, COUNT_EXP might hold original count,
26727 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
26728 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
26729 bytes. Compensate if needed. */
26731 if (size_needed < epilogue_size_needed)
26734 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
26735 GEN_INT (size_needed - 1), count_exp, 1,
26737 if (tmp != count_exp)
26738 emit_move_insn (count_exp, tmp);
26740 emit_label (label);
26741 LABEL_NUSES (label) = 1;
26744 if (count_exp != const0_rtx && epilogue_size_needed > 1)
26746 if (force_loopy_epilogue)
26747 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
26748 epilogue_size_needed);
26752 expand_setmem_epilogue (dst, destreg, promoted_val,
26753 vec_promoted_val, count_exp,
26754 epilogue_size_needed);
26756 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
26757 epilogue_size_needed);
26760 if (jump_around_label)
26761 emit_label (jump_around_label);
26766 /* Expand the appropriate insns for doing strlen if not just doing
26769 out = result, initialized with the start address
26770 align_rtx = alignment of the address.
26771 scratch = scratch register, initialized with the startaddress when
26772 not aligned, otherwise undefined
26774 This is just the body. It needs the initializations mentioned above and
26775 some address computing at the end. These things are done in i386.md. */
26778 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
26782 rtx_code_label *align_2_label = NULL;
26783 rtx_code_label *align_3_label = NULL;
26784 rtx_code_label *align_4_label = gen_label_rtx ();
26785 rtx_code_label *end_0_label = gen_label_rtx ();
26787 rtx tmpreg = gen_reg_rtx (SImode);
26788 rtx scratch = gen_reg_rtx (SImode);
26792 if (CONST_INT_P (align_rtx))
26793 align = INTVAL (align_rtx);
26795 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
26797 /* Is there a known alignment and is it less than 4? */
26800 rtx scratch1 = gen_reg_rtx (Pmode);
26801 emit_move_insn (scratch1, out);
26802 /* Is there a known alignment and is it not 2? */
26805 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
26806 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
26808 /* Leave just the 3 lower bits. */
26809 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
26810 NULL_RTX, 0, OPTAB_WIDEN);
26812 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
26813 Pmode, 1, align_4_label);
26814 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
26815 Pmode, 1, align_2_label);
26816 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
26817 Pmode, 1, align_3_label);
26821 /* Since the alignment is 2, we have to check 2 or 0 bytes;
26822 check if is aligned to 4 - byte. */
26824 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
26825 NULL_RTX, 0, OPTAB_WIDEN);
26827 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
26828 Pmode, 1, align_4_label);
26831 mem = change_address (src, QImode, out);
26833 /* Now compare the bytes. */
26835 /* Compare the first n unaligned byte on a byte per byte basis. */
26836 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
26837 QImode, 1, end_0_label);
26839 /* Increment the address. */
26840 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
26842 /* Not needed with an alignment of 2 */
26845 emit_label (align_2_label);
26847 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
26850 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
26852 emit_label (align_3_label);
26855 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
26858 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
26861 /* Generate loop to check 4 bytes at a time. It is not a good idea to
26862 align this loop. It gives only huge programs, but does not help to
26864 emit_label (align_4_label);
26866 mem = change_address (src, SImode, out);
26867 emit_move_insn (scratch, mem);
26868 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
26870 /* This formula yields a nonzero result iff one of the bytes is zero.
26871 This saves three branches inside loop and many cycles. */
26873 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
26874 emit_insn (gen_one_cmplsi2 (scratch, scratch));
26875 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
26876 emit_insn (gen_andsi3 (tmpreg, tmpreg,
26877 gen_int_mode (0x80808080, SImode)));
26878 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
26883 rtx reg = gen_reg_rtx (SImode);
26884 rtx reg2 = gen_reg_rtx (Pmode);
26885 emit_move_insn (reg, tmpreg);
26886 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
26888 /* If zero is not in the first two bytes, move two bytes forward. */
26889 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
26890 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
26891 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
26892 emit_insn (gen_rtx_SET (tmpreg,
26893 gen_rtx_IF_THEN_ELSE (SImode, tmp,
26896 /* Emit lea manually to avoid clobbering of flags. */
26897 emit_insn (gen_rtx_SET (reg2, gen_rtx_PLUS (Pmode, out, const2_rtx)));
26899 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
26900 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
26901 emit_insn (gen_rtx_SET (out,
26902 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
26908 rtx_code_label *end_2_label = gen_label_rtx ();
26909 /* Is zero in the first two bytes? */
26911 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
26912 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
26913 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
26914 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
26915 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
26917 tmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
26918 JUMP_LABEL (tmp) = end_2_label;
26920 /* Not in the first two. Move two bytes forward. */
26921 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
26922 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
26924 emit_label (end_2_label);
26928 /* Avoid branch in fixing the byte. */
26929 tmpreg = gen_lowpart (QImode, tmpreg);
26930 emit_insn (gen_addqi3_cconly_overflow (tmpreg, tmpreg));
26931 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
26932 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
26933 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
26935 emit_label (end_0_label);
26938 /* Expand strlen. */
/* Expand strlen: compute into OUT the length of the string SRC terminated
   by EOSCHAR, given known alignment ALIGN.  Chooses between the unrolled
   SImode loop (ix86_expand_strlensi_unroll_1) and a repnz-scas sequence
   (UNSPEC_SCAS).  NOTE(review): the listing is elided — braces, the return
   statements and the `rtx unspec' declaration are missing here.  */
26941 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
26943 rtx addr, scratch1, scratch2, scratch3, scratch4;
26945 /* The generic case of strlen expander is long.  Avoid its
26946 expansion unless TARGET_INLINE_ALL_STRINGOPS.  */
26948 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
26949 && !TARGET_INLINE_ALL_STRINGOPS
26950 && !optimize_insn_for_size_p ()
26951 && (!CONST_INT_P (align) || INTVAL (align) < 4))
26954 addr = force_reg (Pmode, XEXP (src, 0));
26955 scratch1 = gen_reg_rtx (Pmode);
26957 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
26958 && !optimize_insn_for_size_p ())
26960 /* Well it seems that some optimizer does not combine a call like
26961 foo(strlen(bar), strlen(bar));
26962 when the move and the subtraction is done here.  It does calculate
26963 the length just once when these instructions are done inside of
26964 output_strlen_unroll().  But I think since &bar[strlen(bar)] is
26965 often used and I use one fewer register for the lifetime of
26966 output_strlen_unroll() this is better.  */
26968 emit_move_insn (out, addr);
26970 ix86_expand_strlensi_unroll_1 (out, src, align);
26972 /* strlensi_unroll_1 returns the address of the zero at the end of
26973 the string, like memchr(), so compute the length by subtracting
26974 the start address.  */
26975 emit_insn (ix86_gen_sub3 (out, out, addr));
/* scas-based path below: requires the hard registers the string insns
   implicitly use and the generic address space.  */
26981 /* Can't use this if the user has appropriated eax, ecx, or edi.  */
26982 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
26984 /* Can't use this for non-default address spaces.  */
26985 if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (src)))
26988 scratch2 = gen_reg_rtx (Pmode);
26989 scratch3 = gen_reg_rtx (Pmode);
/* scratch4 = -1: initial count for repnz scasb.  */
26990 scratch4 = force_reg (Pmode, constm1_rtx);
26992 emit_move_insn (scratch3, addr);
26993 eoschar = force_reg (QImode, eoschar);
26995 src = replace_equiv_address_nv (src, scratch3);
26997 /* If .md starts supporting :P, this can be done in .md.  */
26998 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
26999 scratch4), UNSPEC_SCAS);
27000 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
/* length = ~count_left - 1 (scas counts down from -1 past the NUL).  */
27001 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
27002 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
27007 /* For given symbol (function) construct code to compute address of it's PLT
27008 entry in large x86-64 PIC model. */
/* Return a fresh Pmode pseudo holding the address of SYMBOL's PLT entry in
   the large x86-64 PIC model: tmp = PLTOFF(symbol) + PIC register.
   Only valid for DImode Pmode, CM_LARGE_PIC, and non-PECOFF targets
   (asserted below).  */
27010 construct_plt_address (rtx symbol)
27014 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
27015 gcc_assert (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF);
27016 gcc_assert (Pmode == DImode);
27018 tmp = gen_reg_rtx (Pmode);
27019 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
27021 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
/* Add the GOT base to the PLT offset to form the absolute address.  */
27022 emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx));
/* Expand a call to FNADDR with RETVAL as result (or NULL for void calls),
   CALLARG1 as the argument RTX and POP as the amount the callee pops.
   SIBCALL selects sibling-call expansion.  Handles PIC/GOT/PLT address
   legitimization, the x86-64 varargs AL setup, MPX bound registers and
   MS-ABI clobbers.  NOTE(review): this listing is elided — braces, the
   `vec' array declaration and some conditions are missing.  */
27027 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
27029 rtx pop, bool sibcall)
27032 rtx use = NULL, call;
27033 unsigned int vec_len = 0;
27035 if (pop == const0_rtx)
/* Callee-pop only exists in 32-bit ABIs.  */
27037 gcc_assert (!TARGET_64BIT || !pop);
27039 if (TARGET_MACHO && !TARGET_64BIT)
27042 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
27043 fnaddr = machopic_indirect_call_target (fnaddr);
27048 /* Static functions and indirect calls don't need the pic register.  Also,
27049 check if PLT was explicitly avoided via no-plt or "noplt" attribute, making
27050 it an indirect call.  */
27051 rtx addr = XEXP (fnaddr, 0);
27053 && GET_CODE (addr) == SYMBOL_REF
27054 && !SYMBOL_REF_LOCAL_P (addr))
27057 && (SYMBOL_REF_DECL (addr) == NULL_TREE
27058 || !lookup_attribute ("noplt",
27059 DECL_ATTRIBUTES (SYMBOL_REF_DECL (addr)))))
27062 || (ix86_cmodel == CM_LARGE_PIC
27063 && DEFAULT_ABI != MS_ABI))
/* A PLT call clobbers/uses the real PIC register; record the use and,
   with a pseudo PIC reg, copy the pseudo into the hard register first.  */
27065 use_reg (&use, gen_rtx_REG (Pmode,
27066 REAL_PIC_OFFSET_TABLE_REGNUM));
27067 if (ix86_use_pseudo_pic_reg ())
27068 emit_move_insn (gen_rtx_REG (Pmode,
27069 REAL_PIC_OFFSET_TABLE_REGNUM),
27070 pic_offset_table_rtx);
27073 else if (!TARGET_PECOFF && !TARGET_MACHO)
27077 fnaddr = gen_rtx_UNSPEC (Pmode,
27078 gen_rtvec (1, addr),
27080 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
27084 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
27086 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
27087 fnaddr = gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
/* Call through the GOT slot: load the function address from memory.  */
27090 fnaddr = gen_const_mem (Pmode, fnaddr);
27091 /* Pmode may not be the same as word_mode for x32, which
27092 doesn't support indirect branch via 32-bit memory slot.
27093 Since x32 GOT slot is 64 bit with zero upper 32 bits,
27094 indirect branch via x32 GOT slot is OK.  */
27095 if (GET_MODE (fnaddr) != word_mode)
27096 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
27097 fnaddr = gen_rtx_MEM (QImode, fnaddr);
27102 /* Skip setting up RAX register for -mskip-rax-setup when there are no
27103 parameters passed in vector registers.  */
27105 && (INTVAL (callarg2) > 0
27106 || (INTVAL (callarg2) == 0
27107 && (TARGET_SSE || !flag_skip_rax_setup))))
/* AL holds the number of vector registers used by a varargs call.  */
27109 rtx al = gen_rtx_REG (QImode, AX_REG);
27110 emit_move_insn (al, callarg2);
27111 use_reg (&use, al);
27114 if (ix86_cmodel == CM_LARGE_PIC
27117 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
27118 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
27119 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
27120 /* Since x32 GOT slot is 64 bit with zero upper 32 bits, indirect
27121 branch via x32 GOT slot is OK.  */
27122 else if (!(TARGET_X32
27124 && GET_CODE (XEXP (fnaddr, 0)) == ZERO_EXTEND
27125 && GOT_memory_operand (XEXP (XEXP (fnaddr, 0), 0), Pmode))
27127 ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
27128 : !call_insn_operand (XEXP (fnaddr, 0), word_mode)))
/* Address form not directly callable: force it into a register.  */
27130 fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
27131 fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
27134 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
27138 /* We should add bounds as destination register in case
27139 pointer with bounds may be returned.  */
27140 if (TARGET_MPX && SCALAR_INT_MODE_P (GET_MODE (retval)))
27142 rtx b0 = gen_rtx_REG (BND64mode, FIRST_BND_REG);
27143 rtx b1 = gen_rtx_REG (BND64mode, FIRST_BND_REG + 1);
27144 if (GET_CODE (retval) == PARALLEL)
27146 b0 = gen_rtx_EXPR_LIST (VOIDmode, b0, const0_rtx);
27147 b1 = gen_rtx_EXPR_LIST (VOIDmode, b1, const0_rtx);
27148 rtx par = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, b0, b1));
27149 retval = chkp_join_splitted_slot (retval, par);
27153 retval = gen_rtx_PARALLEL (VOIDmode,
27154 gen_rtvec (3, retval, b0, b1));
27155 chkp_put_regs_to_expr_list (retval);
27159 call = gen_rtx_SET (retval, call);
27161 vec[vec_len++] = call;
/* Callee-pop: model the stack-pointer adjustment inside the call insn.  */
27165 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
27166 pop = gen_rtx_SET (stack_pointer_rtx, pop);
27167 vec[vec_len++] = pop;
27170 if (TARGET_64BIT_MS_ABI
27171 && (!callarg2 || INTVAL (callarg2) != -2))
/* MS ABI: clobber the registers that are call-clobbered under SysV but
   preserved under MS, so cross-ABI calls are modeled correctly.  */
27173 int const cregs_size
27174 = ARRAY_SIZE (x86_64_ms_sysv_extra_clobbered_registers);
27177 for (i = 0; i < cregs_size; i++)
27179 int regno = x86_64_ms_sysv_extra_clobbered_registers[i];
27180 machine_mode mode = SSE_REGNO_P (regno) ? TImode : DImode;
27182 clobber_reg (&use, gen_rtx_REG (mode, regno));
27187 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
27188 call = emit_call_insn (call);
27190 CALL_INSN_FUNCTION_USAGE (call) = use;
27195 /* Return true if the function being called was marked with attribute "noplt"
27196 or using -fno-plt and we are compiling for non-PIC and x86_64. We need to
27197 handle the non-PIC case in the backend because there is no easy interface
27198 for the front-end to force non-PLT calls to use the GOT. This is currently
27199 used only with 64-bit ELF targets to call the function marked "noplt"
/* Return true when CALL_OP (a SYMBOL_REF) must be called without the PLT in
   non-PIC 64-bit ELF code: either -fno-plt is in effect or the callee is
   marked with the "noplt" attribute.  Rejected outright for PIC, the large
   code model, non-64-bit, Mach-O, SEH and PE-COFF targets, and for symbols
   local to this unit.  NOTE(review): return statements are elided here.  */
27203 ix86_nopic_noplt_attribute_p (rtx call_op)
27205 if (flag_pic || ix86_cmodel == CM_LARGE
27206 || !TARGET_64BIT || TARGET_MACHO || TARGET_SEH || TARGET_PECOFF
27207 || SYMBOL_REF_LOCAL_P (call_op))
27210 tree symbol_decl = SYMBOL_REF_DECL (call_op);
27213 || (symbol_decl != NULL_TREE
27214 && lookup_attribute ("noplt", DECL_ATTRIBUTES (symbol_decl))))
27220 /* Output the assembly for a call instruction. */
/* Return the assembler template for call/sibcall INSN with address CALL_OP.
   Chooses between direct PLT-less GOTPCREL calls, direct calls, and
   indirect calls, and on SEH targets decides whether a trailing nop is
   needed after the call.  */
27223 ix86_output_call_insn (rtx_insn *insn, rtx call_op)
27225 bool direct_p = constant_call_address_operand (call_op, VOIDmode);
27226 bool seh_nop_p = false;
27229 if (SIBLING_CALL_P (insn))
/* Tail call: emit a jmp rather than a call.  */
27231 if (direct_p && ix86_nopic_noplt_attribute_p (call_op))
27232 xasm = "%!jmp\t*%p0@GOTPCREL(%%rip)";
27234 xasm = "%!jmp\t%P0";
27235 /* SEH epilogue detection requires the indirect branch case
27236 to include REX.W.  */
27237 else if (TARGET_SEH)
27238 xasm = "%!rex.W jmp %A0";
27240 xasm = "%!jmp\t%A0";
27242 output_asm_insn (xasm, &call_op);
27246 /* SEH unwinding can require an extra nop to be emitted in several
27247 circumstances.  Determine if we have one of those.  */
27252 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
27254 /* If we get to another real insn, we don't need the nop.  */
27258 /* If we get to the epilogue note, prevent a catch region from
27259 being adjacent to the standard epilogue sequence.  If non-
27260 call-exceptions, we'll have done this during epilogue emission.  */
27261 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
27262 && !flag_non_call_exceptions
27263 && !can_throw_internal (insn))
27270 /* If we didn't find a real insn following the call, prevent the
27271 unwinder from looking into the next function.  */
/* Normal (non-sibling) call templates below.  */
27276 if (direct_p && ix86_nopic_noplt_attribute_p (call_op))
27277 xasm = "%!call\t*%p0@GOTPCREL(%%rip)";
27279 xasm = "%!call\t%P0";
27281 xasm = "%!call\t%A0";
27283 output_asm_insn (xasm, &call_op);
27291 /* Clear stack slot assignments remembered from previous functions.
27292 This is called from INIT_EXPANDERS once before RTL is emitted for each
static struct machine_function *
27296 ix86_init_machine_status (void)
27298 struct machine_function *f;
27300 f = ggc_cleared_alloc<machine_function> ();
/* -1 marks "not yet computed" for the fast prologue/epilogue heuristic.  */
27301 f->use_fast_prologue_epilogue_nregs = -1;
/* Default the per-function calling ABI to the target-wide one.  */
27302 f->call_abi = ix86_abi;
27307 /* Return a MEM corresponding to a stack slot with mode MODE.
27308 Allocate a new slot if necessary.
27310 The RTL for a function can have several slots available: N is
27311 which slot to use. */
/* Return a MEM for stack slot N of mode MODE, reusing a previously
   allocated slot with the same (mode, n) key when one exists; otherwise
   allocate a new slot and record it on the ix86_stack_locals list.
   The returned MEM is a validized copy so callers may modify it.  */
27314 assign_386_stack_local (machine_mode mode, enum ix86_stack_slot n)
27316 struct stack_local_entry *s;
27318 gcc_assert (n < MAX_386_STACK_LOCALS);
/* Linear search is fine: MAX_386_STACK_LOCALS bounds the list length.  */
27320 for (s = ix86_stack_locals; s; s = s->next)
27321 if (s->mode == mode && s->n == n)
27322 return validize_mem (copy_rtx (s->rtl));
27324 s = ggc_alloc<stack_local_entry> ();
27327 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
27329 s->next = ix86_stack_locals;
27330 ix86_stack_locals = s;
27331 return validize_mem (copy_rtx (s->rtl));
/* Instantiate virtual registers in the RTL of every recorded stack-local
   slot (TARGET_INSTANTIATE_DECLS hook).  */
27335 ix86_instantiate_decls (void)
27337 struct stack_local_entry *s;
27339 for (s = ix86_stack_locals; s; s = s->next)
27340 if (s->rtl != NULL_RTX)
27341 instantiate_decl_rtl (s->rtl);
27344 /* Return the number used for encoding REG, in the range 0..7. */
/* Return the 3-bit number used to encode REG in an x86 modr/m byte, i.e.
   its offset within its register file (stack, SSE, MMX, REX-SSE, REX-int,
   mask, bound).  NOTE(review): the listing is elided — the handling of the
   ordinary integer registers (below line 27349) is missing from this
   extract.  */
27347 reg_encoded_number (rtx reg)
27349 unsigned regno = REGNO (reg);
27371 if (IN_RANGE (regno, FIRST_STACK_REG, LAST_STACK_REG))
27372 return regno - FIRST_STACK_REG;
27373 if (IN_RANGE (regno, FIRST_SSE_REG, LAST_SSE_REG))
27374 return regno - FIRST_SSE_REG;
27375 if (IN_RANGE (regno, FIRST_MMX_REG, LAST_MMX_REG))
27376 return regno - FIRST_MMX_REG;
27377 if (IN_RANGE (regno, FIRST_REX_SSE_REG, LAST_REX_SSE_REG))
27378 return regno - FIRST_REX_SSE_REG;
27379 if (IN_RANGE (regno, FIRST_REX_INT_REG, LAST_REX_INT_REG))
27380 return regno - FIRST_REX_INT_REG;
27381 if (IN_RANGE (regno, FIRST_MASK_REG, LAST_MASK_REG))
27382 return regno - FIRST_MASK_REG;
27383 if (IN_RANGE (regno, FIRST_BND_REG, LAST_BND_REG))
27384 return regno - FIRST_BND_REG;
27388 /* Given an insn INSN with NOPERANDS OPERANDS, return the modr/m byte used
27389 in its encoding if it could be relevant for ROP mitigation, otherwise
27390 return -1. If POPNO0 and POPNO1 are nonnull, store the operand numbers
27391 used for calculating it into them. */
/* Compute the modr/m byte INSN would be encoded with, for ROP-mitigation
   purposes; -1 when not determinable (e.g. asm statements, no modrm).
   On success, optionally store the two operand numbers used through
   POPNO0/POPNO1.  NOTE(review): this listing is elided — the switch
   bodies, the declarations of op0/op1 and the failure returns are
   missing from the extract.  */
27394 ix86_get_modrm_for_rop (rtx_insn *insn, rtx *operands, int noperands,
27395 int *popno0 = 0, int *popno1 = 0)
/* asm statements have no insn attributes to query.  */
27397 if (asm_noperands (PATTERN (insn)) >= 0)
27399 int has_modrm = get_attr_modrm (insn);
27402 enum attr_modrm_class cls = get_attr_modrm_class (insn);
27406 case MODRM_CLASS_OP02:
27407 gcc_assert (noperands >= 3);
27416 case MODRM_CLASS_OP01:
27417 gcc_assert (noperands >= 2);
27429 if (REG_P (op0) && REG_P (op1))
27431 int enc0 = reg_encoded_number (op0);
27432 int enc1 = reg_encoded_number (op1);
/* mod=11 (register-direct), reg=enc1, r/m=enc0.  */
27433 return 0xc0 + (enc1 << 3) + enc0;
27438 /* Check whether x86 address PARTS is a pc-relative address. */
/* Return true if decomposed address PARTS is a pc-relative (%rip-based)
   address: a bare displacement that is a label, a non-TLS symbol, or one
   of the pc-relative GOT/TLS unspecs.  NOTE(review): the `symbol'
   declaration and the return statements are elided from this extract.  */
27441 rip_relative_addr_p (struct ix86_address *parts)
27443 rtx base, index, disp;
27445 base = parts->base;
27446 index = parts->index;
27447 disp = parts->disp;
/* Only a displacement with neither base nor index can be %rip-relative.  */
27449 if (disp && !base && !index)
/* Strip a CONST wrapper and a constant-offset PLUS to reach the symbol.  */
27455 if (GET_CODE (disp) == CONST)
27456 symbol = XEXP (disp, 0);
27457 if (GET_CODE (symbol) == PLUS
27458 && CONST_INT_P (XEXP (symbol, 1)))
27459 symbol = XEXP (symbol, 0);
27461 if (GET_CODE (symbol) == LABEL_REF
27462 || (GET_CODE (symbol) == SYMBOL_REF
27463 && SYMBOL_REF_TLS_MODEL (symbol) == 0)
27464 || (GET_CODE (symbol) == UNSPEC
27465 && (XINT (symbol, 1) == UNSPEC_GOTPCREL
27466 || XINT (symbol, 1) == UNSPEC_PCREL
27467 || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
27474 /* Calculate the length of the memory address in the instruction encoding.
27475 Includes addr32 prefix, does not include the one-byte modrm, opcode,
27476 or other prefixes. We never generate addr32 prefix for LEA insn. */
/* Return the encoded length in bytes of memory address ADDR: segment and
   addr32 prefixes, SIB byte and displacement, but not the modrm byte,
   opcode or other prefixes.  LEA means the address is used by an lea insn,
   which never takes an addr32 prefix.  NOTE(review): this listing is
   elided — declarations of `ok'/`len', several returns/`len' updates and
   closing braces are missing from the extract.  */
27479 memory_address_length (rtx addr, bool lea)
27481 struct ix86_address parts;
27482 rtx base, index, disp;
/* Autoincrement addresses encode no extra address bytes here.  */
27486 if (GET_CODE (addr) == PRE_DEC
27487 || GET_CODE (addr) == POST_INC
27488 || GET_CODE (addr) == PRE_MODIFY
27489 || GET_CODE (addr) == POST_MODIFY)
27492 ok = ix86_decompose_address (addr, &parts);
/* Non-generic address spaces need a one-byte segment prefix.  */
27495 len = (parts.seg == ADDR_SPACE_GENERIC) ? 0 : 1;
27497 /* If this is not LEA instruction, add the length of addr32 prefix.  */
27498 if (TARGET_64BIT && !lea
27499 && (SImode_address_operand (addr, VOIDmode)
27500 || (parts.base && GET_MODE (parts.base) == SImode)
27501 || (parts.index && GET_MODE (parts.index) == SImode)))
27505 index = parts.index;
/* Look through SUBREGs so the REG tests below see the hard register.  */
27508 if (base && SUBREG_P (base))
27509 base = SUBREG_REG (base);
27510 if (index && SUBREG_P (index))
27511 index = SUBREG_REG (index);
27513 gcc_assert (base == NULL_RTX || REG_P (base));
27514 gcc_assert (index == NULL_RTX || REG_P (index));
27517 - esp as the base always wants an index,
27518 - ebp as the base always wants a displacement,
27519 - r12 as the base always wants an index,
27520 - r13 as the base always wants a displacement.  */
27522 /* Register Indirect.  */
27523 if (base && !index && !disp)
27525 /* esp (for its index) and ebp (for its displacement) need
27526 the two-byte modrm form.  Similarly for r12 and r13 in 64-bit
27528 if (base == arg_pointer_rtx
27529 || base == frame_pointer_rtx
27530 || REGNO (base) == SP_REG
27531 || REGNO (base) == BP_REG
27532 || REGNO (base) == R12_REG
27533 || REGNO (base) == R13_REG)
27537 /* Direct Addressing.  In 64-bit mode mod 00 r/m 5
27538 is not disp32, but disp32(%rip), so for disp32
27539 SIB byte is needed, unless print_operand_address
27540 optimizes it into disp32(%rip) or (%rip) is implied
27542 else if (disp && !base && !index)
27545 if (rip_relative_addr_p (&parts))
27550 /* Find the length of the displacement constant.  */
/* satisfies_constraint_K: signed 8-bit, so a disp8 suffices.  */
27553 if (base && satisfies_constraint_K (disp))
27558 /* ebp always wants a displacement.  Similarly r13.  */
27559 else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
27562 /* An index requires the two-byte modrm form....  */
27564 /* ...like esp (or r12), which always wants an index.  */
27565 || base == arg_pointer_rtx
27566 || base == frame_pointer_rtx
27567 || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
27574 /* Compute default value for "length_immediate" attribute. When SHORTFORM
27575 is set, expect that insn have 8bit immediate alternative. */
/* Compute the default "length_immediate" attribute value for INSN: the
   byte size of its constant operand's encoding.  SHORTFORM means the insn
   has an 8-bit sign-extended immediate alternative, so constants fitting
   in [-128, 127] (after truncation to the operand mode) encode in one
   byte.  NOTE(review): the per-mode switch bodies and returns are elided
   from this extract.  */
27577 ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform)
27581 extract_insn_cached (insn);
27582 for (i = recog_data.n_operands - 1; i >= 0; --i)
27583 if (CONSTANT_P (recog_data.operand[i]))
27585 enum attr_mode mode = get_attr_mode (insn);
27588 if (shortform && CONST_INT_P (recog_data.operand[i]))
27590 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
/* Truncate to the operand mode before the disp8 range check, since the
   encoded immediate is interpreted in that mode.  */
27597 ival = trunc_int_for_mode (ival, HImode);
27600 ival = trunc_int_for_mode (ival, SImode);
27605 if (IN_RANGE (ival, -128, 127))
27622 /* Immediates for DImode instructions are encoded
27623 as 32bit sign extended values.  */
27628 fatal_insn ("unknown insn mode", insn);
27634 /* Compute default value for "length_address" attribute. */
/* Compute the default "length_address" attribute value for INSN: the
   encoded length of its memory operand's address, or of the lea source
   address.  NOTE(review): parts of this listing (returns, braces, an
   operand filter) are elided from the extract.  */
27636 ix86_attr_length_address_default (rtx_insn *insn)
/* lea encodes its SET_SRC as an address; handle it specially.  */
27640 if (get_attr_type (insn) == TYPE_LEA)
27642 rtx set = PATTERN (insn), addr;
27644 if (GET_CODE (set) == PARALLEL)
27645 set = XVECEXP (set, 0, 0);
27647 gcc_assert (GET_CODE (set) == SET);
27649 addr = SET_SRC (set);
27651 return memory_address_length (addr, true);
27654 extract_insn_cached (insn);
27655 for (i = recog_data.n_operands - 1; i >= 0; --i)
27657 rtx op = recog_data.operand[i];
27660 constrain_operands_cached (insn, reload_completed);
27661 if (which_alternative != -1)
/* Walk the constraint string to the chosen alternative so that
   'X'-constrained (ignored) operands can be skipped.  */
27663 const char *constraints = recog_data.constraints[i];
27664 int alt = which_alternative;
27666 while (*constraints == '=' || *constraints == '+')
27669 while (*constraints++ != ',')
27671 /* Skip ignored operands.  */
27672 if (*constraints == 'X')
27676 int len = memory_address_length (XEXP (op, 0), false);
27678 /* Account for segment prefix for non-default addr spaces.  */
27679 if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (op)))
27688 /* Compute default value for "length_vex" attribute. It includes
27689 2 or 3 byte VEX prefix and 1 opcode byte. */
/* Compute the default "length_vex" attribute value: the size of the VEX
   prefix (2 or 3 bytes) plus the opcode byte.  A 2-byte VEX prefix is
   only possible for 0f-map opcodes without VEX.W and without any REX.X/
   REX.B/REX.W register usage.  NOTE(review): the return statements are
   elided from this extract.  */
27692 ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode,
27697 /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3
27698 byte VEX prefix.  */
27699 if (!has_0f_opcode || has_vex_w)
27702 /* We can always use 2 byte VEX prefix in 32bit.  */
27706 extract_insn_cached (insn);
27708 for (i = recog_data.n_operands - 1; i >= 0; --i)
27709 if (REG_P (recog_data.operand[i]))
27711 /* REX.W bit uses 3 byte VEX prefix.  */
27712 if (GET_MODE (recog_data.operand[i]) == DImode
27713 && GENERAL_REG_P (recog_data.operand[i]))
27718 /* REX.X or REX.B bits use 3 byte VEX prefix.  */
27719 if (MEM_P (recog_data.operand[i])
27720 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
27727 /* Return the maximum number of instructions a cpu can issue. */
/* TARGET_SCHED_ISSUE_RATE: return the maximum number of instructions the
   tuned-for CPU can issue per cycle.  NOTE(review): the `return N;'
   statements between the case groups are elided from this extract; each
   group of cases below shares one return value in the original.  */
27730 ix86_issue_rate (void)
27734 case PROCESSOR_PENTIUM:
27735 case PROCESSOR_LAKEMONT:
27736 case PROCESSOR_BONNELL:
27737 case PROCESSOR_SILVERMONT:
27738 case PROCESSOR_KNL:
27739 case PROCESSOR_INTEL:
27741 case PROCESSOR_BTVER2:
27742 case PROCESSOR_PENTIUM4:
27743 case PROCESSOR_NOCONA:
27746 case PROCESSOR_PENTIUMPRO:
27747 case PROCESSOR_ATHLON:
27749 case PROCESSOR_AMDFAM10:
27750 case PROCESSOR_GENERIC:
27751 case PROCESSOR_BTVER1:
27754 case PROCESSOR_BDVER1:
27755 case PROCESSOR_BDVER2:
27756 case PROCESSOR_BDVER3:
27757 case PROCESSOR_BDVER4:
27758 case PROCESSOR_ZNVER1:
27759 case PROCESSOR_CORE2:
27760 case PROCESSOR_NEHALEM:
27761 case PROCESSOR_SANDYBRIDGE:
27762 case PROCESSOR_HASWELL:
27770 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
27771 by DEP_INSN and nothing set by DEP_INSN. */
/* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads the flags
   set by DEP_INSN and nothing else DEP_INSN sets.  INSN_TYPE is INSN's
   attribute type; only flag-consuming insn types are considered.  */
27774 ix86_flags_dependent (rtx_insn *insn, rtx_insn *dep_insn, enum attr_type insn_type)
27778 /* Simplify the test for uninteresting insns.  */
27779 if (insn_type != TYPE_SETCC
27780 && insn_type != TYPE_ICMOV
27781 && insn_type != TYPE_FCMOV
27782 && insn_type != TYPE_IBR)
/* Single SET: SET is its destination, there is no second SET.  */
27785 if ((set = single_set (dep_insn)) != 0)
27787 set = SET_DEST (set)
/* Two-SET PARALLEL: SET and SET2 are the two destinations.  */
27790 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
27791 && XVECLEN (PATTERN (dep_insn), 0) == 2
27792 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
27793 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
27795 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
/* BUGFIX: take the SECOND set's destination (element 1, as the guard
   above checks), not a duplicate of the first.  Using element 0 here
   made SET2 an alias of SET, defeating the SET2 overlap test below.  */
27796 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
/* The dependence is a flags dependence only if the (first) destination
   is the flags register itself.  */
27801 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
27804 /* This test is true if the dependent insn reads the flags but
27805 not any other potentially set register.  */
27806 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
27809 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
27815 /* Return true iff USE_INSN has a memory address with operands set by
/* Return true iff USE_INSN has a memory operand whose address involves a
   register modified by SET_INSN (an address-generation-interlock
   dependence).  Only the first memory operand found is tested.  */
27819 ix86_agi_dependent (rtx_insn *set_insn, rtx_insn *use_insn)
27822 extract_insn_cached (use_insn);
27823 for (i = recog_data.n_operands - 1; i >= 0; --i)
27824 if (MEM_P (recog_data.operand[i]))
27826 rtx addr = XEXP (recog_data.operand[i], 0);
27827 return modified_in_p (addr, set_insn) != 0;
27832 /* Helper function for exact_store_load_dependency.
27833 Return true if addr is found in insn. */
/* Helper function for exact_store_load_dependency.
   Recursively walk the rtx INSN and return true if ADDR occurs in it
   (rtx_equal_p match).  NOTE(review): the leading switch cases between
   lines 27841 and 27860 are elided from this extract.  */
27835 exact_dependency_1 (rtx addr, rtx insn)
27837 enum rtx_code code;
27838 const char *format_ptr;
27841 code = GET_CODE (insn);
27845 if (rtx_equal_p (addr, insn))
/* Generic recursion over the rtx format string: 'e' = subexpression,
   'E' = vector of subexpressions.  */
27860 format_ptr = GET_RTX_FORMAT (code);
27861 for (i = 0; i < GET_RTX_LENGTH (code); i++)
27863 switch (*format_ptr++)
27866 if (exact_dependency_1 (addr, XEXP (insn, i)))
27870 for (j = 0; j < XVECLEN (insn, i); j++)
27871 if (exact_dependency_1 (addr, XVECEXP (insn, i, j)))
27879 /* Return true if there exists exact dependency for store & load, i.e.
27880 the same memory address is used in them. */
/* Return true if there exists an exact store/load dependency between
   STORE and LOAD, i.e. the stored-to memory address appears in LOAD's
   source.  NOTE(review): the failure returns after each check are elided
   from this extract.  */
27882 exact_store_load_dependency (rtx_insn *store, rtx_insn *load)
27886 set1 = single_set (store);
27889 if (!MEM_P (SET_DEST (set1)))
27891 set2 = single_set (load);
27894 if (exact_dependency_1 (SET_DEST (set1), SET_SRC (set2)))
/* TARGET_SCHED_ADJUST_COST hook: given dependence LINK from DEP_INSN to INSN
   with default latency COST, return the tuned latency for the current CPU.
   NOTE(review): extraction-damaged block (stray line numbers, missing lines);
   code left byte-identical, comments only added.  */
27900 ix86_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
27902 enum attr_type insn_type, dep_insn_type;
27903 enum attr_memory memory;
27905 int dep_insn_code_number;
27907 /* Anti and output dependencies have zero cost on all CPUs. */
27908 if (REG_NOTE_KIND (link) != 0)
27911 dep_insn_code_number = recog_memoized (dep_insn);
27913 /* If we can't recognize the insns, we can't really do anything. */
27914 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
27917 insn_type = get_attr_type (insn);
27918 dep_insn_type = get_attr_type (dep_insn);
/* Per-processor tuning follows; the dispatching switch header itself is on a
   missing line.  */
27922 case PROCESSOR_PENTIUM:
27923 case PROCESSOR_LAKEMONT:
27924 /* Address Generation Interlock adds a cycle of latency. */
27925 if (insn_type == TYPE_LEA)
27927 rtx addr = PATTERN (insn);
27929 if (GET_CODE (addr) == PARALLEL)
27930 addr = XVECEXP (addr, 0, 0);
27932 gcc_assert (GET_CODE (addr) == SET)
27934 addr = SET_SRC (addr);
27935 if (modified_in_p (addr, dep_insn))
27938 else if (ix86_agi_dependent (dep_insn, insn))
27941 /* ??? Compares pair with jump/setcc. */
27942 if (ix86_flags_dependent (insn, dep_insn, insn_type))
27945 /* Floating point stores require value to be ready one cycle earlier. */
27946 if (insn_type == TYPE_FMOV
27947 && get_attr_memory (insn) == MEMORY_STORE
27948 && !ix86_agi_dependent (dep_insn, insn))
27952 case PROCESSOR_PENTIUMPRO:
27953 /* INT->FP conversion is expensive. */
27954 if (get_attr_fp_int_src (dep_insn))
27957 /* There is one cycle extra latency between an FP op and a store. */
27958 if (insn_type == TYPE_FMOV
27959 && (set = single_set (dep_insn)) != NULL_RTX
27960 && (set2 = single_set (insn)) != NULL_RTX
27961 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
27962 && MEM_P (SET_DEST (set2)))
27965 memory = get_attr_memory (insn);
27967 /* Show ability of reorder buffer to hide latency of load by executing
27968 in parallel with previous instruction in case
27969 previous instruction is not needed to compute the address. */
27970 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
27971 && !ix86_agi_dependent (dep_insn, insn))
27973 /* Claim moves to take one cycle, as core can issue one load
27974 at time and the next load can start cycle later. */
27975 if (dep_insn_type == TYPE_IMOV
27976 || dep_insn_type == TYPE_FMOV)
27984 /* The esp dependency is resolved before
27985 the instruction is really finished. */
27986 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
27987 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP)
27990 /* INT->FP conversion is expensive. */
27991 if (get_attr_fp_int_src (dep_insn))
27994 memory = get_attr_memory (insn);
27996 /* Show ability of reorder buffer to hide latency of load by executing
27997 in parallel with previous instruction in case
27998 previous instruction is not needed to compute the address. */
27999 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
28000 && !ix86_agi_dependent (dep_insn, insn))
28002 /* Claim moves to take one cycle, as core can issue one load
28003 at time and the next load can start cycle later. */
28004 if (dep_insn_type == TYPE_IMOV
28005 || dep_insn_type == TYPE_FMOV)
/* AMD family and generic tuning share the stack-engine special case.  */
28014 case PROCESSOR_AMDFAM10:
28015 case PROCESSOR_BDVER1:
28016 case PROCESSOR_BDVER2:
28017 case PROCESSOR_BDVER3:
28018 case PROCESSOR_BDVER4:
28019 case PROCESSOR_ZNVER1:
28020 case PROCESSOR_BTVER1:
28021 case PROCESSOR_BTVER2:
28022 case PROCESSOR_GENERIC:
28023 /* Stack engine allows to execute push&pop instructions in parall. */
28024 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
28025 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
28029 case PROCESSOR_ATHLON:
28031 memory = get_attr_memory (insn);
28033 /* Show ability of reorder buffer to hide latency of load by executing
28034 in parallel with previous instruction in case
28035 previous instruction is not needed to compute the address. */
28036 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
28037 && !ix86_agi_dependent (dep_insn, insn))
28039 enum attr_unit unit = get_attr_unit (insn);
28042 /* Because of the difference between the length of integer and
28043 floating unit pipeline preparation stages, the memory operands
28044 for floating point are cheaper.
28046 ??? For Athlon it the difference is most probably 2. */
28047 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
28050 loadcost = TARGET_ATHLON ? 2 : 0;
28052 if (cost >= loadcost)
28059 case PROCESSOR_CORE2:
28060 case PROCESSOR_NEHALEM:
28061 case PROCESSOR_SANDYBRIDGE:
28062 case PROCESSOR_HASWELL:
28063 /* Stack engine allows to execute push&pop instructions in parall. */
28064 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
28065 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
28068 memory = get_attr_memory (insn);
28070 /* Show ability of reorder buffer to hide latency of load by executing
28071 in parallel with previous instruction in case
28072 previous instruction is not needed to compute the address. */
28073 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
28074 && !ix86_agi_dependent (dep_insn, insn))
/* In-order Atom-class cores: model store-forwarding stalls explicitly.  */
28083 case PROCESSOR_SILVERMONT:
28084 case PROCESSOR_KNL:
28085 case PROCESSOR_INTEL:
28086 if (!reload_completed)
28089 /* Increase cost of integer loads. */
28090 memory = get_attr_memory (dep_insn);
28091 if (memory == MEMORY_LOAD || memory == MEMORY_BOTH)
28093 enum attr_unit unit = get_attr_unit (dep_insn);
28094 if (unit == UNIT_INTEGER && cost == 1)
28096 if (memory == MEMORY_LOAD)
28100 /* Increase cost of ld/st for short int types only
28101 because of store forwarding issue. */
28102 rtx set = single_set (dep_insn);
28103 if (set && (GET_MODE (SET_DEST (set)) == QImode
28104 || GET_MODE (SET_DEST (set)) == HImode))
28106 /* Increase cost of store/load insn if exact
28107 dependence exists and it is load insn. */
28108 enum attr_memory insn_memory = get_attr_memory (insn);
28109 if (insn_memory == MEMORY_LOAD
28110 && exact_store_load_dependency (dep_insn, insn))
28124 /* How many alternative schedules to try. This should be as wide as the
28125 scheduling freedom in the DFA, but no wider. Making this value too
28126 large results extra work for the scheduler. */
/* TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD hook: per-CPU lookahead
   depth for haifa-sched's max_issue.  Return values for most cases sit on
   lines missing from this damaged extraction.  */
28129 ia32_multipass_dfa_lookahead (void)
28133 case PROCESSOR_PENTIUM:
28134 case PROCESSOR_LAKEMONT:
28137 case PROCESSOR_PENTIUMPRO:
28141 case PROCESSOR_BDVER1:
28142 case PROCESSOR_BDVER2:
28143 case PROCESSOR_BDVER3:
28144 case PROCESSOR_BDVER4:
28145 /* We use lookahead value 4 for BD both before and after reload
28146 schedules. Plan is to have value 8 included for O3. */
28149 case PROCESSOR_CORE2:
28150 case PROCESSOR_NEHALEM:
28151 case PROCESSOR_SANDYBRIDGE:
28152 case PROCESSOR_HASWELL:
28153 case PROCESSOR_BONNELL:
28154 case PROCESSOR_SILVERMONT:
28155 case PROCESSOR_KNL:
28156 case PROCESSOR_INTEL:
28157 /* Generally, we want haifa-sched:max_issue() to look ahead as far
28158 as many instructions can be executed on a cycle, i.e.,
28159 issue_rate. I wonder why tuning for many CPUs does not do this. */
28160 if (reload_completed)
28161 return ix86_issue_rate ();
28162 /* Don't use lookahead for pre-reload schedule to save compile time. */
28170 /* Return true if target platform supports macro-fusion. */
28173 ix86_macro_fusion_p ()
28175 return TARGET_FUSE_CMP_AND_BRANCH;
28178 /* Check whether current microarchitecture support macro fusion
28179 for insn pair "CONDGEN + CONDJMP". Refer to
28180 "Intel Architectures Optimization Reference Manual". */
/* NOTE(review): extraction-damaged block; code left byte-identical.
   CONDGEN is the flag-producing insn (test/cmp/incdec/alu), CONDJMP the
   conditional jump; returns whether the pair may macro-fuse.  */
28183 ix86_macro_fusion_pair_p (rtx_insn *condgen, rtx_insn *condjmp)
28186 enum rtx_code ccode;
28187 rtx compare_set = NULL_RTX, test_if, cond;
28188 rtx alu_set = NULL_RTX, addr = NULL_RTX;
28190 if (!any_condjump_p (condjmp))
/* Only these four generator types are fusion candidates at all.  */
28193 if (get_attr_type (condgen) != TYPE_TEST
28194 && get_attr_type (condgen) != TYPE_ICMP
28195 && get_attr_type (condgen) != TYPE_INCDEC
28196 && get_attr_type (condgen) != TYPE_ALU)
28199 compare_set = single_set (condgen);
28200 if (compare_set == NULL_RTX
28201 && !TARGET_FUSE_ALU_AND_BRANCH)
/* Multi-set PARALLEL: dig out the COMPARE set and the ALU set.  */
28204 if (compare_set == NULL_RTX)
28207 rtx pat = PATTERN (condgen);
28208 for (i = 0; i < XVECLEN (pat, 0); i++)
28209 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
28211 rtx set_src = SET_SRC (XVECEXP (pat, 0, i));
28212 if (GET_CODE (set_src) == COMPARE)
28213 compare_set = XVECEXP (pat, 0, i);
28215 alu_set = XVECEXP (pat, 0, i);
28218 if (compare_set == NULL_RTX)
28220 src = SET_SRC (compare_set);
28221 if (GET_CODE (src) != COMPARE)
28224 /* Macro-fusion for cmp/test MEM-IMM + conditional jmp is not
28226 if ((MEM_P (XEXP (src, 0))
28227 && CONST_INT_P (XEXP (src, 1)))
28228 || (MEM_P (XEXP (src, 1))
28229 && CONST_INT_P (XEXP (src, 0))))
28232 /* No fusion for RIP-relative address. */
28233 if (MEM_P (XEXP (src, 0)))
28234 addr = XEXP (XEXP (src, 0), 0);
28235 else if (MEM_P (XEXP (src, 1)))
28236 addr = XEXP (XEXP (src, 1), 0);
28239 ix86_address parts;
28240 int ok = ix86_decompose_address (addr, &parts);
28243 if (rip_relative_addr_p (&parts))
28247 test_if = SET_SRC (pc_set (condjmp));
28248 cond = XEXP (test_if, 0);
28249 ccode = GET_CODE (cond);
28250 /* Check whether conditional jump use Sign or Overflow Flags. */
28251 if (!TARGET_FUSE_CMP_AND_BRANCH_SOFLAGS
28258 /* Return true for TYPE_TEST and TYPE_ICMP. */
28259 if (get_attr_type (condgen) == TYPE_TEST
28260 || get_attr_type (condgen) == TYPE_ICMP)
28263 /* The following is the case that macro-fusion for alu + jmp. */
28264 if (!TARGET_FUSE_ALU_AND_BRANCH || !alu_set)
28267 /* No fusion for alu op with memory destination operand. */
28268 dest = SET_DEST (alu_set);
28272 /* Macro-fusion for inc/dec + unsigned conditional jump is not
28274 if (get_attr_type (condgen) == TYPE_INCDEC
28284 /* Try to reorder ready list to take advantage of Atom pipelined IMUL
28285 execution. It is applied if
28286 (1) IMUL instruction is on the top of list;
28287 (2) There exists the only producer of independent IMUL instruction in
28289 Return index of IMUL producer if it was found and -1 otherwise. */
/* NOTE(review): extraction-damaged block; code left byte-identical.
   Bonnell-only ready-list heuristic used by ix86_sched_reorder below.  */
28291 do_reorder_for_imul (rtx_insn **ready, int n_ready)
28294 rtx set, insn1, insn2;
28295 sd_iterator_def sd_it;
28300 if (!TARGET_BONNELL)
28303 /* Check that IMUL instruction is on the top of ready list. */
28304 insn = ready[n_ready - 1];
28305 set = single_set (insn);
/* Top insn must be a SImode MULT (an imul).  */
28308 if (!(GET_CODE (SET_SRC (set)) == MULT
28309 && GET_MODE (SET_SRC (set)) == SImode))
28312 /* Search for producer of independent IMUL instruction. */
28313 for (i = n_ready - 2; i >= 0; i--)
28316 if (!NONDEBUG_INSN_P (insn))
28318 /* Skip IMUL instruction. */
28319 insn2 = PATTERN (insn);
28320 if (GET_CODE (insn2) == PARALLEL)
28321 insn2 = XVECEXP (insn2, 0, 0);
28322 if (GET_CODE (insn2) == SET
28323 && GET_CODE (SET_SRC (insn2)) == MULT
28324 && GET_MODE (SET_SRC (insn2)) == SImode)
/* Walk forward deps of the candidate: it must feed another IMUL ...  */
28327 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
28330 con = DEP_CON (dep);
28331 if (!NONDEBUG_INSN_P (con))
28333 insn1 = PATTERN (con);
28334 if (GET_CODE (insn1) == PARALLEL)
28335 insn1 = XVECEXP (insn1, 0, 0);
28337 if (GET_CODE (insn1) == SET
28338 && GET_CODE (SET_SRC (insn1)) == MULT
28339 && GET_MODE (SET_SRC (insn1)) == SImode)
28341 sd_iterator_def sd_it1;
28343 /* Check if there is no other dependee for IMUL. */
/* ... and be that IMUL's sole producer (back-dep check).  */
28345 FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1)
28348 pro = DEP_PRO (dep1);
28349 if (!NONDEBUG_INSN_P (pro))
28364 /* Try to find the best candidate on the top of ready list if two insns
28365 have the same priority - candidate is best if its dependees were
28366 scheduled earlier. Applied for Silvermont only.
28367 Return true if top 2 insns must be interchanged. */
/* NOTE(review): extraction-damaged block; code left byte-identical.  */
28369 swap_top_of_ready_list (rtx_insn **ready, int n_ready)
28371 rtx_insn *top = ready[n_ready - 1];
28372 rtx_insn *next = ready[n_ready - 2];
28374 sd_iterator_def sd_it;
28378 #define INSN_TICK(INSN) (HID (INSN)->tick)
28380 if (!TARGET_SILVERMONT && !TARGET_INTEL)
/* Both candidates must be ordinary non-debug, non-jump single sets.  */
28383 if (!NONDEBUG_INSN_P (top))
28385 if (!NONJUMP_INSN_P (top))
28387 if (!NONDEBUG_INSN_P (next))
28389 if (!NONJUMP_INSN_P (next))
28391 set = single_set (top);
28394 set = single_set (next);
28398 if (INSN_PRIORITY_KNOWN (top) && INSN_PRIORITY_KNOWN (next))
28400 if (INSN_PRIORITY (top) != INSN_PRIORITY (next))
28402 /* Determine winner more precise. */
/* clock1/clock2 accumulate the latest tick among resolved producers.  */
28403 FOR_EACH_DEP (top, SD_LIST_RES_BACK, sd_it, dep)
28406 pro = DEP_PRO (dep);
28407 if (!NONDEBUG_INSN_P (pro))
28409 if (INSN_TICK (pro) > clock1)
28410 clock1 = INSN_TICK (pro);
28412 FOR_EACH_DEP (next, SD_LIST_RES_BACK, sd_it, dep)
28415 pro = DEP_PRO (dep);
28416 if (!NONDEBUG_INSN_P (pro))
28418 if (INSN_TICK (pro) > clock2)
28419 clock2 = INSN_TICK (pro);
28422 if (clock1 == clock2)
28424 /* Determine winner - load must win. */
28425 enum attr_memory memory1, memory2;
28426 memory1 = get_attr_memory (top);
28427 memory2 = get_attr_memory (next);
28428 if (memory2 == MEMORY_LOAD && memory1 != MEMORY_LOAD)
28431 return (bool) (clock2 < clock1);
28437 /* Perform possible reodering of ready list for Atom/Silvermont only.
28438 Return issue rate. */
/* TARGET_SCHED_REORDER hook.  NOTE(review): extraction-damaged block; code
   left byte-identical, comments only added.  */
28440 ix86_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready,
28441 int *pn_ready, int clock_var)
28443 int issue_rate = -1;
28444 int n_ready = *pn_ready;
28449 /* Set up issue rate. */
28450 issue_rate = ix86_issue_rate ();
28452 /* Do reodering for BONNELL/SILVERMONT only. */
28453 if (!TARGET_BONNELL && !TARGET_SILVERMONT && !TARGET_INTEL)
28456 /* Nothing to do if ready list contains only 1 instruction. */
28460 /* Do reodering for post-reload scheduler only. */
28461 if (!reload_completed)
28464 if ((index = do_reorder_for_imul (ready, n_ready)) >= 0)
28466 if (sched_verbose > 1)
28467 fprintf (dump, ";;\tatom sched_reorder: put %d insn on top\n",
28468 INSN_UID (ready[index]));
28470 /* Put IMUL producer (ready[index]) at the top of ready list. */
/* Rotate: shift the tail down one slot, then place the producer on top.  */
28471 insn = ready[index];
28472 for (i = index; i < n_ready - 1; i++)
28473 ready[i] = ready[i + 1];
28474 ready[n_ready - 1] = insn;
28478 /* Skip selective scheduling since HID is not populated in it. */
28481 && swap_top_of_ready_list (ready, n_ready))
28483 if (sched_verbose > 1)
28484 fprintf (dump, ";;\tslm sched_reorder: swap %d and %d insns\n",
28485 INSN_UID (ready[n_ready - 1]), INSN_UID (ready[n_ready - 2]));
28486 /* Swap 2 top elements of ready list. */
28487 insn = ready[n_ready - 1];
28488 ready[n_ready - 1] = ready[n_ready - 2];
28489 ready[n_ready - 2] = insn;
/* Forward declaration (its 'static bool' line is missing in this damaged
   extraction).  */
28495 ix86_class_likely_spilled_p (reg_class_t);
28497 /* Returns true if lhs of insn is HW function argument register and set up
28498 is_spilled to true if it is likely spilled HW register. */
28500 insn_is_function_arg (rtx insn, bool* is_spilled)
28504 if (!NONDEBUG_INSN_P (insn))
28506 /* Call instructions are not movable, ignore it. */
/* Strip to the primary SET of a possible PARALLEL pattern.  */
28509 insn = PATTERN (insn);
28510 if (GET_CODE (insn) == PARALLEL)
28511 insn = XVECEXP (insn, 0, 0);
28512 if (GET_CODE (insn) != SET)
28514 dst = SET_DEST (insn);
28515 if (REG_P (dst) && HARD_REGISTER_P (dst)
28516 && ix86_function_arg_regno_p (REGNO (dst)))
28518 /* Is it likely spilled HW register? */
28519 if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
28520 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
28521 *is_spilled = true;
28527 /* Add output dependencies for chain of function adjacent arguments if only
28528 there is a move to likely spilled HW register. Return first argument
28529 if at least one dependence was added or NULL otherwise. */
/* NOTE(review): extraction-damaged block; code left byte-identical.
   Walks backwards from CALL toward HEAD collecting consecutive
   argument-register moves.  */
28531 add_parameter_dependencies (rtx_insn *call, rtx_insn *head)
28534 rtx_insn *last = call;
28535 rtx_insn *first_arg = NULL;
28536 bool is_spilled = false;
28538 head = PREV_INSN (head);
28540 /* Find nearest to call argument passing instruction. */
28543 last = PREV_INSN (last);
28546 if (!NONDEBUG_INSN_P (last))
28548 if (insn_is_function_arg (last, &is_spilled))
/* Then chain any further adjacent argument moves before it.  */
28556 insn = PREV_INSN (last);
28557 if (!INSN_P (insn))
28561 if (!NONDEBUG_INSN_P (insn))
28566 if (insn_is_function_arg (insn, &is_spilled))
28568 /* Add output depdendence between two function arguments if chain
28569 of output arguments contains likely spilled HW registers. */
28571 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
28572 first_arg = last = insn;
28582 /* Add output or anti dependency from insn to first_arg to restrict its code
/* NOTE(review): extraction-damaged block; code left byte-identical.  */
28585 avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn)
28590 /* Add anti dependencies for bounds stores. */
/* MPX bounds-store (UNSPEC_BNDSTX) patterns get an anti dependency and
   nothing else.  */
28592 && GET_CODE (PATTERN (insn)) == PARALLEL
28593 && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == UNSPEC
28594 && XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_BNDSTX)
28596 add_dependence (first_arg, insn, REG_DEP_ANTI);
28600 set = single_set (insn);
28603 tmp = SET_DEST (set);
28606 /* Add output dependency to the first function argument. */
28607 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
28610 /* Add anti dependency. */
28611 add_dependence (first_arg, insn, REG_DEP_ANTI);
28614 /* Avoid cross block motion of function argument through adding dependency
28615 from the first non-jump instruction in bb. */
/* Scans BB backwards from its end; the last ordinary single-set insn found
   is made dependent on ARG via avoid_func_arg_motion.  */
28617 add_dependee_for_func_arg (rtx_insn *arg, basic_block bb)
28619 rtx_insn *insn = BB_END (bb);
28623 if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
28625 rtx set = single_set (insn);
28628 avoid_func_arg_motion (arg, insn);
28632 if (insn == BB_HEAD (bb))
28634 insn = PREV_INSN (insn);
28638 /* Hook for pre-reload schedule - avoid motion of function arguments
28639 passed in likely spilled HW registers. */
/* TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK.  NOTE(review):
   extraction-damaged block; code left byte-identical.  */
28641 ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
28644 rtx_insn *first_arg = NULL;
/* Pre-reload only; post-reload scheduling is unaffected.  */
28645 if (reload_completed)
28647 while (head != tail && DEBUG_INSN_P (head))
28648 head = NEXT_INSN (head);
28649 for (insn = tail; insn != head; insn = PREV_INSN (insn))
28650 if (INSN_P (insn) && CALL_P (insn))
28652 first_arg = add_parameter_dependencies (insn, head);
28655 /* Add dependee for first argument to predecessors if only
28656 region contains more than one block. */
28657 basic_block bb = BLOCK_FOR_INSN (insn);
28658 int rgn = CONTAINING_RGN (bb->index);
28659 int nr_blks = RGN_NR_BLOCKS (rgn);
28660 /* Skip trivial regions and region head blocks that can have
28661 predecessors outside of region. */
28662 if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
28667 /* Regions are SCCs with the exception of selective
28668 scheduling with pipelining of outer blocks enabled.
28669 So also check that immediate predecessors of a non-head
28670 block are in the same region. */
28671 FOR_EACH_EDGE (e, ei, bb->preds)
28673 /* Avoid creating of loop-carried dependencies through
28674 using topological ordering in the region. */
28675 if (rgn == CONTAINING_RGN (e->src->index)
28676 && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
28677 add_dependee_for_func_arg (first_arg, e->src);
28685 else if (first_arg)
28686 avoid_func_arg_motion (first_arg, insn);
28689 /* Hook for pre-reload schedule - set priority of moves from likely spilled
28690 HW registers to maximum, to schedule them at soon as possible. These are
28691 moves from function argument registers at the top of the function entry
28692 and moves from function return value registers after call. */
/* TARGET_SCHED_ADJUST_PRIORITY hook.  */
28694 ix86_adjust_priority (rtx_insn *insn, int priority)
28698 if (reload_completed)
28701 if (!NONDEBUG_INSN_P (insn))
28704 set = single_set (insn);
28707 rtx tmp = SET_SRC (set);
/* Source must be a non-fixed, likely-spilled hard register.  */
28709 && HARD_REGISTER_P (tmp)
28710 && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
28711 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
28712 return current_sched_info->sched_max_insns_priority;
28718 /* Model decoder of Core 2/i7.
28719 Below hooks for multipass scheduling (see haifa-sched.c:max_issue)
28720 track the instruction fetch block boundaries and make sure that long
28721 (9+ bytes) instructions are assigned to D0. */
28723 /* Maximum length of an insn that can be handled by
28724 a secondary decoder unit. '8' for Core 2/i7. */
28725 static int core2i7_secondary_decoder_max_insn_size;
28727 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
28728 '16' for Core 2/i7. */
28729 static int core2i7_ifetch_block_size;
28731 /* Maximum number of instructions decoder can handle per cycle.
28732 '6' for Core 2/i7. */
28733 static int core2i7_ifetch_block_max_insns;
/* Mutable/const pointer typedefs for the per-cycle decoder-model state
   threaded through the first-cycle-multipass hooks below.  */
28735 typedef struct ix86_first_cycle_multipass_data_ *
28736 ix86_first_cycle_multipass_data_t;
28737 typedef const struct ix86_first_cycle_multipass_data_ *
28738 const_ix86_first_cycle_multipass_data_t;
28740 /* A variable to store target state across calls to max_issue within
28742 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
28743 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
28745 /* Initialize DATA. */
/* TARGET_SCHED_FIRST_CYCLE_MULTIPASS_INIT hook: zero the ifetch counters and
   clear the lazily-allocated ready_try_change bitmap.  */
28747 core2i7_first_cycle_multipass_init (void *_data)
28749 ix86_first_cycle_multipass_data_t data
28750 = (ix86_first_cycle_multipass_data_t) _data;
28752 data->ifetch_block_len = 0;
28753 data->ifetch_block_n_insns = 0;
28754 data->ready_try_change = NULL;
28755 data->ready_try_change_size = 0;
28758 /* Advancing the cycle; reset ifetch block counts. */
28760 core2i7_dfa_post_advance_cycle (void)
28762 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
/* Sanity: a cycle can never have issued more insns than the decoder width.  */
28764 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
28766 data->ifetch_block_len = 0;
28767 data->ifetch_block_n_insns = 0;
28770 static int min_insn_size (rtx_insn *);
28772 /* Filter out insns from ready_try that the core will not be able to issue
28773 on current cycle due to decoder. */
/* Marks ready_try[i] = 1 for insns blocked by the decoder model; records
   each newly-masked index in data->ready_try_change so backtracking can
   undo it.  */
28775 core2i7_first_cycle_multipass_filter_ready_try
28776 (const_ix86_first_cycle_multipass_data_t data,
28777 signed char *ready_try, int n_ready, bool first_cycle_insn_p)
28784 if (ready_try[n_ready])
28787 insn = get_ready_element (n_ready);
28788 insn_size = min_insn_size (insn);
28790 if (/* If this is a too long an insn for a secondary decoder ... */
28791 (!first_cycle_insn_p
28792 && insn_size > core2i7_secondary_decoder_max_insn_size)
28793 /* ... or it would not fit into the ifetch block ... */
28794 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
28795 /* ... or the decoder is full already ... */
28796 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
28797 /* ... mask the insn out. */
28799 ready_try[n_ready] = 1;
28801 if (data->ready_try_change)
28802 bitmap_set_bit (data->ready_try_change, n_ready);
28807 /* Prepare for a new round of multipass lookahead scheduling. */
/* TARGET_SCHED_FIRST_CYCLE_MULTIPASS_BEGIN hook.  */
28809 core2i7_first_cycle_multipass_begin (void *_data,
28810 signed char *ready_try, int n_ready,
28811 bool first_cycle_insn_p)
28813 ix86_first_cycle_multipass_data_t data
28814 = (ix86_first_cycle_multipass_data_t) _data;
28815 const_ix86_first_cycle_multipass_data_t prev_data
28816 = ix86_first_cycle_multipass_data;
28818 /* Restore the state from the end of the previous round. */
28819 data->ifetch_block_len = prev_data->ifetch_block_len;
28820 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
28822 /* Filter instructions that cannot be issued on current cycle due to
28823 decoder restrictions. */
28824 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
28825 first_cycle_insn_p);
28828 /* INSN is being issued in current solution. Account for its impact on
28829 the decoder model. */
/* TARGET_SCHED_FIRST_CYCLE_MULTIPASS_ISSUE hook.  */
28831 core2i7_first_cycle_multipass_issue (void *_data,
28832 signed char *ready_try, int n_ready,
28833 rtx_insn *insn, const void *_prev_data)
28835 ix86_first_cycle_multipass_data_t data
28836 = (ix86_first_cycle_multipass_data_t) _data;
28837 const_ix86_first_cycle_multipass_data_t prev_data
28838 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
28840 int insn_size = min_insn_size (insn);
/* Account INSN's bytes and slot against the current ifetch block.  */
28842 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
28843 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
28844 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
28845 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
28847 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
28848 if (!data->ready_try_change)
28850 data->ready_try_change = sbitmap_alloc (n_ready);
28851 data->ready_try_change_size = n_ready;
28853 else if (data->ready_try_change_size < n_ready)
28855 data->ready_try_change = sbitmap_resize (data->ready_try_change,
28857 data->ready_try_change_size = n_ready;
28859 bitmap_clear (data->ready_try_change);
28861 /* Filter out insns from ready_try that the core will not be able to issue
28862 on current cycle due to decoder. */
28863 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
28867 /* Revert the effect on ready_try. */
/* Clears every ready_try slot recorded in data->ready_try_change, undoing
   the masking done by the filter during the abandoned issue attempt.  */
28869 core2i7_first_cycle_multipass_backtrack (const void *_data,
28870 signed char *ready_try,
28871 int n_ready ATTRIBUTE_UNUSED)
28873 const_ix86_first_cycle_multipass_data_t data
28874 = (const_ix86_first_cycle_multipass_data_t) _data;
28875 unsigned int i = 0;
28876 sbitmap_iterator sbi;
28878 gcc_assert (bitmap_last_set_bit (data->ready_try_change) < n_ready);
28879 EXECUTE_IF_SET_IN_BITMAP (data->ready_try_change, 0, i, sbi)
28885 /* Save the result of multipass lookahead scheduling for the next round. */
/* Copies the chosen solution's ifetch counters into the persistent
   ix86_first_cycle_multipass_data so the next round can resume from them.  */
28887 core2i7_first_cycle_multipass_end (const void *_data)
28889 const_ix86_first_cycle_multipass_data_t data
28890 = (const_ix86_first_cycle_multipass_data_t) _data;
28891 ix86_first_cycle_multipass_data_t next_data
28892 = ix86_first_cycle_multipass_data;
28896 next_data->ifetch_block_len = data->ifetch_block_len;
28897 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
28901 /* Deallocate target data. */
/* Frees the ready_try_change bitmap if it was ever allocated.  */
28903 core2i7_first_cycle_multipass_fini (void *_data)
28905 ix86_first_cycle_multipass_data_t data
28906 = (ix86_first_cycle_multipass_data_t) _data;
28908 if (data->ready_try_change)
28910 sbitmap_free (data->ready_try_change);
28911 data->ready_try_change = NULL;
28912 data->ready_try_change_size = 0;
28916 /* Prepare for scheduling pass. */
/* TARGET_SCHED_INIT_GLOBAL hook.  NOTE(review): extraction-damaged block;
   code left byte-identical, comments only added.  */
28918 ix86_sched_init_global (FILE *, int, int)
28920 /* Install scheduling hooks for current CPU. Some of these hooks are used
28921 in time-critical parts of the scheduler, so we only set them up when
28922 they are actually used. */
/* Core-family CPUs model the decoder post-reload ...  */
28925 case PROCESSOR_CORE2:
28926 case PROCESSOR_NEHALEM:
28927 case PROCESSOR_SANDYBRIDGE:
28928 case PROCESSOR_HASWELL:
28929 /* Do not perform multipass scheduling for pre-reload schedule
28930 to save compile time. */
28931 if (reload_completed)
28933 targetm.sched.dfa_post_advance_cycle
28934 = core2i7_dfa_post_advance_cycle;
28935 targetm.sched.first_cycle_multipass_init
28936 = core2i7_first_cycle_multipass_init;
28937 targetm.sched.first_cycle_multipass_begin
28938 = core2i7_first_cycle_multipass_begin;
28939 targetm.sched.first_cycle_multipass_issue
28940 = core2i7_first_cycle_multipass_issue;
28941 targetm.sched.first_cycle_multipass_backtrack
28942 = core2i7_first_cycle_multipass_backtrack;
28943 targetm.sched.first_cycle_multipass_end
28944 = core2i7_first_cycle_multipass_end;
28945 targetm.sched.first_cycle_multipass_fini
28946 = core2i7_first_cycle_multipass_fini;
28948 /* Set decoder parameters. */
28949 core2i7_secondary_decoder_max_insn_size = 8;
28950 core2i7_ifetch_block_size = 16;
28951 core2i7_ifetch_block_max_insns = 6;
28954 /* ... Fall through ... */
/* ... everything else (and pre-reload) clears the multipass hooks.  */
28956 targetm.sched.dfa_post_advance_cycle = NULL;
28957 targetm.sched.first_cycle_multipass_init = NULL;
28958 targetm.sched.first_cycle_multipass_begin = NULL;
28959 targetm.sched.first_cycle_multipass_issue = NULL;
28960 targetm.sched.first_cycle_multipass_backtrack = NULL;
28961 targetm.sched.first_cycle_multipass_end = NULL;
28962 targetm.sched.first_cycle_multipass_fini = NULL;
28968 /* Compute the alignment given to a constant that is being placed in memory.
28969 EXP is the constant and ALIGN is the alignment that the object would
28971 The value of this function is used instead of that alignment to align
28975 ix86_constant_alignment (tree exp, int align)
28977 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
28978 || TREE_CODE (exp) == INTEGER_CST)
28980 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
28982 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
28985 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
28986 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
28987 return BITS_PER_WORD;
28992 /* Compute the alignment for a variable for Intel MCU psABI. TYPE is
28993 the data type, and ALIGN is the alignment that the object would
28994 ordinarily have. */
28997 iamcu_alignment (tree type, int align)
28999 enum machine_mode mode;
29001 if (align < 32 || TYPE_USER_ALIGN (type))
29004 /* Intel MCU psABI specifies scalar types > 4 bytes aligned to 4
29006 mode = TYPE_MODE (strip_array_types (type));
29007 switch (GET_MODE_CLASS (mode))
29010 case MODE_COMPLEX_INT:
29011 case MODE_COMPLEX_FLOAT:
29013 case MODE_DECIMAL_FLOAT:
29020 /* Compute the alignment for a static variable.
29021 TYPE is the data type, and ALIGN is the alignment that
29022 the object would ordinarily have. The value of this function is used
29023 instead of that alignment to align the object. */
/* NOTE(review): extraction-damaged block; code left byte-identical.
   OPT selects optimization-driven (vs pure ABI) alignment increases.  */
29026 ix86_data_alignment (tree type, int align, bool opt)
29028 /* GCC 4.8 and earlier used to incorrectly assume this alignment even
29029 for symbols from other compilation units or symbols that don't need
29030 to bind locally. In order to preserve some ABI compatibility with
29031 those compilers, ensure we don't decrease alignment from what we
29034 int max_align_compat = MIN (256, MAX_OFILE_ALIGNMENT);
29036 /* A data structure, equal or greater than the size of a cache line
29037 (64 bytes in the Pentium 4 and other recent Intel processors, including
29038 processors based on Intel Core microarchitecture) should be aligned
29039 so that its base address is a multiple of a cache line size. */
29042 = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
29044 if (max_align < BITS_PER_WORD)
29045 max_align = BITS_PER_WORD;
29047 switch (ix86_align_data_type)
29049 case ix86_align_data_type_abi: opt = false; break;
29050 case ix86_align_data_type_compat: max_align = BITS_PER_WORD; break;
29051 case ix86_align_data_type_cacheline: break;
29055 align = iamcu_alignment (type, align);
/* Cache-line align large aggregates with constant size.  */
29058 && AGGREGATE_TYPE_P (type)
29059 && TYPE_SIZE (type)
29060 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
29062 if (wi::geu_p (TYPE_SIZE (type), max_align_compat)
29063 && align < max_align_compat)
29064 align = max_align_compat;
29065 if (wi::geu_p (TYPE_SIZE (type), max_align)
29066 && align < max_align)
29070 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
29071 to 16byte boundary. */
29074 if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
29075 && TYPE_SIZE (type)
29076 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
29077 && wi::geu_p (TYPE_SIZE (type), 128)
/* Per-type-class natural alignment bumps for FP and 128-bit modes.  */
29085 if (TREE_CODE (type) == ARRAY_TYPE)
29087 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
29089 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
29092 else if (TREE_CODE (type) == COMPLEX_TYPE)
29095 if (TYPE_MODE (type) == DCmode && align < 64)
29097 if ((TYPE_MODE (type) == XCmode
29098 || TYPE_MODE (type) == TCmode) && align < 128)
29101 else if ((TREE_CODE (type) == RECORD_TYPE
29102 || TREE_CODE (type) == UNION_TYPE
29103 || TREE_CODE (type) == QUAL_UNION_TYPE)
29104 && TYPE_FIELDS (type))
29106 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
29108 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
29111 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
29112 || TREE_CODE (type) == INTEGER_TYPE)
29114 if (TYPE_MODE (type) == DFmode && align < 64)
29116 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
29123 /* Compute the alignment for a local variable or a stack slot. EXP is
29124 the data type or decl itself, MODE is the widest mode available and
29125 ALIGN is the alignment that the object would ordinarily have. The
29126 value of this macro is used instead of that alignment to align the
/* NOTE(review): this excerpt is line-sampled; the remainder of the
   comment above and several statements below (local declarations,
   braces, `return align;` paths) fall outside the visible lines.
   Added comments describe only the logic that is visible.  */
29130 ix86_local_alignment (tree exp, machine_mode mode,
29131 unsigned int align)
/* EXP may be either a decl or a type; for a decl, decisions are made
   on its type.  (The matching assignment to the DECL local referenced
   further down is not visible in this excerpt.)  */
29135 if (exp && DECL_P (exp))
29137 type = TREE_TYPE (exp);
29146 /* Don't do dynamic stack realignment for long long objects with
29147 -mpreferred-stack-boundary=2. */
/* With a preferred stack boundary below 64 bits, demote the natural
   64-bit alignment of DImode objects — unless the user explicitly
   requested alignment on the type or the decl.  */
29150 && ix86_preferred_stack_boundary < 64
29151 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
29152 && (!type || !TYPE_USER_ALIGN (type))
29153 && (!decl || !DECL_USER_ALIGN (decl)))
29156 /* If TYPE is NULL, we are allocating a stack slot for caller-save
29157 register in MODE. We will return the largest alignment of XF
/* Caller-save slot for XFmode: give it at least DFmode alignment so
   the same slot can also hold a DFmode save.  */
29161 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
29162 align = GET_MODE_ALIGNMENT (DFmode);
29166 /* Don't increase alignment for Intel MCU psABI. */
29170 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
29171 to 16byte boundary. Exact wording is:
29173 An array uses the same alignment as its elements, except that a local or
29174 global array variable of length at least 16 bytes or
29175 a C99 variable-length array variable always has alignment of at least 16 bytes.
29177 This was added to allow use of aligned SSE instructions at arrays. This
29178 rule is meant for static storage (where compiler can not do the analysis
29179 by itself). We follow it for automatic variables only when convenient.
29180 We fully control everything in the function compiled and functions from
29181 other unit can not rely on the alignment.
29183 Exclude va_list type. It is the common case of local array where
29184 we can not benefit from the alignment.
29186 TODO: Probably one should optimize for size only when var is not escaping. */
/* 64-bit, speed-optimized path: aggregates of at least 16 bytes
   (other than va_list) are bumped to 16-byte alignment per the
   comment above.  The actual `align = 128`-style assignment sits on a
   line not visible here.  */
29187 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
29190 if (AGGREGATE_TYPE_P (type)
29191 && (va_list_type_node == NULL_TREE
29192 || (TYPE_MAIN_VARIANT (type)
29193 != TYPE_MAIN_VARIANT (va_list_type_node)))
29194 && TYPE_SIZE (type)
29195 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
29196 && wi::geu_p (TYPE_SIZE (type), 16)
/* Per-kind alignment bumps: double-based data gets 64-bit alignment,
   128-bit modes (per ALIGN_MODE_128) get 128-bit alignment.  The
   assignments themselves are on lines outside this excerpt.  */
29200 if (TREE_CODE (type) == ARRAY_TYPE)
29202 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
29204 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
29207 else if (TREE_CODE (type) == COMPLEX_TYPE)
29209 if (TYPE_MODE (type) == DCmode && align < 64)
29211 if ((TYPE_MODE (type) == XCmode
29212 || TYPE_MODE (type) == TCmode) && align < 128)
/* For records/unions only the first field's mode is inspected —
   presumably a cheap heuristic; TODO confirm against full source.  */
29215 else if ((TREE_CODE (type) == RECORD_TYPE
29216 || TREE_CODE (type) == UNION_TYPE
29217 || TREE_CODE (type) == QUAL_UNION_TYPE)
29218 && TYPE_FIELDS (type))
29220 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
29222 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
29225 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
29226 || TREE_CODE (type) == INTEGER_TYPE)
29229 if (TYPE_MODE (type) == DFmode && align < 64)
29231 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
29237 /* Compute the minimum required alignment for dynamic stack realignment
29238 purposes for a local variable, parameter or a stack slot. EXP is
29239 the data type or decl itself, MODE is its mode and ALIGN is the
29240 alignment that the object would ordinarily have. */
/* NOTE(review): line-sampled excerpt — the local declarations, braces
   and return statements of this function are not visible here.  */
29243 ix86_minimum_alignment (tree exp, machine_mode mode,
29244 unsigned int align)
/* As in ix86_local_alignment: a decl is represented by its type.  */
29248 if (exp && DECL_P (exp))
29250 type = TREE_TYPE (exp);
/* Only 32-bit targets with a sub-64-bit preferred stack boundary and
   a 64-bit-aligned object need any demotion; everything else keeps
   ALIGN (early return — the return itself is outside this excerpt).  */
29259 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
29262 /* Don't do dynamic stack realignment for long long objects with
29263 -mpreferred-stack-boundary=2. */
/* DImode objects without user-requested alignment don't force
   dynamic realignment; their minimum alignment is reduced.  */
29264 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
29265 && (!type || !TYPE_USER_ALIGN (type))
29266 && (!decl || !DECL_USER_ALIGN (decl)))
29272 /* Find a location for the static chain incoming to a nested function.
29273 This is a register, unless all free registers are used by arguments. */
/* NOTE(review): line-sampled excerpt — the return type, several
   braces, regno assignments and early returns of this function sit on
   lines not visible here.  */
29276 ix86_static_chain (const_tree fndecl_or_type, bool incoming_p)
29280 /* While this function won't be called by the middle-end when a static
29281 chain isn't needed, it's also used throughout the backend so it's
29282 easiest to keep this check centralized. */
/* No static chain needed for this decl: bail out (the return value on
   this path is outside the excerpt — presumably NULL_RTX).  */
29283 if (DECL_P (fndecl_or_type) && !DECL_STATIC_CHAIN (fndecl_or_type))
29288 /* We always use R10 in 64-bit mode. */
29293 const_tree fntype, fndecl;
29296 /* By default in 32-bit mode we use ECX to pass the static chain. */
/* Normalize the decl-or-type argument into FNTYPE (+ FNDECL when a
   decl was passed) so the calling convention can be inspected.  */
29299 if (TREE_CODE (fndecl_or_type) == FUNCTION_DECL)
29301 fntype = TREE_TYPE (fndecl_or_type);
29302 fndecl = fndecl_or_type;
29306 fntype = fndecl_or_type;
/* Pick a register the calling convention leaves free.  */
29310 ccvt = ix86_get_callcvt (fntype);
29311 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
29313 /* Fastcall functions use ecx/edx for arguments, which leaves
29314 us with EAX for the static chain.
29315 Thiscall functions use ecx for arguments, which also
29316 leaves us with EAX for the static chain. */
29319 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
29321 /* Thiscall functions use ecx for arguments, which leaves
29322 us with EAX and EDX for the static chain.
29323 We are using for abi-compatibility EAX. */
29326 else if (ix86_function_regparm (fntype, fndecl) == 3)
29328 /* For regparm 3, we have no free call-clobbered registers in
29329 which to store the static chain. In order to implement this,
29330 we have the trampoline push the static chain to the stack.
29331 However, we can't push a value below the return address when
29332 we call the nested function directly, so we have to use an
29333 alternate entry point. For this we use ESI, and have the
29334 alternate entry point push ESI, so that things appear the
29335 same once we're executing the nested function. */
/* Record that this function receives its static chain on the stack,
   and return a frame mem just below the incoming arguments.  */
29338 if (fndecl == current_function_decl)
29339 ix86_static_chain_on_stack = true;
29340 return gen_frame_mem (SImode,
29341 plus_constant (Pmode,
29342 arg_pointer_rtx, -8));
/* Common exit: REGNO was chosen by one of the branches above.  */
29348 return gen_rtx_REG (Pmode, regno);
29351 /* Emit RTL insns to initialize the variable parts of a trampoline.
29352 FNDECL is the decl of the target address; M_TRAMP is a MEM for
29353 the trampoline, and CHAIN_VALUE is an RTX for the static chain
29354 to be passed to the target function. */
/* NOTE(review): line-sampled excerpt — local declarations, the
   `offset += ...` bookkeeping between stores, braces and the
   TARGET_64BIT/32-bit branch structure are on lines not visible
   here.  Comments describe only the visible stores.  */
29357 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
29363 fnaddr = XEXP (DECL_RTL (fndecl), 0);
29369 /* Load the function address to r11. Try to load address using
29370 the shorter movl instead of movabs. We may want to support
29371 movq for kernel mode, but kernel does not use trampolines at
29372 the moment. FNADDR is a 32bit address and may not be in
29373 DImode when ptr_mode == SImode. Always use movl in this
/* Short form: 0xbb41 is the little-endian encoding of 41 bb, i.e.
   "movl $imm32, %r11d" (REX.B + mov), followed by the 32-bit
   address.  */
29375 if (ptr_mode == SImode
29376 || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
29378 fnaddr = copy_addr_to_reg (fnaddr);
29380 mem = adjust_address (m_tramp, HImode, offset);
29381 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
29383 mem = adjust_address (m_tramp, SImode, offset + 2);
29384 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
/* Long form: 0xbb49 encodes 49 bb, i.e. "movabs $imm64, %r11",
   followed by the full 64-bit address.  */
29389 mem = adjust_address (m_tramp, HImode, offset);
29390 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
29392 mem = adjust_address (m_tramp, DImode, offset + 2);
29393 emit_move_insn (mem, fnaddr);
29397 /* Load static chain using movabs to r10. Use the shorter movl
29398 instead of movabs when ptr_mode == SImode. */
/* OPCODE is chosen on lines not visible here before being stored.  */
29399 if (ptr_mode == SImode)
29410 mem = adjust_address (m_tramp, HImode, offset);
29411 emit_move_insn (mem, gen_int_mode (opcode, HImode));
29413 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
29414 emit_move_insn (mem, chain_value);
29417 /* Jump to r11; the last (unused) byte is a nop, only there to
29418 pad the write out to a single 32-bit store. */
/* 0x90e3ff49 little-endian = 49 ff e3 90: "jmp *%r11" + nop.  */
29419 mem = adjust_address (m_tramp, SImode, offset)
29420 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
29427 /* Depending on the static chain location, either load a register
29428 with a constant, or push the constant to the stack. All of the
29429 instructions are the same size. */
29430 chain = ix86_static_chain (fndecl, true);
/* 32-bit path: 0xb8 = "movl $imm32, %eax", 0xb9 = "movl $imm32,
   %ecx"; other chain registers are unexpected here.  (The case
   labels for the switch are outside this excerpt.)  */
29433 switch (REGNO (chain))
29436 opcode = 0xb8; break;
29438 opcode = 0xb9; break;
29440 gcc_unreachable ();
29446 mem = adjust_address (m_tramp, QImode, offset);
29447 emit_move_insn (mem, gen_int_mode (opcode, QImode));
29449 mem = adjust_address (m_tramp, SImode, offset + 1);
29450 emit_move_insn (mem, chain_value);
/* 0xe9 = "jmp rel32"; the displacement is computed below.  */
29453 mem = adjust_address (m_tramp, QImode, offset);
29454 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
29456 mem = adjust_address (m_tramp, SImode, offset + 1);
29458 /* Compute offset from the end of the jmp to the target function.
29459 In the case in which the trampoline stores the static chain on
29460 the stack, we need to skip the first insn which pushes the
29461 (call-saved) register static chain; this push is 1 byte. */
29463 disp = expand_binop (SImode, sub_optab, fnaddr,
29464 plus_constant (Pmode, XEXP (m_tramp, 0),
29465 offset - (MEM_P (chain) ? 1 : 0)),
29466 NULL_RTX, 1, OPTAB_DIRECT);
29467 emit_move_insn (mem, disp);
/* Sanity: everything written must fit in the trampoline template.  */
29470 gcc_assert (offset <= TRAMPOLINE_SIZE);
/* On targets that need it, mark the stack executable at runtime.  */
29472 #ifdef HAVE_ENABLE_EXECUTE_STACK
29473 #ifdef CHECK_EXECUTE_STACK_ENABLED
29474 if (CHECK_EXECUTE_STACK_ENABLED)
29476 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
29477 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
29481 /* The following file contains several enumerations and data structures
29482 built from the definitions in i386-builtin-types.def. */
29484 #include "i386-builtin-types.inc"
29486 /* Table for the ix86 builtin non-function types. */
29487 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
29489 /* Retrieve an element from the above table, building some of
29490 the types lazily. */
/* NOTE(review): line-sampled excerpt — some declarations, the
   memoization hit check, and the final return are on lines not
   visible here.  Primitive types (<= IX86_BT_LAST_PRIM) are expected
   to be pre-populated; vector and pointer types are built on demand
   and cached in the table above.  */
29493 ix86_get_builtin_type (enum ix86_builtin_type tcode)
29495 unsigned int index;
29498 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
/* Cache lookup — on a hit the cached tree is returned (return not
   visible in this excerpt).  */
29500 type = ix86_builtin_type_tab[(int) tcode];
29504 gcc_assert (tcode > IX86_BT_LAST_PRIM);
/* Vector type: element type and machine mode come from the parallel
   ix86_builtin_type_vect_* arrays indexed past the primitives.  */
29505 if (tcode <= IX86_BT_LAST_VECT)
29509 index = tcode - IX86_BT_LAST_PRIM - 1;
29510 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
29511 mode = ix86_builtin_type_vect_mode[index];
29513 type = build_vector_type_for_mode (itype, mode);
/* Pointer type: codes up to IX86_BT_LAST_PTR are plain pointers,
   the rest are pointers-to-const.  */
29519 index = tcode - IX86_BT_LAST_VECT - 1;
29520 if (tcode <= IX86_BT_LAST_PTR)
29521 quals = TYPE_UNQUALIFIED;
29523 quals = TYPE_QUAL_CONST;
29525 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
29526 if (quals != TYPE_UNQUALIFIED)
29527 itype = build_qualified_type (itype, quals);
29529 type = build_pointer_type (itype);
/* Memoize the freshly built type for subsequent lookups.  */
29532 ix86_builtin_type_tab[(int) tcode] = type;
29536 /* Table for the ix86 builtin function types. */
29537 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
29539 /* Retrieve an element from the above table, building some of
29540 the types lazily. */
/* NOTE(review): line-sampled excerpt — the cache-hit return and final
   return are on lines not visible here.  Function types are either
   built from the (return-type, arg-types...) tuples recorded in
   ix86_builtin_func_args, or resolved as aliases of another code.  */
29543 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
29547 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab))
/* Cache lookup; hit path returns the memoized tree (not visible).  */
29549 type = ix86_builtin_func_type_tab[(int) tcode];
29553 if (tcode <= IX86_BT_LAST_FUNC)
/* [START, AFTER) delimits this function's slice of
   ix86_builtin_func_args: element START is the return type, the rest
   are the argument types.  */
29555 unsigned start = ix86_builtin_func_start[(int) tcode];
29556 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
29557 tree rtype, atype, args = void_list_node;
29560 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
/* Build the argument TREE_LIST back-to-front so it ends up in
   declaration order, terminated by void_list_node.  */
29561 for (i = after - 1; i > start; --i)
29563 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
29564 args = tree_cons (NULL, atype, args);
29567 type = build_function_type (rtype, args);
/* Alias codes simply share the type of the function they alias.  */
29571 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
29572 enum ix86_builtin_func_type icode;
29574 icode = ix86_builtin_func_alias_base[index];
29575 type = ix86_get_builtin_func_type (icode);
/* Memoize for subsequent lookups.  */
29578 ix86_builtin_func_type_tab[(int) tcode] = type;
29583 /* Codes for all the SSE/MMX builtins. */
29586 IX86_BUILTIN_ADDPS,
29587 IX86_BUILTIN_ADDSS,
29588 IX86_BUILTIN_DIVPS,
29589 IX86_BUILTIN_DIVSS,
29590 IX86_BUILTIN_MULPS,
29591 IX86_BUILTIN_MULSS,
29592 IX86_BUILTIN_SUBPS,
29593 IX86_BUILTIN_SUBSS,
29595 IX86_BUILTIN_CMPEQPS,
29596 IX86_BUILTIN_CMPLTPS,
29597 IX86_BUILTIN_CMPLEPS,
29598 IX86_BUILTIN_CMPGTPS,
29599 IX86_BUILTIN_CMPGEPS,
29600 IX86_BUILTIN_CMPNEQPS,
29601 IX86_BUILTIN_CMPNLTPS,
29602 IX86_BUILTIN_CMPNLEPS,
29603 IX86_BUILTIN_CMPNGTPS,
29604 IX86_BUILTIN_CMPNGEPS,
29605 IX86_BUILTIN_CMPORDPS,
29606 IX86_BUILTIN_CMPUNORDPS,
29607 IX86_BUILTIN_CMPEQSS,
29608 IX86_BUILTIN_CMPLTSS,
29609 IX86_BUILTIN_CMPLESS,
29610 IX86_BUILTIN_CMPNEQSS,
29611 IX86_BUILTIN_CMPNLTSS,
29612 IX86_BUILTIN_CMPNLESS,
29613 IX86_BUILTIN_CMPORDSS,
29614 IX86_BUILTIN_CMPUNORDSS,
29616 IX86_BUILTIN_COMIEQSS,
29617 IX86_BUILTIN_COMILTSS,
29618 IX86_BUILTIN_COMILESS,
29619 IX86_BUILTIN_COMIGTSS,
29620 IX86_BUILTIN_COMIGESS,
29621 IX86_BUILTIN_COMINEQSS,
29622 IX86_BUILTIN_UCOMIEQSS,
29623 IX86_BUILTIN_UCOMILTSS,
29624 IX86_BUILTIN_UCOMILESS,
29625 IX86_BUILTIN_UCOMIGTSS,
29626 IX86_BUILTIN_UCOMIGESS,
29627 IX86_BUILTIN_UCOMINEQSS,
29629 IX86_BUILTIN_CVTPI2PS,
29630 IX86_BUILTIN_CVTPS2PI,
29631 IX86_BUILTIN_CVTSI2SS,
29632 IX86_BUILTIN_CVTSI642SS,
29633 IX86_BUILTIN_CVTSS2SI,
29634 IX86_BUILTIN_CVTSS2SI64,
29635 IX86_BUILTIN_CVTTPS2PI,
29636 IX86_BUILTIN_CVTTSS2SI,
29637 IX86_BUILTIN_CVTTSS2SI64,
29639 IX86_BUILTIN_MAXPS,
29640 IX86_BUILTIN_MAXSS,
29641 IX86_BUILTIN_MINPS,
29642 IX86_BUILTIN_MINSS,
29644 IX86_BUILTIN_LOADUPS,
29645 IX86_BUILTIN_STOREUPS,
29646 IX86_BUILTIN_MOVSS,
29648 IX86_BUILTIN_MOVHLPS,
29649 IX86_BUILTIN_MOVLHPS,
29650 IX86_BUILTIN_LOADHPS,
29651 IX86_BUILTIN_LOADLPS,
29652 IX86_BUILTIN_STOREHPS,
29653 IX86_BUILTIN_STORELPS,
29655 IX86_BUILTIN_MASKMOVQ,
29656 IX86_BUILTIN_MOVMSKPS,
29657 IX86_BUILTIN_PMOVMSKB,
29659 IX86_BUILTIN_MOVNTPS,
29660 IX86_BUILTIN_MOVNTQ,
29662 IX86_BUILTIN_LOADDQU,
29663 IX86_BUILTIN_STOREDQU,
29665 IX86_BUILTIN_PACKSSWB,
29666 IX86_BUILTIN_PACKSSDW,
29667 IX86_BUILTIN_PACKUSWB,
29669 IX86_BUILTIN_PADDB,
29670 IX86_BUILTIN_PADDW,
29671 IX86_BUILTIN_PADDD,
29672 IX86_BUILTIN_PADDQ,
29673 IX86_BUILTIN_PADDSB,
29674 IX86_BUILTIN_PADDSW,
29675 IX86_BUILTIN_PADDUSB,
29676 IX86_BUILTIN_PADDUSW,
29677 IX86_BUILTIN_PSUBB,
29678 IX86_BUILTIN_PSUBW,
29679 IX86_BUILTIN_PSUBD,
29680 IX86_BUILTIN_PSUBQ,
29681 IX86_BUILTIN_PSUBSB,
29682 IX86_BUILTIN_PSUBSW,
29683 IX86_BUILTIN_PSUBUSB,
29684 IX86_BUILTIN_PSUBUSW,
29687 IX86_BUILTIN_PANDN,
29691 IX86_BUILTIN_PAVGB,
29692 IX86_BUILTIN_PAVGW,
29694 IX86_BUILTIN_PCMPEQB,
29695 IX86_BUILTIN_PCMPEQW,
29696 IX86_BUILTIN_PCMPEQD,
29697 IX86_BUILTIN_PCMPGTB,
29698 IX86_BUILTIN_PCMPGTW,
29699 IX86_BUILTIN_PCMPGTD,
29701 IX86_BUILTIN_PMADDWD,
29703 IX86_BUILTIN_PMAXSW,
29704 IX86_BUILTIN_PMAXUB,
29705 IX86_BUILTIN_PMINSW,
29706 IX86_BUILTIN_PMINUB,
29708 IX86_BUILTIN_PMULHUW,
29709 IX86_BUILTIN_PMULHW,
29710 IX86_BUILTIN_PMULLW,
29712 IX86_BUILTIN_PSADBW,
29713 IX86_BUILTIN_PSHUFW,
29715 IX86_BUILTIN_PSLLW,
29716 IX86_BUILTIN_PSLLD,
29717 IX86_BUILTIN_PSLLQ,
29718 IX86_BUILTIN_PSRAW,
29719 IX86_BUILTIN_PSRAD,
29720 IX86_BUILTIN_PSRLW,
29721 IX86_BUILTIN_PSRLD,
29722 IX86_BUILTIN_PSRLQ,
29723 IX86_BUILTIN_PSLLWI,
29724 IX86_BUILTIN_PSLLDI,
29725 IX86_BUILTIN_PSLLQI,
29726 IX86_BUILTIN_PSRAWI,
29727 IX86_BUILTIN_PSRADI,
29728 IX86_BUILTIN_PSRLWI,
29729 IX86_BUILTIN_PSRLDI,
29730 IX86_BUILTIN_PSRLQI,
29732 IX86_BUILTIN_PUNPCKHBW,
29733 IX86_BUILTIN_PUNPCKHWD,
29734 IX86_BUILTIN_PUNPCKHDQ,
29735 IX86_BUILTIN_PUNPCKLBW,
29736 IX86_BUILTIN_PUNPCKLWD,
29737 IX86_BUILTIN_PUNPCKLDQ,
29739 IX86_BUILTIN_SHUFPS,
29741 IX86_BUILTIN_RCPPS,
29742 IX86_BUILTIN_RCPSS,
29743 IX86_BUILTIN_RSQRTPS,
29744 IX86_BUILTIN_RSQRTPS_NR,
29745 IX86_BUILTIN_RSQRTSS,
29746 IX86_BUILTIN_RSQRTF,
29747 IX86_BUILTIN_SQRTPS,
29748 IX86_BUILTIN_SQRTPS_NR,
29749 IX86_BUILTIN_SQRTSS,
29751 IX86_BUILTIN_UNPCKHPS,
29752 IX86_BUILTIN_UNPCKLPS,
29754 IX86_BUILTIN_ANDPS,
29755 IX86_BUILTIN_ANDNPS,
29757 IX86_BUILTIN_XORPS,
29760 IX86_BUILTIN_LDMXCSR,
29761 IX86_BUILTIN_STMXCSR,
29762 IX86_BUILTIN_SFENCE,
29764 IX86_BUILTIN_FXSAVE,
29765 IX86_BUILTIN_FXRSTOR,
29766 IX86_BUILTIN_FXSAVE64,
29767 IX86_BUILTIN_FXRSTOR64,
29769 IX86_BUILTIN_XSAVE,
29770 IX86_BUILTIN_XRSTOR,
29771 IX86_BUILTIN_XSAVE64,
29772 IX86_BUILTIN_XRSTOR64,
29774 IX86_BUILTIN_XSAVEOPT,
29775 IX86_BUILTIN_XSAVEOPT64,
29777 IX86_BUILTIN_XSAVEC,
29778 IX86_BUILTIN_XSAVEC64,
29780 IX86_BUILTIN_XSAVES,
29781 IX86_BUILTIN_XRSTORS,
29782 IX86_BUILTIN_XSAVES64,
29783 IX86_BUILTIN_XRSTORS64,
29785 /* 3DNow! Original */
29786 IX86_BUILTIN_FEMMS,
29787 IX86_BUILTIN_PAVGUSB,
29788 IX86_BUILTIN_PF2ID,
29789 IX86_BUILTIN_PFACC,
29790 IX86_BUILTIN_PFADD,
29791 IX86_BUILTIN_PFCMPEQ,
29792 IX86_BUILTIN_PFCMPGE,
29793 IX86_BUILTIN_PFCMPGT,
29794 IX86_BUILTIN_PFMAX,
29795 IX86_BUILTIN_PFMIN,
29796 IX86_BUILTIN_PFMUL,
29797 IX86_BUILTIN_PFRCP,
29798 IX86_BUILTIN_PFRCPIT1,
29799 IX86_BUILTIN_PFRCPIT2,
29800 IX86_BUILTIN_PFRSQIT1,
29801 IX86_BUILTIN_PFRSQRT,
29802 IX86_BUILTIN_PFSUB,
29803 IX86_BUILTIN_PFSUBR,
29804 IX86_BUILTIN_PI2FD,
29805 IX86_BUILTIN_PMULHRW,
29807 /* 3DNow! Athlon Extensions */
29808 IX86_BUILTIN_PF2IW,
29809 IX86_BUILTIN_PFNACC,
29810 IX86_BUILTIN_PFPNACC,
29811 IX86_BUILTIN_PI2FW,
29812 IX86_BUILTIN_PSWAPDSI,
29813 IX86_BUILTIN_PSWAPDSF,
29816 IX86_BUILTIN_ADDPD,
29817 IX86_BUILTIN_ADDSD,
29818 IX86_BUILTIN_DIVPD,
29819 IX86_BUILTIN_DIVSD,
29820 IX86_BUILTIN_MULPD,
29821 IX86_BUILTIN_MULSD,
29822 IX86_BUILTIN_SUBPD,
29823 IX86_BUILTIN_SUBSD,
29825 IX86_BUILTIN_CMPEQPD,
29826 IX86_BUILTIN_CMPLTPD,
29827 IX86_BUILTIN_CMPLEPD,
29828 IX86_BUILTIN_CMPGTPD,
29829 IX86_BUILTIN_CMPGEPD,
29830 IX86_BUILTIN_CMPNEQPD,
29831 IX86_BUILTIN_CMPNLTPD,
29832 IX86_BUILTIN_CMPNLEPD,
29833 IX86_BUILTIN_CMPNGTPD,
29834 IX86_BUILTIN_CMPNGEPD,
29835 IX86_BUILTIN_CMPORDPD,
29836 IX86_BUILTIN_CMPUNORDPD,
29837 IX86_BUILTIN_CMPEQSD,
29838 IX86_BUILTIN_CMPLTSD,
29839 IX86_BUILTIN_CMPLESD,
29840 IX86_BUILTIN_CMPNEQSD,
29841 IX86_BUILTIN_CMPNLTSD,
29842 IX86_BUILTIN_CMPNLESD,
29843 IX86_BUILTIN_CMPORDSD,
29844 IX86_BUILTIN_CMPUNORDSD,
29846 IX86_BUILTIN_COMIEQSD,
29847 IX86_BUILTIN_COMILTSD,
29848 IX86_BUILTIN_COMILESD,
29849 IX86_BUILTIN_COMIGTSD,
29850 IX86_BUILTIN_COMIGESD,
29851 IX86_BUILTIN_COMINEQSD,
29852 IX86_BUILTIN_UCOMIEQSD,
29853 IX86_BUILTIN_UCOMILTSD,
29854 IX86_BUILTIN_UCOMILESD,
29855 IX86_BUILTIN_UCOMIGTSD,
29856 IX86_BUILTIN_UCOMIGESD,
29857 IX86_BUILTIN_UCOMINEQSD,
29859 IX86_BUILTIN_MAXPD,
29860 IX86_BUILTIN_MAXSD,
29861 IX86_BUILTIN_MINPD,
29862 IX86_BUILTIN_MINSD,
29864 IX86_BUILTIN_ANDPD,
29865 IX86_BUILTIN_ANDNPD,
29867 IX86_BUILTIN_XORPD,
29869 IX86_BUILTIN_SQRTPD,
29870 IX86_BUILTIN_SQRTSD,
29872 IX86_BUILTIN_UNPCKHPD,
29873 IX86_BUILTIN_UNPCKLPD,
29875 IX86_BUILTIN_SHUFPD,
29877 IX86_BUILTIN_LOADUPD,
29878 IX86_BUILTIN_STOREUPD,
29879 IX86_BUILTIN_MOVSD,
29881 IX86_BUILTIN_LOADHPD,
29882 IX86_BUILTIN_LOADLPD,
29884 IX86_BUILTIN_CVTDQ2PD,
29885 IX86_BUILTIN_CVTDQ2PS,
29887 IX86_BUILTIN_CVTPD2DQ,
29888 IX86_BUILTIN_CVTPD2PI,
29889 IX86_BUILTIN_CVTPD2PS,
29890 IX86_BUILTIN_CVTTPD2DQ,
29891 IX86_BUILTIN_CVTTPD2PI,
29893 IX86_BUILTIN_CVTPI2PD,
29894 IX86_BUILTIN_CVTSI2SD,
29895 IX86_BUILTIN_CVTSI642SD,
29897 IX86_BUILTIN_CVTSD2SI,
29898 IX86_BUILTIN_CVTSD2SI64,
29899 IX86_BUILTIN_CVTSD2SS,
29900 IX86_BUILTIN_CVTSS2SD,
29901 IX86_BUILTIN_CVTTSD2SI,
29902 IX86_BUILTIN_CVTTSD2SI64,
29904 IX86_BUILTIN_CVTPS2DQ,
29905 IX86_BUILTIN_CVTPS2PD,
29906 IX86_BUILTIN_CVTTPS2DQ,
29908 IX86_BUILTIN_MOVNTI,
29909 IX86_BUILTIN_MOVNTI64,
29910 IX86_BUILTIN_MOVNTPD,
29911 IX86_BUILTIN_MOVNTDQ,
29913 IX86_BUILTIN_MOVQ128,
29916 IX86_BUILTIN_MASKMOVDQU,
29917 IX86_BUILTIN_MOVMSKPD,
29918 IX86_BUILTIN_PMOVMSKB128,
29920 IX86_BUILTIN_PACKSSWB128,
29921 IX86_BUILTIN_PACKSSDW128,
29922 IX86_BUILTIN_PACKUSWB128,
29924 IX86_BUILTIN_PADDB128,
29925 IX86_BUILTIN_PADDW128,
29926 IX86_BUILTIN_PADDD128,
29927 IX86_BUILTIN_PADDQ128,
29928 IX86_BUILTIN_PADDSB128,
29929 IX86_BUILTIN_PADDSW128,
29930 IX86_BUILTIN_PADDUSB128,
29931 IX86_BUILTIN_PADDUSW128,
29932 IX86_BUILTIN_PSUBB128,
29933 IX86_BUILTIN_PSUBW128,
29934 IX86_BUILTIN_PSUBD128,
29935 IX86_BUILTIN_PSUBQ128,
29936 IX86_BUILTIN_PSUBSB128,
29937 IX86_BUILTIN_PSUBSW128,
29938 IX86_BUILTIN_PSUBUSB128,
29939 IX86_BUILTIN_PSUBUSW128,
29941 IX86_BUILTIN_PAND128,
29942 IX86_BUILTIN_PANDN128,
29943 IX86_BUILTIN_POR128,
29944 IX86_BUILTIN_PXOR128,
29946 IX86_BUILTIN_PAVGB128,
29947 IX86_BUILTIN_PAVGW128,
29949 IX86_BUILTIN_PCMPEQB128,
29950 IX86_BUILTIN_PCMPEQW128,
29951 IX86_BUILTIN_PCMPEQD128,
29952 IX86_BUILTIN_PCMPGTB128,
29953 IX86_BUILTIN_PCMPGTW128,
29954 IX86_BUILTIN_PCMPGTD128,
29956 IX86_BUILTIN_PMADDWD128,
29958 IX86_BUILTIN_PMAXSW128,
29959 IX86_BUILTIN_PMAXUB128,
29960 IX86_BUILTIN_PMINSW128,
29961 IX86_BUILTIN_PMINUB128,
29963 IX86_BUILTIN_PMULUDQ,
29964 IX86_BUILTIN_PMULUDQ128,
29965 IX86_BUILTIN_PMULHUW128,
29966 IX86_BUILTIN_PMULHW128,
29967 IX86_BUILTIN_PMULLW128,
29969 IX86_BUILTIN_PSADBW128,
29970 IX86_BUILTIN_PSHUFHW,
29971 IX86_BUILTIN_PSHUFLW,
29972 IX86_BUILTIN_PSHUFD,
29974 IX86_BUILTIN_PSLLDQI128,
29975 IX86_BUILTIN_PSLLWI128,
29976 IX86_BUILTIN_PSLLDI128,
29977 IX86_BUILTIN_PSLLQI128,
29978 IX86_BUILTIN_PSRAWI128,
29979 IX86_BUILTIN_PSRADI128,
29980 IX86_BUILTIN_PSRLDQI128,
29981 IX86_BUILTIN_PSRLWI128,
29982 IX86_BUILTIN_PSRLDI128,
29983 IX86_BUILTIN_PSRLQI128,
29985 IX86_BUILTIN_PSLLDQ128,
29986 IX86_BUILTIN_PSLLW128,
29987 IX86_BUILTIN_PSLLD128,
29988 IX86_BUILTIN_PSLLQ128,
29989 IX86_BUILTIN_PSRAW128,
29990 IX86_BUILTIN_PSRAD128,
29991 IX86_BUILTIN_PSRLW128,
29992 IX86_BUILTIN_PSRLD128,
29993 IX86_BUILTIN_PSRLQ128,
29995 IX86_BUILTIN_PUNPCKHBW128,
29996 IX86_BUILTIN_PUNPCKHWD128,
29997 IX86_BUILTIN_PUNPCKHDQ128,
29998 IX86_BUILTIN_PUNPCKHQDQ128,
29999 IX86_BUILTIN_PUNPCKLBW128,
30000 IX86_BUILTIN_PUNPCKLWD128,
30001 IX86_BUILTIN_PUNPCKLDQ128,
30002 IX86_BUILTIN_PUNPCKLQDQ128,
30004 IX86_BUILTIN_CLFLUSH,
30005 IX86_BUILTIN_MFENCE,
30006 IX86_BUILTIN_LFENCE,
30007 IX86_BUILTIN_PAUSE,
30009 IX86_BUILTIN_FNSTENV,
30010 IX86_BUILTIN_FLDENV,
30011 IX86_BUILTIN_FNSTSW,
30012 IX86_BUILTIN_FNCLEX,
30014 IX86_BUILTIN_BSRSI,
30015 IX86_BUILTIN_BSRDI,
30016 IX86_BUILTIN_RDPMC,
30017 IX86_BUILTIN_RDTSC,
30018 IX86_BUILTIN_RDTSCP,
30019 IX86_BUILTIN_ROLQI,
30020 IX86_BUILTIN_ROLHI,
30021 IX86_BUILTIN_RORQI,
30022 IX86_BUILTIN_RORHI,
30025 IX86_BUILTIN_ADDSUBPS,
30026 IX86_BUILTIN_HADDPS,
30027 IX86_BUILTIN_HSUBPS,
30028 IX86_BUILTIN_MOVSHDUP,
30029 IX86_BUILTIN_MOVSLDUP,
30030 IX86_BUILTIN_ADDSUBPD,
30031 IX86_BUILTIN_HADDPD,
30032 IX86_BUILTIN_HSUBPD,
30033 IX86_BUILTIN_LDDQU,
30035 IX86_BUILTIN_MONITOR,
30036 IX86_BUILTIN_MWAIT,
30037 IX86_BUILTIN_CLZERO,
30040 IX86_BUILTIN_PHADDW,
30041 IX86_BUILTIN_PHADDD,
30042 IX86_BUILTIN_PHADDSW,
30043 IX86_BUILTIN_PHSUBW,
30044 IX86_BUILTIN_PHSUBD,
30045 IX86_BUILTIN_PHSUBSW,
30046 IX86_BUILTIN_PMADDUBSW,
30047 IX86_BUILTIN_PMULHRSW,
30048 IX86_BUILTIN_PSHUFB,
30049 IX86_BUILTIN_PSIGNB,
30050 IX86_BUILTIN_PSIGNW,
30051 IX86_BUILTIN_PSIGND,
30052 IX86_BUILTIN_PALIGNR,
30053 IX86_BUILTIN_PABSB,
30054 IX86_BUILTIN_PABSW,
30055 IX86_BUILTIN_PABSD,
30057 IX86_BUILTIN_PHADDW128,
30058 IX86_BUILTIN_PHADDD128,
30059 IX86_BUILTIN_PHADDSW128,
30060 IX86_BUILTIN_PHSUBW128,
30061 IX86_BUILTIN_PHSUBD128,
30062 IX86_BUILTIN_PHSUBSW128,
30063 IX86_BUILTIN_PMADDUBSW128,
30064 IX86_BUILTIN_PMULHRSW128,
30065 IX86_BUILTIN_PSHUFB128,
30066 IX86_BUILTIN_PSIGNB128,
30067 IX86_BUILTIN_PSIGNW128,
30068 IX86_BUILTIN_PSIGND128,
30069 IX86_BUILTIN_PALIGNR128,
30070 IX86_BUILTIN_PABSB128,
30071 IX86_BUILTIN_PABSW128,
30072 IX86_BUILTIN_PABSD128,
30074 /* AMDFAM10 - SSE4A New Instructions. */
30075 IX86_BUILTIN_MOVNTSD,
30076 IX86_BUILTIN_MOVNTSS,
30077 IX86_BUILTIN_EXTRQI,
30078 IX86_BUILTIN_EXTRQ,
30079 IX86_BUILTIN_INSERTQI,
30080 IX86_BUILTIN_INSERTQ,
30083 IX86_BUILTIN_BLENDPD,
30084 IX86_BUILTIN_BLENDPS,
30085 IX86_BUILTIN_BLENDVPD,
30086 IX86_BUILTIN_BLENDVPS,
30087 IX86_BUILTIN_PBLENDVB128,
30088 IX86_BUILTIN_PBLENDW128,
30093 IX86_BUILTIN_INSERTPS128,
30095 IX86_BUILTIN_MOVNTDQA,
30096 IX86_BUILTIN_MPSADBW128,
30097 IX86_BUILTIN_PACKUSDW128,
30098 IX86_BUILTIN_PCMPEQQ,
30099 IX86_BUILTIN_PHMINPOSUW128,
30101 IX86_BUILTIN_PMAXSB128,
30102 IX86_BUILTIN_PMAXSD128,
30103 IX86_BUILTIN_PMAXUD128,
30104 IX86_BUILTIN_PMAXUW128,
30106 IX86_BUILTIN_PMINSB128,
30107 IX86_BUILTIN_PMINSD128,
30108 IX86_BUILTIN_PMINUD128,
30109 IX86_BUILTIN_PMINUW128,
30111 IX86_BUILTIN_PMOVSXBW128,
30112 IX86_BUILTIN_PMOVSXBD128,
30113 IX86_BUILTIN_PMOVSXBQ128,
30114 IX86_BUILTIN_PMOVSXWD128,
30115 IX86_BUILTIN_PMOVSXWQ128,
30116 IX86_BUILTIN_PMOVSXDQ128,
30118 IX86_BUILTIN_PMOVZXBW128,
30119 IX86_BUILTIN_PMOVZXBD128,
30120 IX86_BUILTIN_PMOVZXBQ128,
30121 IX86_BUILTIN_PMOVZXWD128,
30122 IX86_BUILTIN_PMOVZXWQ128,
30123 IX86_BUILTIN_PMOVZXDQ128,
30125 IX86_BUILTIN_PMULDQ128,
30126 IX86_BUILTIN_PMULLD128,
30128 IX86_BUILTIN_ROUNDSD,
30129 IX86_BUILTIN_ROUNDSS,
30131 IX86_BUILTIN_ROUNDPD,
30132 IX86_BUILTIN_ROUNDPS,
30134 IX86_BUILTIN_FLOORPD,
30135 IX86_BUILTIN_CEILPD,
30136 IX86_BUILTIN_TRUNCPD,
30137 IX86_BUILTIN_RINTPD,
30138 IX86_BUILTIN_ROUNDPD_AZ,
30140 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX,
30141 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX,
30142 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX,
30144 IX86_BUILTIN_FLOORPS,
30145 IX86_BUILTIN_CEILPS,
30146 IX86_BUILTIN_TRUNCPS,
30147 IX86_BUILTIN_RINTPS,
30148 IX86_BUILTIN_ROUNDPS_AZ,
30150 IX86_BUILTIN_FLOORPS_SFIX,
30151 IX86_BUILTIN_CEILPS_SFIX,
30152 IX86_BUILTIN_ROUNDPS_AZ_SFIX,
30154 IX86_BUILTIN_PTESTZ,
30155 IX86_BUILTIN_PTESTC,
30156 IX86_BUILTIN_PTESTNZC,
30158 IX86_BUILTIN_VEC_INIT_V2SI,
30159 IX86_BUILTIN_VEC_INIT_V4HI,
30160 IX86_BUILTIN_VEC_INIT_V8QI,
30161 IX86_BUILTIN_VEC_EXT_V2DF,
30162 IX86_BUILTIN_VEC_EXT_V2DI,
30163 IX86_BUILTIN_VEC_EXT_V4SF,
30164 IX86_BUILTIN_VEC_EXT_V4SI,
30165 IX86_BUILTIN_VEC_EXT_V8HI,
30166 IX86_BUILTIN_VEC_EXT_V2SI,
30167 IX86_BUILTIN_VEC_EXT_V4HI,
30168 IX86_BUILTIN_VEC_EXT_V16QI,
30169 IX86_BUILTIN_VEC_SET_V2DI,
30170 IX86_BUILTIN_VEC_SET_V4SF,
30171 IX86_BUILTIN_VEC_SET_V4SI,
30172 IX86_BUILTIN_VEC_SET_V8HI,
30173 IX86_BUILTIN_VEC_SET_V4HI,
30174 IX86_BUILTIN_VEC_SET_V16QI,
30176 IX86_BUILTIN_VEC_PACK_SFIX,
30177 IX86_BUILTIN_VEC_PACK_SFIX256,
30180 IX86_BUILTIN_CRC32QI,
30181 IX86_BUILTIN_CRC32HI,
30182 IX86_BUILTIN_CRC32SI,
30183 IX86_BUILTIN_CRC32DI,
30185 IX86_BUILTIN_PCMPESTRI128,
30186 IX86_BUILTIN_PCMPESTRM128,
30187 IX86_BUILTIN_PCMPESTRA128,
30188 IX86_BUILTIN_PCMPESTRC128,
30189 IX86_BUILTIN_PCMPESTRO128,
30190 IX86_BUILTIN_PCMPESTRS128,
30191 IX86_BUILTIN_PCMPESTRZ128,
30192 IX86_BUILTIN_PCMPISTRI128,
30193 IX86_BUILTIN_PCMPISTRM128,
30194 IX86_BUILTIN_PCMPISTRA128,
30195 IX86_BUILTIN_PCMPISTRC128,
30196 IX86_BUILTIN_PCMPISTRO128,
30197 IX86_BUILTIN_PCMPISTRS128,
30198 IX86_BUILTIN_PCMPISTRZ128,
30200 IX86_BUILTIN_PCMPGTQ,
30202 /* AES instructions */
30203 IX86_BUILTIN_AESENC128,
30204 IX86_BUILTIN_AESENCLAST128,
30205 IX86_BUILTIN_AESDEC128,
30206 IX86_BUILTIN_AESDECLAST128,
30207 IX86_BUILTIN_AESIMC128,
30208 IX86_BUILTIN_AESKEYGENASSIST128,
30210 /* PCLMUL instruction */
30211 IX86_BUILTIN_PCLMULQDQ128,
30214 IX86_BUILTIN_ADDPD256,
30215 IX86_BUILTIN_ADDPS256,
30216 IX86_BUILTIN_ADDSUBPD256,
30217 IX86_BUILTIN_ADDSUBPS256,
30218 IX86_BUILTIN_ANDPD256,
30219 IX86_BUILTIN_ANDPS256,
30220 IX86_BUILTIN_ANDNPD256,
30221 IX86_BUILTIN_ANDNPS256,
30222 IX86_BUILTIN_BLENDPD256,
30223 IX86_BUILTIN_BLENDPS256,
30224 IX86_BUILTIN_BLENDVPD256,
30225 IX86_BUILTIN_BLENDVPS256,
30226 IX86_BUILTIN_DIVPD256,
30227 IX86_BUILTIN_DIVPS256,
30228 IX86_BUILTIN_DPPS256,
30229 IX86_BUILTIN_HADDPD256,
30230 IX86_BUILTIN_HADDPS256,
30231 IX86_BUILTIN_HSUBPD256,
30232 IX86_BUILTIN_HSUBPS256,
30233 IX86_BUILTIN_MAXPD256,
30234 IX86_BUILTIN_MAXPS256,
30235 IX86_BUILTIN_MINPD256,
30236 IX86_BUILTIN_MINPS256,
30237 IX86_BUILTIN_MULPD256,
30238 IX86_BUILTIN_MULPS256,
30239 IX86_BUILTIN_ORPD256,
30240 IX86_BUILTIN_ORPS256,
30241 IX86_BUILTIN_SHUFPD256,
30242 IX86_BUILTIN_SHUFPS256,
30243 IX86_BUILTIN_SUBPD256,
30244 IX86_BUILTIN_SUBPS256,
30245 IX86_BUILTIN_XORPD256,
30246 IX86_BUILTIN_XORPS256,
30247 IX86_BUILTIN_CMPSD,
30248 IX86_BUILTIN_CMPSS,
30249 IX86_BUILTIN_CMPPD,
30250 IX86_BUILTIN_CMPPS,
30251 IX86_BUILTIN_CMPPD256,
30252 IX86_BUILTIN_CMPPS256,
30253 IX86_BUILTIN_CVTDQ2PD256,
30254 IX86_BUILTIN_CVTDQ2PS256,
30255 IX86_BUILTIN_CVTPD2PS256,
30256 IX86_BUILTIN_CVTPS2DQ256,
30257 IX86_BUILTIN_CVTPS2PD256,
30258 IX86_BUILTIN_CVTTPD2DQ256,
30259 IX86_BUILTIN_CVTPD2DQ256,
30260 IX86_BUILTIN_CVTTPS2DQ256,
30261 IX86_BUILTIN_EXTRACTF128PD256,
30262 IX86_BUILTIN_EXTRACTF128PS256,
30263 IX86_BUILTIN_EXTRACTF128SI256,
30264 IX86_BUILTIN_VZEROALL,
30265 IX86_BUILTIN_VZEROUPPER,
30266 IX86_BUILTIN_VPERMILVARPD,
30267 IX86_BUILTIN_VPERMILVARPS,
30268 IX86_BUILTIN_VPERMILVARPD256,
30269 IX86_BUILTIN_VPERMILVARPS256,
30270 IX86_BUILTIN_VPERMILPD,
30271 IX86_BUILTIN_VPERMILPS,
30272 IX86_BUILTIN_VPERMILPD256,
30273 IX86_BUILTIN_VPERMILPS256,
30274 IX86_BUILTIN_VPERMIL2PD,
30275 IX86_BUILTIN_VPERMIL2PS,
30276 IX86_BUILTIN_VPERMIL2PD256,
30277 IX86_BUILTIN_VPERMIL2PS256,
30278 IX86_BUILTIN_VPERM2F128PD256,
30279 IX86_BUILTIN_VPERM2F128PS256,
30280 IX86_BUILTIN_VPERM2F128SI256,
30281 IX86_BUILTIN_VBROADCASTSS,
30282 IX86_BUILTIN_VBROADCASTSD256,
30283 IX86_BUILTIN_VBROADCASTSS256,
30284 IX86_BUILTIN_VBROADCASTPD256,
30285 IX86_BUILTIN_VBROADCASTPS256,
30286 IX86_BUILTIN_VINSERTF128PD256,
30287 IX86_BUILTIN_VINSERTF128PS256,
30288 IX86_BUILTIN_VINSERTF128SI256,
30289 IX86_BUILTIN_LOADUPD256,
30290 IX86_BUILTIN_LOADUPS256,
30291 IX86_BUILTIN_STOREUPD256,
30292 IX86_BUILTIN_STOREUPS256,
30293 IX86_BUILTIN_LDDQU256,
30294 IX86_BUILTIN_MOVNTDQ256,
30295 IX86_BUILTIN_MOVNTPD256,
30296 IX86_BUILTIN_MOVNTPS256,
30297 IX86_BUILTIN_LOADDQU256,
30298 IX86_BUILTIN_STOREDQU256,
30299 IX86_BUILTIN_MASKLOADPD,
30300 IX86_BUILTIN_MASKLOADPS,
30301 IX86_BUILTIN_MASKSTOREPD,
30302 IX86_BUILTIN_MASKSTOREPS,
30303 IX86_BUILTIN_MASKLOADPD256,
30304 IX86_BUILTIN_MASKLOADPS256,
30305 IX86_BUILTIN_MASKSTOREPD256,
30306 IX86_BUILTIN_MASKSTOREPS256,
30307 IX86_BUILTIN_MOVSHDUP256,
30308 IX86_BUILTIN_MOVSLDUP256,
30309 IX86_BUILTIN_MOVDDUP256,
30311 IX86_BUILTIN_SQRTPD256,
30312 IX86_BUILTIN_SQRTPS256,
30313 IX86_BUILTIN_SQRTPS_NR256,
30314 IX86_BUILTIN_RSQRTPS256,
30315 IX86_BUILTIN_RSQRTPS_NR256,
30317 IX86_BUILTIN_RCPPS256,
30319 IX86_BUILTIN_ROUNDPD256,
30320 IX86_BUILTIN_ROUNDPS256,
30322 IX86_BUILTIN_FLOORPD256,
30323 IX86_BUILTIN_CEILPD256,
30324 IX86_BUILTIN_TRUNCPD256,
30325 IX86_BUILTIN_RINTPD256,
30326 IX86_BUILTIN_ROUNDPD_AZ256,
30328 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256,
30329 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256,
30330 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256,
30332 IX86_BUILTIN_FLOORPS256,
30333 IX86_BUILTIN_CEILPS256,
30334 IX86_BUILTIN_TRUNCPS256,
30335 IX86_BUILTIN_RINTPS256,
30336 IX86_BUILTIN_ROUNDPS_AZ256,
30338 IX86_BUILTIN_FLOORPS_SFIX256,
30339 IX86_BUILTIN_CEILPS_SFIX256,
30340 IX86_BUILTIN_ROUNDPS_AZ_SFIX256,
30342 IX86_BUILTIN_UNPCKHPD256,
30343 IX86_BUILTIN_UNPCKLPD256,
30344 IX86_BUILTIN_UNPCKHPS256,
30345 IX86_BUILTIN_UNPCKLPS256,
30347 IX86_BUILTIN_SI256_SI,
30348 IX86_BUILTIN_PS256_PS,
30349 IX86_BUILTIN_PD256_PD,
30350 IX86_BUILTIN_SI_SI256,
30351 IX86_BUILTIN_PS_PS256,
30352 IX86_BUILTIN_PD_PD256,
30354 IX86_BUILTIN_VTESTZPD,
30355 IX86_BUILTIN_VTESTCPD,
30356 IX86_BUILTIN_VTESTNZCPD,
30357 IX86_BUILTIN_VTESTZPS,
30358 IX86_BUILTIN_VTESTCPS,
30359 IX86_BUILTIN_VTESTNZCPS,
30360 IX86_BUILTIN_VTESTZPD256,
30361 IX86_BUILTIN_VTESTCPD256,
30362 IX86_BUILTIN_VTESTNZCPD256,
30363 IX86_BUILTIN_VTESTZPS256,
30364 IX86_BUILTIN_VTESTCPS256,
30365 IX86_BUILTIN_VTESTNZCPS256,
30366 IX86_BUILTIN_PTESTZ256,
30367 IX86_BUILTIN_PTESTC256,
30368 IX86_BUILTIN_PTESTNZC256,
30370 IX86_BUILTIN_MOVMSKPD256,
30371 IX86_BUILTIN_MOVMSKPS256,
30374 IX86_BUILTIN_MPSADBW256,
30375 IX86_BUILTIN_PABSB256,
30376 IX86_BUILTIN_PABSW256,
30377 IX86_BUILTIN_PABSD256,
30378 IX86_BUILTIN_PACKSSDW256,
30379 IX86_BUILTIN_PACKSSWB256,
30380 IX86_BUILTIN_PACKUSDW256,
30381 IX86_BUILTIN_PACKUSWB256,
30382 IX86_BUILTIN_PADDB256,
30383 IX86_BUILTIN_PADDW256,
30384 IX86_BUILTIN_PADDD256,
30385 IX86_BUILTIN_PADDQ256,
30386 IX86_BUILTIN_PADDSB256,
30387 IX86_BUILTIN_PADDSW256,
30388 IX86_BUILTIN_PADDUSB256,
30389 IX86_BUILTIN_PADDUSW256,
30390 IX86_BUILTIN_PALIGNR256,
30391 IX86_BUILTIN_AND256I,
30392 IX86_BUILTIN_ANDNOT256I,
30393 IX86_BUILTIN_PAVGB256,
30394 IX86_BUILTIN_PAVGW256,
30395 IX86_BUILTIN_PBLENDVB256,
30396 IX86_BUILTIN_PBLENDVW256,
30397 IX86_BUILTIN_PCMPEQB256,
30398 IX86_BUILTIN_PCMPEQW256,
30399 IX86_BUILTIN_PCMPEQD256,
30400 IX86_BUILTIN_PCMPEQQ256,
30401 IX86_BUILTIN_PCMPGTB256,
30402 IX86_BUILTIN_PCMPGTW256,
30403 IX86_BUILTIN_PCMPGTD256,
30404 IX86_BUILTIN_PCMPGTQ256,
30405 IX86_BUILTIN_PHADDW256,
30406 IX86_BUILTIN_PHADDD256,
30407 IX86_BUILTIN_PHADDSW256,
30408 IX86_BUILTIN_PHSUBW256,
30409 IX86_BUILTIN_PHSUBD256,
30410 IX86_BUILTIN_PHSUBSW256,
30411 IX86_BUILTIN_PMADDUBSW256,
30412 IX86_BUILTIN_PMADDWD256,
30413 IX86_BUILTIN_PMAXSB256,
30414 IX86_BUILTIN_PMAXSW256,
30415 IX86_BUILTIN_PMAXSD256,
30416 IX86_BUILTIN_PMAXUB256,
30417 IX86_BUILTIN_PMAXUW256,
30418 IX86_BUILTIN_PMAXUD256,
30419 IX86_BUILTIN_PMINSB256,
30420 IX86_BUILTIN_PMINSW256,
30421 IX86_BUILTIN_PMINSD256,
30422 IX86_BUILTIN_PMINUB256,
30423 IX86_BUILTIN_PMINUW256,
30424 IX86_BUILTIN_PMINUD256,
30425 IX86_BUILTIN_PMOVMSKB256,
30426 IX86_BUILTIN_PMOVSXBW256,
30427 IX86_BUILTIN_PMOVSXBD256,
30428 IX86_BUILTIN_PMOVSXBQ256,
30429 IX86_BUILTIN_PMOVSXWD256,
30430 IX86_BUILTIN_PMOVSXWQ256,
30431 IX86_BUILTIN_PMOVSXDQ256,
30432 IX86_BUILTIN_PMOVZXBW256,
30433 IX86_BUILTIN_PMOVZXBD256,
30434 IX86_BUILTIN_PMOVZXBQ256,
30435 IX86_BUILTIN_PMOVZXWD256,
30436 IX86_BUILTIN_PMOVZXWQ256,
30437 IX86_BUILTIN_PMOVZXDQ256,
30438 IX86_BUILTIN_PMULDQ256,
30439 IX86_BUILTIN_PMULHRSW256,
30440 IX86_BUILTIN_PMULHUW256,
30441 IX86_BUILTIN_PMULHW256,
30442 IX86_BUILTIN_PMULLW256,
30443 IX86_BUILTIN_PMULLD256,
30444 IX86_BUILTIN_PMULUDQ256,
30445 IX86_BUILTIN_POR256,
30446 IX86_BUILTIN_PSADBW256,
30447 IX86_BUILTIN_PSHUFB256,
30448 IX86_BUILTIN_PSHUFD256,
30449 IX86_BUILTIN_PSHUFHW256,
30450 IX86_BUILTIN_PSHUFLW256,
30451 IX86_BUILTIN_PSIGNB256,
30452 IX86_BUILTIN_PSIGNW256,
30453 IX86_BUILTIN_PSIGND256,
30454 IX86_BUILTIN_PSLLDQI256,
30455 IX86_BUILTIN_PSLLWI256,
30456 IX86_BUILTIN_PSLLW256,
30457 IX86_BUILTIN_PSLLDI256,
30458 IX86_BUILTIN_PSLLD256,
30459 IX86_BUILTIN_PSLLQI256,
30460 IX86_BUILTIN_PSLLQ256,
30461 IX86_BUILTIN_PSRAWI256,
30462 IX86_BUILTIN_PSRAW256,
30463 IX86_BUILTIN_PSRADI256,
30464 IX86_BUILTIN_PSRAD256,
30465 IX86_BUILTIN_PSRLDQI256,
30466 IX86_BUILTIN_PSRLWI256,
30467 IX86_BUILTIN_PSRLW256,
30468 IX86_BUILTIN_PSRLDI256,
30469 IX86_BUILTIN_PSRLD256,
30470 IX86_BUILTIN_PSRLQI256,
30471 IX86_BUILTIN_PSRLQ256,
30472 IX86_BUILTIN_PSUBB256,
30473 IX86_BUILTIN_PSUBW256,
30474 IX86_BUILTIN_PSUBD256,
30475 IX86_BUILTIN_PSUBQ256,
30476 IX86_BUILTIN_PSUBSB256,
30477 IX86_BUILTIN_PSUBSW256,
30478 IX86_BUILTIN_PSUBUSB256,
30479 IX86_BUILTIN_PSUBUSW256,
30480 IX86_BUILTIN_PUNPCKHBW256,
30481 IX86_BUILTIN_PUNPCKHWD256,
30482 IX86_BUILTIN_PUNPCKHDQ256,
30483 IX86_BUILTIN_PUNPCKHQDQ256,
30484 IX86_BUILTIN_PUNPCKLBW256,
30485 IX86_BUILTIN_PUNPCKLWD256,
30486 IX86_BUILTIN_PUNPCKLDQ256,
30487 IX86_BUILTIN_PUNPCKLQDQ256,
30488 IX86_BUILTIN_PXOR256,
30489 IX86_BUILTIN_MOVNTDQA256,
30490 IX86_BUILTIN_VBROADCASTSS_PS,
30491 IX86_BUILTIN_VBROADCASTSS_PS256,
30492 IX86_BUILTIN_VBROADCASTSD_PD256,
30493 IX86_BUILTIN_VBROADCASTSI256,
30494 IX86_BUILTIN_PBLENDD256,
30495 IX86_BUILTIN_PBLENDD128,
30496 IX86_BUILTIN_PBROADCASTB256,
30497 IX86_BUILTIN_PBROADCASTW256,
30498 IX86_BUILTIN_PBROADCASTD256,
30499 IX86_BUILTIN_PBROADCASTQ256,
30500 IX86_BUILTIN_PBROADCASTB128,
30501 IX86_BUILTIN_PBROADCASTW128,
30502 IX86_BUILTIN_PBROADCASTD128,
30503 IX86_BUILTIN_PBROADCASTQ128,
30504 IX86_BUILTIN_VPERMVARSI256,
30505 IX86_BUILTIN_VPERMDF256,
30506 IX86_BUILTIN_VPERMVARSF256,
30507 IX86_BUILTIN_VPERMDI256,
30508 IX86_BUILTIN_VPERMTI256,
30509 IX86_BUILTIN_VEXTRACT128I256,
30510 IX86_BUILTIN_VINSERT128I256,
30511 IX86_BUILTIN_MASKLOADD,
30512 IX86_BUILTIN_MASKLOADQ,
30513 IX86_BUILTIN_MASKLOADD256,
30514 IX86_BUILTIN_MASKLOADQ256,
30515 IX86_BUILTIN_MASKSTORED,
30516 IX86_BUILTIN_MASKSTOREQ,
30517 IX86_BUILTIN_MASKSTORED256,
30518 IX86_BUILTIN_MASKSTOREQ256,
30519 IX86_BUILTIN_PSLLVV4DI,
30520 IX86_BUILTIN_PSLLVV2DI,
30521 IX86_BUILTIN_PSLLVV8SI,
30522 IX86_BUILTIN_PSLLVV4SI,
30523 IX86_BUILTIN_PSRAVV8SI,
30524 IX86_BUILTIN_PSRAVV4SI,
30525 IX86_BUILTIN_PSRLVV4DI,
30526 IX86_BUILTIN_PSRLVV2DI,
30527 IX86_BUILTIN_PSRLVV8SI,
30528 IX86_BUILTIN_PSRLVV4SI,
30530 IX86_BUILTIN_GATHERSIV2DF,
30531 IX86_BUILTIN_GATHERSIV4DF,
30532 IX86_BUILTIN_GATHERDIV2DF,
30533 IX86_BUILTIN_GATHERDIV4DF,
30534 IX86_BUILTIN_GATHERSIV4SF,
30535 IX86_BUILTIN_GATHERSIV8SF,
30536 IX86_BUILTIN_GATHERDIV4SF,
30537 IX86_BUILTIN_GATHERDIV8SF,
30538 IX86_BUILTIN_GATHERSIV2DI,
30539 IX86_BUILTIN_GATHERSIV4DI,
30540 IX86_BUILTIN_GATHERDIV2DI,
30541 IX86_BUILTIN_GATHERDIV4DI,
30542 IX86_BUILTIN_GATHERSIV4SI,
30543 IX86_BUILTIN_GATHERSIV8SI,
30544 IX86_BUILTIN_GATHERDIV4SI,
30545 IX86_BUILTIN_GATHERDIV8SI,
30548 IX86_BUILTIN_SI512_SI256,
30549 IX86_BUILTIN_PD512_PD256,
30550 IX86_BUILTIN_PS512_PS256,
30551 IX86_BUILTIN_SI512_SI,
30552 IX86_BUILTIN_PD512_PD,
30553 IX86_BUILTIN_PS512_PS,
30554 IX86_BUILTIN_ADDPD512,
30555 IX86_BUILTIN_ADDPS512,
30556 IX86_BUILTIN_ADDSD_ROUND,
30557 IX86_BUILTIN_ADDSS_ROUND,
30558 IX86_BUILTIN_ALIGND512,
30559 IX86_BUILTIN_ALIGNQ512,
30560 IX86_BUILTIN_BLENDMD512,
30561 IX86_BUILTIN_BLENDMPD512,
30562 IX86_BUILTIN_BLENDMPS512,
30563 IX86_BUILTIN_BLENDMQ512,
30564 IX86_BUILTIN_BROADCASTF32X4_512,
30565 IX86_BUILTIN_BROADCASTF64X4_512,
30566 IX86_BUILTIN_BROADCASTI32X4_512,
30567 IX86_BUILTIN_BROADCASTI64X4_512,
30568 IX86_BUILTIN_BROADCASTSD512,
30569 IX86_BUILTIN_BROADCASTSS512,
30570 IX86_BUILTIN_CMPD512,
30571 IX86_BUILTIN_CMPPD512,
30572 IX86_BUILTIN_CMPPS512,
30573 IX86_BUILTIN_CMPQ512,
30574 IX86_BUILTIN_CMPSD_MASK,
30575 IX86_BUILTIN_CMPSS_MASK,
30576 IX86_BUILTIN_COMIDF,
30577 IX86_BUILTIN_COMISF,
30578 IX86_BUILTIN_COMPRESSPD512,
30579 IX86_BUILTIN_COMPRESSPDSTORE512,
30580 IX86_BUILTIN_COMPRESSPS512,
30581 IX86_BUILTIN_COMPRESSPSSTORE512,
30582 IX86_BUILTIN_CVTDQ2PD512,
30583 IX86_BUILTIN_CVTDQ2PS512,
30584 IX86_BUILTIN_CVTPD2DQ512,
30585 IX86_BUILTIN_CVTPD2PS512,
30586 IX86_BUILTIN_CVTPD2UDQ512,
30587 IX86_BUILTIN_CVTPH2PS512,
30588 IX86_BUILTIN_CVTPS2DQ512,
30589 IX86_BUILTIN_CVTPS2PD512,
30590 IX86_BUILTIN_CVTPS2PH512,
30591 IX86_BUILTIN_CVTPS2UDQ512,
30592 IX86_BUILTIN_CVTSD2SS_ROUND,
30593 IX86_BUILTIN_CVTSI2SD64,
30594 IX86_BUILTIN_CVTSI2SS32,
30595 IX86_BUILTIN_CVTSI2SS64,
30596 IX86_BUILTIN_CVTSS2SD_ROUND,
30597 IX86_BUILTIN_CVTTPD2DQ512,
30598 IX86_BUILTIN_CVTTPD2UDQ512,
30599 IX86_BUILTIN_CVTTPS2DQ512,
30600 IX86_BUILTIN_CVTTPS2UDQ512,
30601 IX86_BUILTIN_CVTUDQ2PD512,
30602 IX86_BUILTIN_CVTUDQ2PS512,
30603 IX86_BUILTIN_CVTUSI2SD32,
30604 IX86_BUILTIN_CVTUSI2SD64,
30605 IX86_BUILTIN_CVTUSI2SS32,
30606 IX86_BUILTIN_CVTUSI2SS64,
30607 IX86_BUILTIN_DIVPD512,
30608 IX86_BUILTIN_DIVPS512,
30609 IX86_BUILTIN_DIVSD_ROUND,
30610 IX86_BUILTIN_DIVSS_ROUND,
30611 IX86_BUILTIN_EXPANDPD512,
30612 IX86_BUILTIN_EXPANDPD512Z,
30613 IX86_BUILTIN_EXPANDPDLOAD512,
30614 IX86_BUILTIN_EXPANDPDLOAD512Z,
30615 IX86_BUILTIN_EXPANDPS512,
30616 IX86_BUILTIN_EXPANDPS512Z,
30617 IX86_BUILTIN_EXPANDPSLOAD512,
30618 IX86_BUILTIN_EXPANDPSLOAD512Z,
30619 IX86_BUILTIN_EXTRACTF32X4,
30620 IX86_BUILTIN_EXTRACTF64X4,
30621 IX86_BUILTIN_EXTRACTI32X4,
30622 IX86_BUILTIN_EXTRACTI64X4,
30623 IX86_BUILTIN_FIXUPIMMPD512_MASK,
30624 IX86_BUILTIN_FIXUPIMMPD512_MASKZ,
30625 IX86_BUILTIN_FIXUPIMMPS512_MASK,
30626 IX86_BUILTIN_FIXUPIMMPS512_MASKZ,
30627 IX86_BUILTIN_FIXUPIMMSD128_MASK,
30628 IX86_BUILTIN_FIXUPIMMSD128_MASKZ,
30629 IX86_BUILTIN_FIXUPIMMSS128_MASK,
30630 IX86_BUILTIN_FIXUPIMMSS128_MASKZ,
30631 IX86_BUILTIN_GETEXPPD512,
30632 IX86_BUILTIN_GETEXPPS512,
30633 IX86_BUILTIN_GETEXPSD128,
30634 IX86_BUILTIN_GETEXPSS128,
30635 IX86_BUILTIN_GETMANTPD512,
30636 IX86_BUILTIN_GETMANTPS512,
30637 IX86_BUILTIN_GETMANTSD128,
30638 IX86_BUILTIN_GETMANTSS128,
30639 IX86_BUILTIN_INSERTF32X4,
30640 IX86_BUILTIN_INSERTF64X4,
30641 IX86_BUILTIN_INSERTI32X4,
30642 IX86_BUILTIN_INSERTI64X4,
30643 IX86_BUILTIN_LOADAPD512,
30644 IX86_BUILTIN_LOADAPS512,
30645 IX86_BUILTIN_LOADDQUDI512,
30646 IX86_BUILTIN_LOADDQUSI512,
30647 IX86_BUILTIN_LOADUPD512,
30648 IX86_BUILTIN_LOADUPS512,
30649 IX86_BUILTIN_MAXPD512,
30650 IX86_BUILTIN_MAXPS512,
30651 IX86_BUILTIN_MAXSD_ROUND,
30652 IX86_BUILTIN_MAXSS_ROUND,
30653 IX86_BUILTIN_MINPD512,
30654 IX86_BUILTIN_MINPS512,
30655 IX86_BUILTIN_MINSD_ROUND,
30656 IX86_BUILTIN_MINSS_ROUND,
30657 IX86_BUILTIN_MOVAPD512,
30658 IX86_BUILTIN_MOVAPS512,
30659 IX86_BUILTIN_MOVDDUP512,
30660 IX86_BUILTIN_MOVDQA32LOAD512,
30661 IX86_BUILTIN_MOVDQA32STORE512,
30662 IX86_BUILTIN_MOVDQA32_512,
30663 IX86_BUILTIN_MOVDQA64LOAD512,
30664 IX86_BUILTIN_MOVDQA64STORE512,
30665 IX86_BUILTIN_MOVDQA64_512,
30666 IX86_BUILTIN_MOVNTDQ512,
30667 IX86_BUILTIN_MOVNTDQA512,
30668 IX86_BUILTIN_MOVNTPD512,
30669 IX86_BUILTIN_MOVNTPS512,
30670 IX86_BUILTIN_MOVSHDUP512,
30671 IX86_BUILTIN_MOVSLDUP512,
30672 IX86_BUILTIN_MULPD512,
30673 IX86_BUILTIN_MULPS512,
30674 IX86_BUILTIN_MULSD_ROUND,
30675 IX86_BUILTIN_MULSS_ROUND,
30676 IX86_BUILTIN_PABSD512,
30677 IX86_BUILTIN_PABSQ512,
30678 IX86_BUILTIN_PADDD512,
30679 IX86_BUILTIN_PADDQ512,
30680 IX86_BUILTIN_PANDD512,
30681 IX86_BUILTIN_PANDND512,
30682 IX86_BUILTIN_PANDNQ512,
30683 IX86_BUILTIN_PANDQ512,
30684 IX86_BUILTIN_PBROADCASTD512,
30685 IX86_BUILTIN_PBROADCASTD512_GPR,
30686 IX86_BUILTIN_PBROADCASTMB512,
30687 IX86_BUILTIN_PBROADCASTMW512,
30688 IX86_BUILTIN_PBROADCASTQ512,
30689 IX86_BUILTIN_PBROADCASTQ512_GPR,
30690 IX86_BUILTIN_PCMPEQD512_MASK,
30691 IX86_BUILTIN_PCMPEQQ512_MASK,
30692 IX86_BUILTIN_PCMPGTD512_MASK,
30693 IX86_BUILTIN_PCMPGTQ512_MASK,
30694 IX86_BUILTIN_PCOMPRESSD512,
30695 IX86_BUILTIN_PCOMPRESSDSTORE512,
30696 IX86_BUILTIN_PCOMPRESSQ512,
30697 IX86_BUILTIN_PCOMPRESSQSTORE512,
30698 IX86_BUILTIN_PEXPANDD512,
30699 IX86_BUILTIN_PEXPANDD512Z,
30700 IX86_BUILTIN_PEXPANDDLOAD512,
30701 IX86_BUILTIN_PEXPANDDLOAD512Z,
30702 IX86_BUILTIN_PEXPANDQ512,
30703 IX86_BUILTIN_PEXPANDQ512Z,
30704 IX86_BUILTIN_PEXPANDQLOAD512,
30705 IX86_BUILTIN_PEXPANDQLOAD512Z,
30706 IX86_BUILTIN_PMAXSD512,
30707 IX86_BUILTIN_PMAXSQ512,
30708 IX86_BUILTIN_PMAXUD512,
30709 IX86_BUILTIN_PMAXUQ512,
30710 IX86_BUILTIN_PMINSD512,
30711 IX86_BUILTIN_PMINSQ512,
30712 IX86_BUILTIN_PMINUD512,
30713 IX86_BUILTIN_PMINUQ512,
30714 IX86_BUILTIN_PMOVDB512,
30715 IX86_BUILTIN_PMOVDB512_MEM,
30716 IX86_BUILTIN_PMOVDW512,
30717 IX86_BUILTIN_PMOVDW512_MEM,
30718 IX86_BUILTIN_PMOVQB512,
30719 IX86_BUILTIN_PMOVQB512_MEM,
30720 IX86_BUILTIN_PMOVQD512,
30721 IX86_BUILTIN_PMOVQD512_MEM,
30722 IX86_BUILTIN_PMOVQW512,
30723 IX86_BUILTIN_PMOVQW512_MEM,
30724 IX86_BUILTIN_PMOVSDB512,
30725 IX86_BUILTIN_PMOVSDB512_MEM,
30726 IX86_BUILTIN_PMOVSDW512,
30727 IX86_BUILTIN_PMOVSDW512_MEM,
30728 IX86_BUILTIN_PMOVSQB512,
30729 IX86_BUILTIN_PMOVSQB512_MEM,
30730 IX86_BUILTIN_PMOVSQD512,
30731 IX86_BUILTIN_PMOVSQD512_MEM,
30732 IX86_BUILTIN_PMOVSQW512,
30733 IX86_BUILTIN_PMOVSQW512_MEM,
30734 IX86_BUILTIN_PMOVSXBD512,
30735 IX86_BUILTIN_PMOVSXBQ512,
30736 IX86_BUILTIN_PMOVSXDQ512,
30737 IX86_BUILTIN_PMOVSXWD512,
30738 IX86_BUILTIN_PMOVSXWQ512,
30739 IX86_BUILTIN_PMOVUSDB512,
30740 IX86_BUILTIN_PMOVUSDB512_MEM,
30741 IX86_BUILTIN_PMOVUSDW512,
30742 IX86_BUILTIN_PMOVUSDW512_MEM,
30743 IX86_BUILTIN_PMOVUSQB512,
30744 IX86_BUILTIN_PMOVUSQB512_MEM,
30745 IX86_BUILTIN_PMOVUSQD512,
30746 IX86_BUILTIN_PMOVUSQD512_MEM,
30747 IX86_BUILTIN_PMOVUSQW512,
30748 IX86_BUILTIN_PMOVUSQW512_MEM,
30749 IX86_BUILTIN_PMOVZXBD512,
30750 IX86_BUILTIN_PMOVZXBQ512,
30751 IX86_BUILTIN_PMOVZXDQ512,
30752 IX86_BUILTIN_PMOVZXWD512,
30753 IX86_BUILTIN_PMOVZXWQ512,
30754 IX86_BUILTIN_PMULDQ512,
30755 IX86_BUILTIN_PMULLD512,
30756 IX86_BUILTIN_PMULUDQ512,
30757 IX86_BUILTIN_PORD512,
30758 IX86_BUILTIN_PORQ512,
30759 IX86_BUILTIN_PROLD512,
30760 IX86_BUILTIN_PROLQ512,
30761 IX86_BUILTIN_PROLVD512,
30762 IX86_BUILTIN_PROLVQ512,
30763 IX86_BUILTIN_PRORD512,
30764 IX86_BUILTIN_PRORQ512,
30765 IX86_BUILTIN_PRORVD512,
30766 IX86_BUILTIN_PRORVQ512,
30767 IX86_BUILTIN_PSHUFD512,
30768 IX86_BUILTIN_PSLLD512,
30769 IX86_BUILTIN_PSLLDI512,
30770 IX86_BUILTIN_PSLLQ512,
30771 IX86_BUILTIN_PSLLQI512,
30772 IX86_BUILTIN_PSLLVV16SI,
30773 IX86_BUILTIN_PSLLVV8DI,
30774 IX86_BUILTIN_PSRAD512,
30775 IX86_BUILTIN_PSRADI512,
30776 IX86_BUILTIN_PSRAQ512,
30777 IX86_BUILTIN_PSRAQI512,
30778 IX86_BUILTIN_PSRAVV16SI,
30779 IX86_BUILTIN_PSRAVV8DI,
30780 IX86_BUILTIN_PSRLD512,
30781 IX86_BUILTIN_PSRLDI512,
30782 IX86_BUILTIN_PSRLQ512,
30783 IX86_BUILTIN_PSRLQI512,
30784 IX86_BUILTIN_PSRLVV16SI,
30785 IX86_BUILTIN_PSRLVV8DI,
30786 IX86_BUILTIN_PSUBD512,
30787 IX86_BUILTIN_PSUBQ512,
30788 IX86_BUILTIN_PTESTMD512,
30789 IX86_BUILTIN_PTESTMQ512,
30790 IX86_BUILTIN_PTESTNMD512,
30791 IX86_BUILTIN_PTESTNMQ512,
30792 IX86_BUILTIN_PUNPCKHDQ512,
30793 IX86_BUILTIN_PUNPCKHQDQ512,
30794 IX86_BUILTIN_PUNPCKLDQ512,
30795 IX86_BUILTIN_PUNPCKLQDQ512,
30796 IX86_BUILTIN_PXORD512,
30797 IX86_BUILTIN_PXORQ512,
30798 IX86_BUILTIN_RCP14PD512,
30799 IX86_BUILTIN_RCP14PS512,
30800 IX86_BUILTIN_RCP14SD,
30801 IX86_BUILTIN_RCP14SS,
30802 IX86_BUILTIN_RNDSCALEPD,
30803 IX86_BUILTIN_RNDSCALEPS,
30804 IX86_BUILTIN_RNDSCALESD,
30805 IX86_BUILTIN_RNDSCALESS,
30806 IX86_BUILTIN_RSQRT14PD512,
30807 IX86_BUILTIN_RSQRT14PS512,
30808 IX86_BUILTIN_RSQRT14SD,
30809 IX86_BUILTIN_RSQRT14SS,
30810 IX86_BUILTIN_SCALEFPD512,
30811 IX86_BUILTIN_SCALEFPS512,
30812 IX86_BUILTIN_SCALEFSD,
30813 IX86_BUILTIN_SCALEFSS,
30814 IX86_BUILTIN_SHUFPD512,
30815 IX86_BUILTIN_SHUFPS512,
30816 IX86_BUILTIN_SHUF_F32x4,
30817 IX86_BUILTIN_SHUF_F64x2,
30818 IX86_BUILTIN_SHUF_I32x4,
30819 IX86_BUILTIN_SHUF_I64x2,
30820 IX86_BUILTIN_SQRTPD512,
30821 IX86_BUILTIN_SQRTPD512_MASK,
30822 IX86_BUILTIN_SQRTPS512_MASK,
30823 IX86_BUILTIN_SQRTPS_NR512,
30824 IX86_BUILTIN_SQRTSD_ROUND,
30825 IX86_BUILTIN_SQRTSS_ROUND,
30826 IX86_BUILTIN_STOREAPD512,
30827 IX86_BUILTIN_STOREAPS512,
30828 IX86_BUILTIN_STOREDQUDI512,
30829 IX86_BUILTIN_STOREDQUSI512,
30830 IX86_BUILTIN_STOREUPD512,
30831 IX86_BUILTIN_STOREUPS512,
30832 IX86_BUILTIN_SUBPD512,
30833 IX86_BUILTIN_SUBPS512,
30834 IX86_BUILTIN_SUBSD_ROUND,
30835 IX86_BUILTIN_SUBSS_ROUND,
30836 IX86_BUILTIN_UCMPD512,
30837 IX86_BUILTIN_UCMPQ512,
30838 IX86_BUILTIN_UNPCKHPD512,
30839 IX86_BUILTIN_UNPCKHPS512,
30840 IX86_BUILTIN_UNPCKLPD512,
30841 IX86_BUILTIN_UNPCKLPS512,
30842 IX86_BUILTIN_VCVTSD2SI32,
30843 IX86_BUILTIN_VCVTSD2SI64,
30844 IX86_BUILTIN_VCVTSD2USI32,
30845 IX86_BUILTIN_VCVTSD2USI64,
30846 IX86_BUILTIN_VCVTSS2SI32,
30847 IX86_BUILTIN_VCVTSS2SI64,
30848 IX86_BUILTIN_VCVTSS2USI32,
30849 IX86_BUILTIN_VCVTSS2USI64,
30850 IX86_BUILTIN_VCVTTSD2SI32,
30851 IX86_BUILTIN_VCVTTSD2SI64,
30852 IX86_BUILTIN_VCVTTSD2USI32,
30853 IX86_BUILTIN_VCVTTSD2USI64,
30854 IX86_BUILTIN_VCVTTSS2SI32,
30855 IX86_BUILTIN_VCVTTSS2SI64,
30856 IX86_BUILTIN_VCVTTSS2USI32,
30857 IX86_BUILTIN_VCVTTSS2USI64,
30858 IX86_BUILTIN_VFMADDPD512_MASK,
30859 IX86_BUILTIN_VFMADDPD512_MASK3,
30860 IX86_BUILTIN_VFMADDPD512_MASKZ,
30861 IX86_BUILTIN_VFMADDPS512_MASK,
30862 IX86_BUILTIN_VFMADDPS512_MASK3,
30863 IX86_BUILTIN_VFMADDPS512_MASKZ,
30864 IX86_BUILTIN_VFMADDSD3_ROUND,
30865 IX86_BUILTIN_VFMADDSS3_ROUND,
30866 IX86_BUILTIN_VFMADDSUBPD512_MASK,
30867 IX86_BUILTIN_VFMADDSUBPD512_MASK3,
30868 IX86_BUILTIN_VFMADDSUBPD512_MASKZ,
30869 IX86_BUILTIN_VFMADDSUBPS512_MASK,
30870 IX86_BUILTIN_VFMADDSUBPS512_MASK3,
30871 IX86_BUILTIN_VFMADDSUBPS512_MASKZ,
30872 IX86_BUILTIN_VFMSUBADDPD512_MASK3,
30873 IX86_BUILTIN_VFMSUBADDPS512_MASK3,
30874 IX86_BUILTIN_VFMSUBPD512_MASK3,
30875 IX86_BUILTIN_VFMSUBPS512_MASK3,
30876 IX86_BUILTIN_VFMSUBSD3_MASK3,
30877 IX86_BUILTIN_VFMSUBSS3_MASK3,
30878 IX86_BUILTIN_VFNMADDPD512_MASK,
30879 IX86_BUILTIN_VFNMADDPS512_MASK,
30880 IX86_BUILTIN_VFNMSUBPD512_MASK,
30881 IX86_BUILTIN_VFNMSUBPD512_MASK3,
30882 IX86_BUILTIN_VFNMSUBPS512_MASK,
30883 IX86_BUILTIN_VFNMSUBPS512_MASK3,
30884 IX86_BUILTIN_VPCLZCNTD512,
30885 IX86_BUILTIN_VPCLZCNTQ512,
30886 IX86_BUILTIN_VPCONFLICTD512,
30887 IX86_BUILTIN_VPCONFLICTQ512,
30888 IX86_BUILTIN_VPERMDF512,
30889 IX86_BUILTIN_VPERMDI512,
30890 IX86_BUILTIN_VPERMI2VARD512,
30891 IX86_BUILTIN_VPERMI2VARPD512,
30892 IX86_BUILTIN_VPERMI2VARPS512,
30893 IX86_BUILTIN_VPERMI2VARQ512,
30894 IX86_BUILTIN_VPERMILPD512,
30895 IX86_BUILTIN_VPERMILPS512,
30896 IX86_BUILTIN_VPERMILVARPD512,
30897 IX86_BUILTIN_VPERMILVARPS512,
30898 IX86_BUILTIN_VPERMT2VARD512,
30899 IX86_BUILTIN_VPERMT2VARD512_MASKZ,
30900 IX86_BUILTIN_VPERMT2VARPD512,
30901 IX86_BUILTIN_VPERMT2VARPD512_MASKZ,
30902 IX86_BUILTIN_VPERMT2VARPS512,
30903 IX86_BUILTIN_VPERMT2VARPS512_MASKZ,
30904 IX86_BUILTIN_VPERMT2VARQ512,
30905 IX86_BUILTIN_VPERMT2VARQ512_MASKZ,
30906 IX86_BUILTIN_VPERMVARDF512,
30907 IX86_BUILTIN_VPERMVARDI512,
30908 IX86_BUILTIN_VPERMVARSF512,
30909 IX86_BUILTIN_VPERMVARSI512,
30910 IX86_BUILTIN_VTERNLOGD512_MASK,
30911 IX86_BUILTIN_VTERNLOGD512_MASKZ,
30912 IX86_BUILTIN_VTERNLOGQ512_MASK,
30913 IX86_BUILTIN_VTERNLOGQ512_MASKZ,
30915 /* Mask arithmetic operations */
30916 IX86_BUILTIN_KAND16,
30917 IX86_BUILTIN_KANDN16,
30918 IX86_BUILTIN_KNOT16,
30919 IX86_BUILTIN_KOR16,
30920 IX86_BUILTIN_KORTESTC16,
30921 IX86_BUILTIN_KORTESTZ16,
30922 IX86_BUILTIN_KUNPCKBW,
30923 IX86_BUILTIN_KXNOR16,
30924 IX86_BUILTIN_KXOR16,
30925 IX86_BUILTIN_KMOV16,
30928 IX86_BUILTIN_PMOVUSQD256_MEM,
30929 IX86_BUILTIN_PMOVUSQD128_MEM,
30930 IX86_BUILTIN_PMOVSQD256_MEM,
30931 IX86_BUILTIN_PMOVSQD128_MEM,
30932 IX86_BUILTIN_PMOVQD256_MEM,
30933 IX86_BUILTIN_PMOVQD128_MEM,
30934 IX86_BUILTIN_PMOVUSQW256_MEM,
30935 IX86_BUILTIN_PMOVUSQW128_MEM,
30936 IX86_BUILTIN_PMOVSQW256_MEM,
30937 IX86_BUILTIN_PMOVSQW128_MEM,
30938 IX86_BUILTIN_PMOVQW256_MEM,
30939 IX86_BUILTIN_PMOVQW128_MEM,
30940 IX86_BUILTIN_PMOVUSQB256_MEM,
30941 IX86_BUILTIN_PMOVUSQB128_MEM,
30942 IX86_BUILTIN_PMOVSQB256_MEM,
30943 IX86_BUILTIN_PMOVSQB128_MEM,
30944 IX86_BUILTIN_PMOVQB256_MEM,
30945 IX86_BUILTIN_PMOVQB128_MEM,
30946 IX86_BUILTIN_PMOVUSDW256_MEM,
30947 IX86_BUILTIN_PMOVUSDW128_MEM,
30948 IX86_BUILTIN_PMOVSDW256_MEM,
30949 IX86_BUILTIN_PMOVSDW128_MEM,
30950 IX86_BUILTIN_PMOVDW256_MEM,
30951 IX86_BUILTIN_PMOVDW128_MEM,
30952 IX86_BUILTIN_PMOVUSDB256_MEM,
30953 IX86_BUILTIN_PMOVUSDB128_MEM,
30954 IX86_BUILTIN_PMOVSDB256_MEM,
30955 IX86_BUILTIN_PMOVSDB128_MEM,
30956 IX86_BUILTIN_PMOVDB256_MEM,
30957 IX86_BUILTIN_PMOVDB128_MEM,
30958 IX86_BUILTIN_MOVDQA64LOAD256_MASK,
30959 IX86_BUILTIN_MOVDQA64LOAD128_MASK,
30960 IX86_BUILTIN_MOVDQA32LOAD256_MASK,
30961 IX86_BUILTIN_MOVDQA32LOAD128_MASK,
30962 IX86_BUILTIN_MOVDQA64STORE256_MASK,
30963 IX86_BUILTIN_MOVDQA64STORE128_MASK,
30964 IX86_BUILTIN_MOVDQA32STORE256_MASK,
30965 IX86_BUILTIN_MOVDQA32STORE128_MASK,
30966 IX86_BUILTIN_LOADAPD256_MASK,
30967 IX86_BUILTIN_LOADAPD128_MASK,
30968 IX86_BUILTIN_LOADAPS256_MASK,
30969 IX86_BUILTIN_LOADAPS128_MASK,
30970 IX86_BUILTIN_STOREAPD256_MASK,
30971 IX86_BUILTIN_STOREAPD128_MASK,
30972 IX86_BUILTIN_STOREAPS256_MASK,
30973 IX86_BUILTIN_STOREAPS128_MASK,
30974 IX86_BUILTIN_LOADUPD256_MASK,
30975 IX86_BUILTIN_LOADUPD128_MASK,
30976 IX86_BUILTIN_LOADUPS256_MASK,
30977 IX86_BUILTIN_LOADUPS128_MASK,
30978 IX86_BUILTIN_STOREUPD256_MASK,
30979 IX86_BUILTIN_STOREUPD128_MASK,
30980 IX86_BUILTIN_STOREUPS256_MASK,
30981 IX86_BUILTIN_STOREUPS128_MASK,
30982 IX86_BUILTIN_LOADDQUDI256_MASK,
30983 IX86_BUILTIN_LOADDQUDI128_MASK,
30984 IX86_BUILTIN_LOADDQUSI256_MASK,
30985 IX86_BUILTIN_LOADDQUSI128_MASK,
30986 IX86_BUILTIN_LOADDQUHI256_MASK,
30987 IX86_BUILTIN_LOADDQUHI128_MASK,
30988 IX86_BUILTIN_LOADDQUQI256_MASK,
30989 IX86_BUILTIN_LOADDQUQI128_MASK,
30990 IX86_BUILTIN_STOREDQUDI256_MASK,
30991 IX86_BUILTIN_STOREDQUDI128_MASK,
30992 IX86_BUILTIN_STOREDQUSI256_MASK,
30993 IX86_BUILTIN_STOREDQUSI128_MASK,
30994 IX86_BUILTIN_STOREDQUHI256_MASK,
30995 IX86_BUILTIN_STOREDQUHI128_MASK,
30996 IX86_BUILTIN_STOREDQUQI256_MASK,
30997 IX86_BUILTIN_STOREDQUQI128_MASK,
30998 IX86_BUILTIN_COMPRESSPDSTORE256,
30999 IX86_BUILTIN_COMPRESSPDSTORE128,
31000 IX86_BUILTIN_COMPRESSPSSTORE256,
31001 IX86_BUILTIN_COMPRESSPSSTORE128,
31002 IX86_BUILTIN_PCOMPRESSQSTORE256,
31003 IX86_BUILTIN_PCOMPRESSQSTORE128,
31004 IX86_BUILTIN_PCOMPRESSDSTORE256,
31005 IX86_BUILTIN_PCOMPRESSDSTORE128,
31006 IX86_BUILTIN_EXPANDPDLOAD256,
31007 IX86_BUILTIN_EXPANDPDLOAD128,
31008 IX86_BUILTIN_EXPANDPSLOAD256,
31009 IX86_BUILTIN_EXPANDPSLOAD128,
31010 IX86_BUILTIN_PEXPANDQLOAD256,
31011 IX86_BUILTIN_PEXPANDQLOAD128,
31012 IX86_BUILTIN_PEXPANDDLOAD256,
31013 IX86_BUILTIN_PEXPANDDLOAD128,
31014 IX86_BUILTIN_EXPANDPDLOAD256Z,
31015 IX86_BUILTIN_EXPANDPDLOAD128Z,
31016 IX86_BUILTIN_EXPANDPSLOAD256Z,
31017 IX86_BUILTIN_EXPANDPSLOAD128Z,
31018 IX86_BUILTIN_PEXPANDQLOAD256Z,
31019 IX86_BUILTIN_PEXPANDQLOAD128Z,
31020 IX86_BUILTIN_PEXPANDDLOAD256Z,
31021 IX86_BUILTIN_PEXPANDDLOAD128Z,
31022 IX86_BUILTIN_PALIGNR256_MASK,
31023 IX86_BUILTIN_PALIGNR128_MASK,
31024 IX86_BUILTIN_MOVDQA64_256_MASK,
31025 IX86_BUILTIN_MOVDQA64_128_MASK,
31026 IX86_BUILTIN_MOVDQA32_256_MASK,
31027 IX86_BUILTIN_MOVDQA32_128_MASK,
31028 IX86_BUILTIN_MOVAPD256_MASK,
31029 IX86_BUILTIN_MOVAPD128_MASK,
31030 IX86_BUILTIN_MOVAPS256_MASK,
31031 IX86_BUILTIN_MOVAPS128_MASK,
31032 IX86_BUILTIN_MOVDQUHI256_MASK,
31033 IX86_BUILTIN_MOVDQUHI128_MASK,
31034 IX86_BUILTIN_MOVDQUQI256_MASK,
31035 IX86_BUILTIN_MOVDQUQI128_MASK,
31036 IX86_BUILTIN_MINPS128_MASK,
31037 IX86_BUILTIN_MAXPS128_MASK,
31038 IX86_BUILTIN_MINPD128_MASK,
31039 IX86_BUILTIN_MAXPD128_MASK,
31040 IX86_BUILTIN_MAXPD256_MASK,
31041 IX86_BUILTIN_MAXPS256_MASK,
31042 IX86_BUILTIN_MINPD256_MASK,
31043 IX86_BUILTIN_MINPS256_MASK,
31044 IX86_BUILTIN_MULPS128_MASK,
31045 IX86_BUILTIN_DIVPS128_MASK,
31046 IX86_BUILTIN_MULPD128_MASK,
31047 IX86_BUILTIN_DIVPD128_MASK,
31048 IX86_BUILTIN_DIVPD256_MASK,
31049 IX86_BUILTIN_DIVPS256_MASK,
31050 IX86_BUILTIN_MULPD256_MASK,
31051 IX86_BUILTIN_MULPS256_MASK,
31052 IX86_BUILTIN_ADDPD128_MASK,
31053 IX86_BUILTIN_ADDPD256_MASK,
31054 IX86_BUILTIN_ADDPS128_MASK,
31055 IX86_BUILTIN_ADDPS256_MASK,
31056 IX86_BUILTIN_SUBPD128_MASK,
31057 IX86_BUILTIN_SUBPD256_MASK,
31058 IX86_BUILTIN_SUBPS128_MASK,
31059 IX86_BUILTIN_SUBPS256_MASK,
31060 IX86_BUILTIN_XORPD256_MASK,
31061 IX86_BUILTIN_XORPD128_MASK,
31062 IX86_BUILTIN_XORPS256_MASK,
31063 IX86_BUILTIN_XORPS128_MASK,
31064 IX86_BUILTIN_ORPD256_MASK,
31065 IX86_BUILTIN_ORPD128_MASK,
31066 IX86_BUILTIN_ORPS256_MASK,
31067 IX86_BUILTIN_ORPS128_MASK,
31068 IX86_BUILTIN_BROADCASTF32x2_256,
31069 IX86_BUILTIN_BROADCASTI32x2_256,
31070 IX86_BUILTIN_BROADCASTI32x2_128,
31071 IX86_BUILTIN_BROADCASTF64X2_256,
31072 IX86_BUILTIN_BROADCASTI64X2_256,
31073 IX86_BUILTIN_BROADCASTF32X4_256,
31074 IX86_BUILTIN_BROADCASTI32X4_256,
31075 IX86_BUILTIN_EXTRACTF32X4_256,
31076 IX86_BUILTIN_EXTRACTI32X4_256,
31077 IX86_BUILTIN_DBPSADBW256,
31078 IX86_BUILTIN_DBPSADBW128,
31079 IX86_BUILTIN_CVTTPD2QQ256,
31080 IX86_BUILTIN_CVTTPD2QQ128,
31081 IX86_BUILTIN_CVTTPD2UQQ256,
31082 IX86_BUILTIN_CVTTPD2UQQ128,
31083 IX86_BUILTIN_CVTPD2QQ256,
31084 IX86_BUILTIN_CVTPD2QQ128,
31085 IX86_BUILTIN_CVTPD2UQQ256,
31086 IX86_BUILTIN_CVTPD2UQQ128,
31087 IX86_BUILTIN_CVTPD2UDQ256_MASK,
31088 IX86_BUILTIN_CVTPD2UDQ128_MASK,
31089 IX86_BUILTIN_CVTTPS2QQ256,
31090 IX86_BUILTIN_CVTTPS2QQ128,
31091 IX86_BUILTIN_CVTTPS2UQQ256,
31092 IX86_BUILTIN_CVTTPS2UQQ128,
31093 IX86_BUILTIN_CVTTPS2DQ256_MASK,
31094 IX86_BUILTIN_CVTTPS2DQ128_MASK,
31095 IX86_BUILTIN_CVTTPS2UDQ256,
31096 IX86_BUILTIN_CVTTPS2UDQ128,
31097 IX86_BUILTIN_CVTTPD2DQ256_MASK,
31098 IX86_BUILTIN_CVTTPD2DQ128_MASK,
31099 IX86_BUILTIN_CVTTPD2UDQ256_MASK,
31100 IX86_BUILTIN_CVTTPD2UDQ128_MASK,
31101 IX86_BUILTIN_CVTPD2DQ256_MASK,
31102 IX86_BUILTIN_CVTPD2DQ128_MASK,
31103 IX86_BUILTIN_CVTDQ2PD256_MASK,
31104 IX86_BUILTIN_CVTDQ2PD128_MASK,
31105 IX86_BUILTIN_CVTUDQ2PD256_MASK,
31106 IX86_BUILTIN_CVTUDQ2PD128_MASK,
31107 IX86_BUILTIN_CVTDQ2PS256_MASK,
31108 IX86_BUILTIN_CVTDQ2PS128_MASK,
31109 IX86_BUILTIN_CVTUDQ2PS256_MASK,
31110 IX86_BUILTIN_CVTUDQ2PS128_MASK,
31111 IX86_BUILTIN_CVTPS2PD256_MASK,
31112 IX86_BUILTIN_CVTPS2PD128_MASK,
31113 IX86_BUILTIN_PBROADCASTB256_MASK,
31114 IX86_BUILTIN_PBROADCASTB256_GPR_MASK,
31115 IX86_BUILTIN_PBROADCASTB128_MASK,
31116 IX86_BUILTIN_PBROADCASTB128_GPR_MASK,
31117 IX86_BUILTIN_PBROADCASTW256_MASK,
31118 IX86_BUILTIN_PBROADCASTW256_GPR_MASK,
31119 IX86_BUILTIN_PBROADCASTW128_MASK,
31120 IX86_BUILTIN_PBROADCASTW128_GPR_MASK,
31121 IX86_BUILTIN_PBROADCASTD256_MASK,
31122 IX86_BUILTIN_PBROADCASTD256_GPR_MASK,
31123 IX86_BUILTIN_PBROADCASTD128_MASK,
31124 IX86_BUILTIN_PBROADCASTD128_GPR_MASK,
31125 IX86_BUILTIN_PBROADCASTQ256_MASK,
31126 IX86_BUILTIN_PBROADCASTQ256_GPR_MASK,
31127 IX86_BUILTIN_PBROADCASTQ128_MASK,
31128 IX86_BUILTIN_PBROADCASTQ128_GPR_MASK,
31129 IX86_BUILTIN_BROADCASTSS256,
31130 IX86_BUILTIN_BROADCASTSS128,
31131 IX86_BUILTIN_BROADCASTSD256,
31132 IX86_BUILTIN_EXTRACTF64X2_256,
31133 IX86_BUILTIN_EXTRACTI64X2_256,
31134 IX86_BUILTIN_INSERTF32X4_256,
31135 IX86_BUILTIN_INSERTI32X4_256,
31136 IX86_BUILTIN_PMOVSXBW256_MASK,
31137 IX86_BUILTIN_PMOVSXBW128_MASK,
31138 IX86_BUILTIN_PMOVSXBD256_MASK,
31139 IX86_BUILTIN_PMOVSXBD128_MASK,
31140 IX86_BUILTIN_PMOVSXBQ256_MASK,
31141 IX86_BUILTIN_PMOVSXBQ128_MASK,
31142 IX86_BUILTIN_PMOVSXWD256_MASK,
31143 IX86_BUILTIN_PMOVSXWD128_MASK,
31144 IX86_BUILTIN_PMOVSXWQ256_MASK,
31145 IX86_BUILTIN_PMOVSXWQ128_MASK,
31146 IX86_BUILTIN_PMOVSXDQ256_MASK,
31147 IX86_BUILTIN_PMOVSXDQ128_MASK,
31148 IX86_BUILTIN_PMOVZXBW256_MASK,
31149 IX86_BUILTIN_PMOVZXBW128_MASK,
31150 IX86_BUILTIN_PMOVZXBD256_MASK,
31151 IX86_BUILTIN_PMOVZXBD128_MASK,
31152 IX86_BUILTIN_PMOVZXBQ256_MASK,
31153 IX86_BUILTIN_PMOVZXBQ128_MASK,
31154 IX86_BUILTIN_PMOVZXWD256_MASK,
31155 IX86_BUILTIN_PMOVZXWD128_MASK,
31156 IX86_BUILTIN_PMOVZXWQ256_MASK,
31157 IX86_BUILTIN_PMOVZXWQ128_MASK,
31158 IX86_BUILTIN_PMOVZXDQ256_MASK,
31159 IX86_BUILTIN_PMOVZXDQ128_MASK,
31160 IX86_BUILTIN_REDUCEPD256_MASK,
31161 IX86_BUILTIN_REDUCEPD128_MASK,
31162 IX86_BUILTIN_REDUCEPS256_MASK,
31163 IX86_BUILTIN_REDUCEPS128_MASK,
31164 IX86_BUILTIN_REDUCESD_MASK,
31165 IX86_BUILTIN_REDUCESS_MASK,
31166 IX86_BUILTIN_VPERMVARHI256_MASK,
31167 IX86_BUILTIN_VPERMVARHI128_MASK,
31168 IX86_BUILTIN_VPERMT2VARHI256,
31169 IX86_BUILTIN_VPERMT2VARHI256_MASKZ,
31170 IX86_BUILTIN_VPERMT2VARHI128,
31171 IX86_BUILTIN_VPERMT2VARHI128_MASKZ,
31172 IX86_BUILTIN_VPERMI2VARHI256,
31173 IX86_BUILTIN_VPERMI2VARHI128,
31174 IX86_BUILTIN_RCP14PD256,
31175 IX86_BUILTIN_RCP14PD128,
31176 IX86_BUILTIN_RCP14PS256,
31177 IX86_BUILTIN_RCP14PS128,
31178 IX86_BUILTIN_RSQRT14PD256_MASK,
31179 IX86_BUILTIN_RSQRT14PD128_MASK,
31180 IX86_BUILTIN_RSQRT14PS256_MASK,
31181 IX86_BUILTIN_RSQRT14PS128_MASK,
31182 IX86_BUILTIN_SQRTPD256_MASK,
31183 IX86_BUILTIN_SQRTPD128_MASK,
31184 IX86_BUILTIN_SQRTPS256_MASK,
31185 IX86_BUILTIN_SQRTPS128_MASK,
31186 IX86_BUILTIN_PADDB128_MASK,
31187 IX86_BUILTIN_PADDW128_MASK,
31188 IX86_BUILTIN_PADDD128_MASK,
31189 IX86_BUILTIN_PADDQ128_MASK,
31190 IX86_BUILTIN_PSUBB128_MASK,
31191 IX86_BUILTIN_PSUBW128_MASK,
31192 IX86_BUILTIN_PSUBD128_MASK,
31193 IX86_BUILTIN_PSUBQ128_MASK,
31194 IX86_BUILTIN_PADDSB128_MASK,
31195 IX86_BUILTIN_PADDSW128_MASK,
31196 IX86_BUILTIN_PSUBSB128_MASK,
31197 IX86_BUILTIN_PSUBSW128_MASK,
31198 IX86_BUILTIN_PADDUSB128_MASK,
31199 IX86_BUILTIN_PADDUSW128_MASK,
31200 IX86_BUILTIN_PSUBUSB128_MASK,
31201 IX86_BUILTIN_PSUBUSW128_MASK,
31202 IX86_BUILTIN_PADDB256_MASK,
31203 IX86_BUILTIN_PADDW256_MASK,
31204 IX86_BUILTIN_PADDD256_MASK,
31205 IX86_BUILTIN_PADDQ256_MASK,
31206 IX86_BUILTIN_PADDSB256_MASK,
31207 IX86_BUILTIN_PADDSW256_MASK,
31208 IX86_BUILTIN_PADDUSB256_MASK,
31209 IX86_BUILTIN_PADDUSW256_MASK,
31210 IX86_BUILTIN_PSUBB256_MASK,
31211 IX86_BUILTIN_PSUBW256_MASK,
31212 IX86_BUILTIN_PSUBD256_MASK,
31213 IX86_BUILTIN_PSUBQ256_MASK,
31214 IX86_BUILTIN_PSUBSB256_MASK,
31215 IX86_BUILTIN_PSUBSW256_MASK,
31216 IX86_BUILTIN_PSUBUSB256_MASK,
31217 IX86_BUILTIN_PSUBUSW256_MASK,
31218 IX86_BUILTIN_SHUF_F64x2_256,
31219 IX86_BUILTIN_SHUF_I64x2_256,
31220 IX86_BUILTIN_SHUF_I32x4_256,
31221 IX86_BUILTIN_SHUF_F32x4_256,
31222 IX86_BUILTIN_PMOVWB128,
31223 IX86_BUILTIN_PMOVWB256,
31224 IX86_BUILTIN_PMOVSWB128,
31225 IX86_BUILTIN_PMOVSWB256,
31226 IX86_BUILTIN_PMOVUSWB128,
31227 IX86_BUILTIN_PMOVUSWB256,
31228 IX86_BUILTIN_PMOVDB128,
31229 IX86_BUILTIN_PMOVDB256,
31230 IX86_BUILTIN_PMOVSDB128,
31231 IX86_BUILTIN_PMOVSDB256,
31232 IX86_BUILTIN_PMOVUSDB128,
31233 IX86_BUILTIN_PMOVUSDB256,
31234 IX86_BUILTIN_PMOVDW128,
31235 IX86_BUILTIN_PMOVDW256,
31236 IX86_BUILTIN_PMOVSDW128,
31237 IX86_BUILTIN_PMOVSDW256,
31238 IX86_BUILTIN_PMOVUSDW128,
31239 IX86_BUILTIN_PMOVUSDW256,
31240 IX86_BUILTIN_PMOVQB128,
31241 IX86_BUILTIN_PMOVQB256,
31242 IX86_BUILTIN_PMOVSQB128,
31243 IX86_BUILTIN_PMOVSQB256,
31244 IX86_BUILTIN_PMOVUSQB128,
31245 IX86_BUILTIN_PMOVUSQB256,
31246 IX86_BUILTIN_PMOVQW128,
31247 IX86_BUILTIN_PMOVQW256,
31248 IX86_BUILTIN_PMOVSQW128,
31249 IX86_BUILTIN_PMOVSQW256,
31250 IX86_BUILTIN_PMOVUSQW128,
31251 IX86_BUILTIN_PMOVUSQW256,
31252 IX86_BUILTIN_PMOVQD128,
31253 IX86_BUILTIN_PMOVQD256,
31254 IX86_BUILTIN_PMOVSQD128,
31255 IX86_BUILTIN_PMOVSQD256,
31256 IX86_BUILTIN_PMOVUSQD128,
31257 IX86_BUILTIN_PMOVUSQD256,
31258 IX86_BUILTIN_RANGEPD256,
31259 IX86_BUILTIN_RANGEPD128,
31260 IX86_BUILTIN_RANGEPS256,
31261 IX86_BUILTIN_RANGEPS128,
31262 IX86_BUILTIN_GETEXPPS256,
31263 IX86_BUILTIN_GETEXPPD256,
31264 IX86_BUILTIN_GETEXPPS128,
31265 IX86_BUILTIN_GETEXPPD128,
31266 IX86_BUILTIN_FIXUPIMMPD256_MASK,
31267 IX86_BUILTIN_FIXUPIMMPD256_MASKZ,
31268 IX86_BUILTIN_FIXUPIMMPS256_MASK,
31269 IX86_BUILTIN_FIXUPIMMPS256_MASKZ,
31270 IX86_BUILTIN_FIXUPIMMPD128_MASK,
31271 IX86_BUILTIN_FIXUPIMMPD128_MASKZ,
31272 IX86_BUILTIN_FIXUPIMMPS128_MASK,
31273 IX86_BUILTIN_FIXUPIMMPS128_MASKZ,
31274 IX86_BUILTIN_PABSQ256,
31275 IX86_BUILTIN_PABSQ128,
31276 IX86_BUILTIN_PABSD256_MASK,
31277 IX86_BUILTIN_PABSD128_MASK,
31278 IX86_BUILTIN_PMULHRSW256_MASK,
31279 IX86_BUILTIN_PMULHRSW128_MASK,
31280 IX86_BUILTIN_PMULHUW128_MASK,
31281 IX86_BUILTIN_PMULHUW256_MASK,
31282 IX86_BUILTIN_PMULHW256_MASK,
31283 IX86_BUILTIN_PMULHW128_MASK,
31284 IX86_BUILTIN_PMULLW256_MASK,
31285 IX86_BUILTIN_PMULLW128_MASK,
31286 IX86_BUILTIN_PMULLQ256,
31287 IX86_BUILTIN_PMULLQ128,
31288 IX86_BUILTIN_ANDPD256_MASK,
31289 IX86_BUILTIN_ANDPD128_MASK,
31290 IX86_BUILTIN_ANDPS256_MASK,
31291 IX86_BUILTIN_ANDPS128_MASK,
31292 IX86_BUILTIN_ANDNPD256_MASK,
31293 IX86_BUILTIN_ANDNPD128_MASK,
31294 IX86_BUILTIN_ANDNPS256_MASK,
31295 IX86_BUILTIN_ANDNPS128_MASK,
31296 IX86_BUILTIN_PSLLWI128_MASK,
31297 IX86_BUILTIN_PSLLDI128_MASK,
31298 IX86_BUILTIN_PSLLQI128_MASK,
31299 IX86_BUILTIN_PSLLW128_MASK,
31300 IX86_BUILTIN_PSLLD128_MASK,
31301 IX86_BUILTIN_PSLLQ128_MASK,
31302 IX86_BUILTIN_PSLLWI256_MASK ,
31303 IX86_BUILTIN_PSLLW256_MASK,
31304 IX86_BUILTIN_PSLLDI256_MASK,
31305 IX86_BUILTIN_PSLLD256_MASK,
31306 IX86_BUILTIN_PSLLQI256_MASK,
31307 IX86_BUILTIN_PSLLQ256_MASK,
31308 IX86_BUILTIN_PSRADI128_MASK,
31309 IX86_BUILTIN_PSRAD128_MASK,
31310 IX86_BUILTIN_PSRADI256_MASK,
31311 IX86_BUILTIN_PSRAD256_MASK,
31312 IX86_BUILTIN_PSRAQI128_MASK,
31313 IX86_BUILTIN_PSRAQ128_MASK,
31314 IX86_BUILTIN_PSRAQI256_MASK,
31315 IX86_BUILTIN_PSRAQ256_MASK,
31316 IX86_BUILTIN_PANDD256,
31317 IX86_BUILTIN_PANDD128,
31318 IX86_BUILTIN_PSRLDI128_MASK,
31319 IX86_BUILTIN_PSRLD128_MASK,
31320 IX86_BUILTIN_PSRLDI256_MASK,
31321 IX86_BUILTIN_PSRLD256_MASK,
31322 IX86_BUILTIN_PSRLQI128_MASK,
31323 IX86_BUILTIN_PSRLQ128_MASK,
31324 IX86_BUILTIN_PSRLQI256_MASK,
31325 IX86_BUILTIN_PSRLQ256_MASK,
31326 IX86_BUILTIN_PANDQ256,
31327 IX86_BUILTIN_PANDQ128,
31328 IX86_BUILTIN_PANDND256,
31329 IX86_BUILTIN_PANDND128,
31330 IX86_BUILTIN_PANDNQ256,
31331 IX86_BUILTIN_PANDNQ128,
31332 IX86_BUILTIN_PORD256,
31333 IX86_BUILTIN_PORD128,
31334 IX86_BUILTIN_PORQ256,
31335 IX86_BUILTIN_PORQ128,
31336 IX86_BUILTIN_PXORD256,
31337 IX86_BUILTIN_PXORD128,
31338 IX86_BUILTIN_PXORQ256,
31339 IX86_BUILTIN_PXORQ128,
31340 IX86_BUILTIN_PACKSSWB256_MASK,
31341 IX86_BUILTIN_PACKSSWB128_MASK,
31342 IX86_BUILTIN_PACKUSWB256_MASK,
31343 IX86_BUILTIN_PACKUSWB128_MASK,
31344 IX86_BUILTIN_RNDSCALEPS256,
31345 IX86_BUILTIN_RNDSCALEPD256,
31346 IX86_BUILTIN_RNDSCALEPS128,
31347 IX86_BUILTIN_RNDSCALEPD128,
31348 IX86_BUILTIN_VTERNLOGQ256_MASK,
31349 IX86_BUILTIN_VTERNLOGQ256_MASKZ,
31350 IX86_BUILTIN_VTERNLOGD256_MASK,
31351 IX86_BUILTIN_VTERNLOGD256_MASKZ,
31352 IX86_BUILTIN_VTERNLOGQ128_MASK,
31353 IX86_BUILTIN_VTERNLOGQ128_MASKZ,
31354 IX86_BUILTIN_VTERNLOGD128_MASK,
31355 IX86_BUILTIN_VTERNLOGD128_MASKZ,
31356 IX86_BUILTIN_SCALEFPD256,
31357 IX86_BUILTIN_SCALEFPS256,
31358 IX86_BUILTIN_SCALEFPD128,
31359 IX86_BUILTIN_SCALEFPS128,
31360 IX86_BUILTIN_VFMADDPD256_MASK,
31361 IX86_BUILTIN_VFMADDPD256_MASK3,
31362 IX86_BUILTIN_VFMADDPD256_MASKZ,
31363 IX86_BUILTIN_VFMADDPD128_MASK,
31364 IX86_BUILTIN_VFMADDPD128_MASK3,
31365 IX86_BUILTIN_VFMADDPD128_MASKZ,
31366 IX86_BUILTIN_VFMADDPS256_MASK,
31367 IX86_BUILTIN_VFMADDPS256_MASK3,
31368 IX86_BUILTIN_VFMADDPS256_MASKZ,
31369 IX86_BUILTIN_VFMADDPS128_MASK,
31370 IX86_BUILTIN_VFMADDPS128_MASK3,
31371 IX86_BUILTIN_VFMADDPS128_MASKZ,
31372 IX86_BUILTIN_VFMSUBPD256_MASK3,
31373 IX86_BUILTIN_VFMSUBPD128_MASK3,
31374 IX86_BUILTIN_VFMSUBPS256_MASK3,
31375 IX86_BUILTIN_VFMSUBPS128_MASK3,
31376 IX86_BUILTIN_VFNMADDPD256_MASK,
31377 IX86_BUILTIN_VFNMADDPD128_MASK,
31378 IX86_BUILTIN_VFNMADDPS256_MASK,
31379 IX86_BUILTIN_VFNMADDPS128_MASK,
31380 IX86_BUILTIN_VFNMSUBPD256_MASK,
31381 IX86_BUILTIN_VFNMSUBPD256_MASK3,
31382 IX86_BUILTIN_VFNMSUBPD128_MASK,
31383 IX86_BUILTIN_VFNMSUBPD128_MASK3,
31384 IX86_BUILTIN_VFNMSUBPS256_MASK,
31385 IX86_BUILTIN_VFNMSUBPS256_MASK3,
31386 IX86_BUILTIN_VFNMSUBPS128_MASK,
31387 IX86_BUILTIN_VFNMSUBPS128_MASK3,
31388 IX86_BUILTIN_VFMADDSUBPD256_MASK,
31389 IX86_BUILTIN_VFMADDSUBPD256_MASK3,
31390 IX86_BUILTIN_VFMADDSUBPD256_MASKZ,
31391 IX86_BUILTIN_VFMADDSUBPD128_MASK,
31392 IX86_BUILTIN_VFMADDSUBPD128_MASK3,
31393 IX86_BUILTIN_VFMADDSUBPD128_MASKZ,
31394 IX86_BUILTIN_VFMADDSUBPS256_MASK,
31395 IX86_BUILTIN_VFMADDSUBPS256_MASK3,
31396 IX86_BUILTIN_VFMADDSUBPS256_MASKZ,
31397 IX86_BUILTIN_VFMADDSUBPS128_MASK,
31398 IX86_BUILTIN_VFMADDSUBPS128_MASK3,
31399 IX86_BUILTIN_VFMADDSUBPS128_MASKZ,
31400 IX86_BUILTIN_VFMSUBADDPD256_MASK3,
31401 IX86_BUILTIN_VFMSUBADDPD128_MASK3,
31402 IX86_BUILTIN_VFMSUBADDPS256_MASK3,
31403 IX86_BUILTIN_VFMSUBADDPS128_MASK3,
31404 IX86_BUILTIN_INSERTF64X2_256,
31405 IX86_BUILTIN_INSERTI64X2_256,
31406 IX86_BUILTIN_PSRAVV16HI,
31407 IX86_BUILTIN_PSRAVV8HI,
31408 IX86_BUILTIN_PMADDUBSW256_MASK,
31409 IX86_BUILTIN_PMADDUBSW128_MASK,
31410 IX86_BUILTIN_PMADDWD256_MASK,
31411 IX86_BUILTIN_PMADDWD128_MASK,
31412 IX86_BUILTIN_PSRLVV16HI,
31413 IX86_BUILTIN_PSRLVV8HI,
31414 IX86_BUILTIN_CVTPS2DQ256_MASK,
31415 IX86_BUILTIN_CVTPS2DQ128_MASK,
31416 IX86_BUILTIN_CVTPS2UDQ256,
31417 IX86_BUILTIN_CVTPS2UDQ128,
31418 IX86_BUILTIN_CVTPS2QQ256,
31419 IX86_BUILTIN_CVTPS2QQ128,
31420 IX86_BUILTIN_CVTPS2UQQ256,
31421 IX86_BUILTIN_CVTPS2UQQ128,
31422 IX86_BUILTIN_GETMANTPS256,
31423 IX86_BUILTIN_GETMANTPS128,
31424 IX86_BUILTIN_GETMANTPD256,
31425 IX86_BUILTIN_GETMANTPD128,
31426 IX86_BUILTIN_MOVDDUP256_MASK,
31427 IX86_BUILTIN_MOVDDUP128_MASK,
31428 IX86_BUILTIN_MOVSHDUP256_MASK,
31429 IX86_BUILTIN_MOVSHDUP128_MASK,
31430 IX86_BUILTIN_MOVSLDUP256_MASK,
31431 IX86_BUILTIN_MOVSLDUP128_MASK,
31432 IX86_BUILTIN_CVTQQ2PS256,
31433 IX86_BUILTIN_CVTQQ2PS128,
31434 IX86_BUILTIN_CVTUQQ2PS256,
31435 IX86_BUILTIN_CVTUQQ2PS128,
31436 IX86_BUILTIN_CVTQQ2PD256,
31437 IX86_BUILTIN_CVTQQ2PD128,
31438 IX86_BUILTIN_CVTUQQ2PD256,
31439 IX86_BUILTIN_CVTUQQ2PD128,
31440 IX86_BUILTIN_VPERMT2VARQ256,
31441 IX86_BUILTIN_VPERMT2VARQ256_MASKZ,
31442 IX86_BUILTIN_VPERMT2VARD256,
31443 IX86_BUILTIN_VPERMT2VARD256_MASKZ,
31444 IX86_BUILTIN_VPERMI2VARQ256,
31445 IX86_BUILTIN_VPERMI2VARD256,
31446 IX86_BUILTIN_VPERMT2VARPD256,
31447 IX86_BUILTIN_VPERMT2VARPD256_MASKZ,
31448 IX86_BUILTIN_VPERMT2VARPS256,
31449 IX86_BUILTIN_VPERMT2VARPS256_MASKZ,
31450 IX86_BUILTIN_VPERMI2VARPD256,
31451 IX86_BUILTIN_VPERMI2VARPS256,
31452 IX86_BUILTIN_VPERMT2VARQ128,
31453 IX86_BUILTIN_VPERMT2VARQ128_MASKZ,
31454 IX86_BUILTIN_VPERMT2VARD128,
31455 IX86_BUILTIN_VPERMT2VARD128_MASKZ,
31456 IX86_BUILTIN_VPERMI2VARQ128,
31457 IX86_BUILTIN_VPERMI2VARD128,
31458 IX86_BUILTIN_VPERMT2VARPD128,
31459 IX86_BUILTIN_VPERMT2VARPD128_MASKZ,
31460 IX86_BUILTIN_VPERMT2VARPS128,
31461 IX86_BUILTIN_VPERMT2VARPS128_MASKZ,
31462 IX86_BUILTIN_VPERMI2VARPD128,
31463 IX86_BUILTIN_VPERMI2VARPS128,
31464 IX86_BUILTIN_PSHUFB256_MASK,
31465 IX86_BUILTIN_PSHUFB128_MASK,
31466 IX86_BUILTIN_PSHUFHW256_MASK,
31467 IX86_BUILTIN_PSHUFHW128_MASK,
31468 IX86_BUILTIN_PSHUFLW256_MASK,
31469 IX86_BUILTIN_PSHUFLW128_MASK,
31470 IX86_BUILTIN_PSHUFD256_MASK,
31471 IX86_BUILTIN_PSHUFD128_MASK,
31472 IX86_BUILTIN_SHUFPD256_MASK,
31473 IX86_BUILTIN_SHUFPD128_MASK,
31474 IX86_BUILTIN_SHUFPS256_MASK,
31475 IX86_BUILTIN_SHUFPS128_MASK,
31476 IX86_BUILTIN_PROLVQ256,
31477 IX86_BUILTIN_PROLVQ128,
31478 IX86_BUILTIN_PROLQ256,
31479 IX86_BUILTIN_PROLQ128,
31480 IX86_BUILTIN_PRORVQ256,
31481 IX86_BUILTIN_PRORVQ128,
31482 IX86_BUILTIN_PRORQ256,
31483 IX86_BUILTIN_PRORQ128,
31484 IX86_BUILTIN_PSRAVQ128,
31485 IX86_BUILTIN_PSRAVQ256,
31486 IX86_BUILTIN_PSLLVV4DI_MASK,
31487 IX86_BUILTIN_PSLLVV2DI_MASK,
31488 IX86_BUILTIN_PSLLVV8SI_MASK,
31489 IX86_BUILTIN_PSLLVV4SI_MASK,
31490 IX86_BUILTIN_PSRAVV8SI_MASK,
31491 IX86_BUILTIN_PSRAVV4SI_MASK,
31492 IX86_BUILTIN_PSRLVV4DI_MASK,
31493 IX86_BUILTIN_PSRLVV2DI_MASK,
31494 IX86_BUILTIN_PSRLVV8SI_MASK,
31495 IX86_BUILTIN_PSRLVV4SI_MASK,
31496 IX86_BUILTIN_PSRAWI256_MASK,
31497 IX86_BUILTIN_PSRAW256_MASK,
31498 IX86_BUILTIN_PSRAWI128_MASK,
31499 IX86_BUILTIN_PSRAW128_MASK,
31500 IX86_BUILTIN_PSRLWI256_MASK,
31501 IX86_BUILTIN_PSRLW256_MASK,
31502 IX86_BUILTIN_PSRLWI128_MASK,
31503 IX86_BUILTIN_PSRLW128_MASK,
31504 IX86_BUILTIN_PRORVD256,
31505 IX86_BUILTIN_PROLVD256,
31506 IX86_BUILTIN_PRORD256,
31507 IX86_BUILTIN_PROLD256,
31508 IX86_BUILTIN_PRORVD128,
31509 IX86_BUILTIN_PROLVD128,
31510 IX86_BUILTIN_PRORD128,
31511 IX86_BUILTIN_PROLD128,
31512 IX86_BUILTIN_FPCLASSPD256,
31513 IX86_BUILTIN_FPCLASSPD128,
31514 IX86_BUILTIN_FPCLASSSD,
31515 IX86_BUILTIN_FPCLASSPS256,
31516 IX86_BUILTIN_FPCLASSPS128,
31517 IX86_BUILTIN_FPCLASSSS,
31518 IX86_BUILTIN_CVTB2MASK128,
31519 IX86_BUILTIN_CVTB2MASK256,
31520 IX86_BUILTIN_CVTW2MASK128,
31521 IX86_BUILTIN_CVTW2MASK256,
31522 IX86_BUILTIN_CVTD2MASK128,
31523 IX86_BUILTIN_CVTD2MASK256,
31524 IX86_BUILTIN_CVTQ2MASK128,
31525 IX86_BUILTIN_CVTQ2MASK256,
31526 IX86_BUILTIN_CVTMASK2B128,
31527 IX86_BUILTIN_CVTMASK2B256,
31528 IX86_BUILTIN_CVTMASK2W128,
31529 IX86_BUILTIN_CVTMASK2W256,
31530 IX86_BUILTIN_CVTMASK2D128,
31531 IX86_BUILTIN_CVTMASK2D256,
31532 IX86_BUILTIN_CVTMASK2Q128,
31533 IX86_BUILTIN_CVTMASK2Q256,
31534 IX86_BUILTIN_PCMPEQB128_MASK,
31535 IX86_BUILTIN_PCMPEQB256_MASK,
31536 IX86_BUILTIN_PCMPEQW128_MASK,
31537 IX86_BUILTIN_PCMPEQW256_MASK,
31538 IX86_BUILTIN_PCMPEQD128_MASK,
31539 IX86_BUILTIN_PCMPEQD256_MASK,
31540 IX86_BUILTIN_PCMPEQQ128_MASK,
31541 IX86_BUILTIN_PCMPEQQ256_MASK,
31542 IX86_BUILTIN_PCMPGTB128_MASK,
31543 IX86_BUILTIN_PCMPGTB256_MASK,
31544 IX86_BUILTIN_PCMPGTW128_MASK,
31545 IX86_BUILTIN_PCMPGTW256_MASK,
31546 IX86_BUILTIN_PCMPGTD128_MASK,
31547 IX86_BUILTIN_PCMPGTD256_MASK,
31548 IX86_BUILTIN_PCMPGTQ128_MASK,
31549 IX86_BUILTIN_PCMPGTQ256_MASK,
31550 IX86_BUILTIN_PTESTMB128,
31551 IX86_BUILTIN_PTESTMB256,
31552 IX86_BUILTIN_PTESTMW128,
31553 IX86_BUILTIN_PTESTMW256,
31554 IX86_BUILTIN_PTESTMD128,
31555 IX86_BUILTIN_PTESTMD256,
31556 IX86_BUILTIN_PTESTMQ128,
31557 IX86_BUILTIN_PTESTMQ256,
31558 IX86_BUILTIN_PTESTNMB128,
31559 IX86_BUILTIN_PTESTNMB256,
31560 IX86_BUILTIN_PTESTNMW128,
31561 IX86_BUILTIN_PTESTNMW256,
31562 IX86_BUILTIN_PTESTNMD128,
31563 IX86_BUILTIN_PTESTNMD256,
31564 IX86_BUILTIN_PTESTNMQ128,
31565 IX86_BUILTIN_PTESTNMQ256,
31566 IX86_BUILTIN_PBROADCASTMB128,
31567 IX86_BUILTIN_PBROADCASTMB256,
31568 IX86_BUILTIN_PBROADCASTMW128,
31569 IX86_BUILTIN_PBROADCASTMW256,
31570 IX86_BUILTIN_COMPRESSPD256,
31571 IX86_BUILTIN_COMPRESSPD128,
31572 IX86_BUILTIN_COMPRESSPS256,
31573 IX86_BUILTIN_COMPRESSPS128,
31574 IX86_BUILTIN_PCOMPRESSQ256,
31575 IX86_BUILTIN_PCOMPRESSQ128,
31576 IX86_BUILTIN_PCOMPRESSD256,
31577 IX86_BUILTIN_PCOMPRESSD128,
31578 IX86_BUILTIN_EXPANDPD256,
31579 IX86_BUILTIN_EXPANDPD128,
31580 IX86_BUILTIN_EXPANDPS256,
31581 IX86_BUILTIN_EXPANDPS128,
31582 IX86_BUILTIN_PEXPANDQ256,
31583 IX86_BUILTIN_PEXPANDQ128,
31584 IX86_BUILTIN_PEXPANDD256,
31585 IX86_BUILTIN_PEXPANDD128,
31586 IX86_BUILTIN_EXPANDPD256Z,
31587 IX86_BUILTIN_EXPANDPD128Z,
31588 IX86_BUILTIN_EXPANDPS256Z,
31589 IX86_BUILTIN_EXPANDPS128Z,
31590 IX86_BUILTIN_PEXPANDQ256Z,
31591 IX86_BUILTIN_PEXPANDQ128Z,
31592 IX86_BUILTIN_PEXPANDD256Z,
31593 IX86_BUILTIN_PEXPANDD128Z,
31594 IX86_BUILTIN_PMAXSD256_MASK,
31595 IX86_BUILTIN_PMINSD256_MASK,
31596 IX86_BUILTIN_PMAXUD256_MASK,
31597 IX86_BUILTIN_PMINUD256_MASK,
31598 IX86_BUILTIN_PMAXSD128_MASK,
31599 IX86_BUILTIN_PMINSD128_MASK,
31600 IX86_BUILTIN_PMAXUD128_MASK,
31601 IX86_BUILTIN_PMINUD128_MASK,
31602 IX86_BUILTIN_PMAXSQ256_MASK,
31603 IX86_BUILTIN_PMINSQ256_MASK,
31604 IX86_BUILTIN_PMAXUQ256_MASK,
31605 IX86_BUILTIN_PMINUQ256_MASK,
31606 IX86_BUILTIN_PMAXSQ128_MASK,
31607 IX86_BUILTIN_PMINSQ128_MASK,
31608 IX86_BUILTIN_PMAXUQ128_MASK,
31609 IX86_BUILTIN_PMINUQ128_MASK,
31610 IX86_BUILTIN_PMINSB256_MASK,
31611 IX86_BUILTIN_PMINUB256_MASK,
31612 IX86_BUILTIN_PMAXSB256_MASK,
31613 IX86_BUILTIN_PMAXUB256_MASK,
31614 IX86_BUILTIN_PMINSB128_MASK,
31615 IX86_BUILTIN_PMINUB128_MASK,
31616 IX86_BUILTIN_PMAXSB128_MASK,
31617 IX86_BUILTIN_PMAXUB128_MASK,
31618 IX86_BUILTIN_PMINSW256_MASK,
31619 IX86_BUILTIN_PMINUW256_MASK,
31620 IX86_BUILTIN_PMAXSW256_MASK,
31621 IX86_BUILTIN_PMAXUW256_MASK,
31622 IX86_BUILTIN_PMINSW128_MASK,
31623 IX86_BUILTIN_PMINUW128_MASK,
31624 IX86_BUILTIN_PMAXSW128_MASK,
31625 IX86_BUILTIN_PMAXUW128_MASK,
31626 IX86_BUILTIN_VPCONFLICTQ256,
31627 IX86_BUILTIN_VPCONFLICTD256,
31628 IX86_BUILTIN_VPCLZCNTQ256,
31629 IX86_BUILTIN_VPCLZCNTD256,
31630 IX86_BUILTIN_UNPCKHPD256_MASK,
31631 IX86_BUILTIN_UNPCKHPD128_MASK,
31632 IX86_BUILTIN_UNPCKHPS256_MASK,
31633 IX86_BUILTIN_UNPCKHPS128_MASK,
31634 IX86_BUILTIN_UNPCKLPD256_MASK,
31635 IX86_BUILTIN_UNPCKLPD128_MASK,
31636 IX86_BUILTIN_UNPCKLPS256_MASK,
31637 IX86_BUILTIN_VPCONFLICTQ128,
31638 IX86_BUILTIN_VPCONFLICTD128,
31639 IX86_BUILTIN_VPCLZCNTQ128,
31640 IX86_BUILTIN_VPCLZCNTD128,
31641 IX86_BUILTIN_UNPCKLPS128_MASK,
31642 IX86_BUILTIN_ALIGND256,
31643 IX86_BUILTIN_ALIGNQ256,
31644 IX86_BUILTIN_ALIGND128,
31645 IX86_BUILTIN_ALIGNQ128,
31646 IX86_BUILTIN_CVTPS2PH256_MASK,
31647 IX86_BUILTIN_CVTPS2PH_MASK,
31648 IX86_BUILTIN_CVTPH2PS_MASK,
31649 IX86_BUILTIN_CVTPH2PS256_MASK,
31650 IX86_BUILTIN_PUNPCKHDQ128_MASK,
31651 IX86_BUILTIN_PUNPCKHDQ256_MASK,
31652 IX86_BUILTIN_PUNPCKHQDQ128_MASK,
31653 IX86_BUILTIN_PUNPCKHQDQ256_MASK,
31654 IX86_BUILTIN_PUNPCKLDQ128_MASK,
31655 IX86_BUILTIN_PUNPCKLDQ256_MASK,
31656 IX86_BUILTIN_PUNPCKLQDQ128_MASK,
31657 IX86_BUILTIN_PUNPCKLQDQ256_MASK,
31658 IX86_BUILTIN_PUNPCKHBW128_MASK,
31659 IX86_BUILTIN_PUNPCKHBW256_MASK,
31660 IX86_BUILTIN_PUNPCKHWD128_MASK,
31661 IX86_BUILTIN_PUNPCKHWD256_MASK,
31662 IX86_BUILTIN_PUNPCKLBW128_MASK,
31663 IX86_BUILTIN_PUNPCKLBW256_MASK,
31664 IX86_BUILTIN_PUNPCKLWD128_MASK,
31665 IX86_BUILTIN_PUNPCKLWD256_MASK,
31666 IX86_BUILTIN_PSLLVV16HI,
31667 IX86_BUILTIN_PSLLVV8HI,
31668 IX86_BUILTIN_PACKSSDW256_MASK,
31669 IX86_BUILTIN_PACKSSDW128_MASK,
31670 IX86_BUILTIN_PACKUSDW256_MASK,
31671 IX86_BUILTIN_PACKUSDW128_MASK,
31672 IX86_BUILTIN_PAVGB256_MASK,
31673 IX86_BUILTIN_PAVGW256_MASK,
31674 IX86_BUILTIN_PAVGB128_MASK,
31675 IX86_BUILTIN_PAVGW128_MASK,
31676 IX86_BUILTIN_VPERMVARSF256_MASK,
31677 IX86_BUILTIN_VPERMVARDF256_MASK,
31678 IX86_BUILTIN_VPERMDF256_MASK,
31679 IX86_BUILTIN_PABSB256_MASK,
31680 IX86_BUILTIN_PABSB128_MASK,
31681 IX86_BUILTIN_PABSW256_MASK,
31682 IX86_BUILTIN_PABSW128_MASK,
31683 IX86_BUILTIN_VPERMILVARPD_MASK,
31684 IX86_BUILTIN_VPERMILVARPS_MASK,
31685 IX86_BUILTIN_VPERMILVARPD256_MASK,
31686 IX86_BUILTIN_VPERMILVARPS256_MASK,
31687 IX86_BUILTIN_VPERMILPD_MASK,
31688 IX86_BUILTIN_VPERMILPS_MASK,
31689 IX86_BUILTIN_VPERMILPD256_MASK,
31690 IX86_BUILTIN_VPERMILPS256_MASK,
31691 IX86_BUILTIN_BLENDMQ256,
31692 IX86_BUILTIN_BLENDMD256,
31693 IX86_BUILTIN_BLENDMPD256,
31694 IX86_BUILTIN_BLENDMPS256,
31695 IX86_BUILTIN_BLENDMQ128,
31696 IX86_BUILTIN_BLENDMD128,
31697 IX86_BUILTIN_BLENDMPD128,
31698 IX86_BUILTIN_BLENDMPS128,
31699 IX86_BUILTIN_BLENDMW256,
31700 IX86_BUILTIN_BLENDMB256,
31701 IX86_BUILTIN_BLENDMW128,
31702 IX86_BUILTIN_BLENDMB128,
31703 IX86_BUILTIN_PMULLD256_MASK,
31704 IX86_BUILTIN_PMULLD128_MASK,
31705 IX86_BUILTIN_PMULUDQ256_MASK,
31706 IX86_BUILTIN_PMULDQ256_MASK,
31707 IX86_BUILTIN_PMULDQ128_MASK,
31708 IX86_BUILTIN_PMULUDQ128_MASK,
31709 IX86_BUILTIN_CVTPD2PS256_MASK,
31710 IX86_BUILTIN_CVTPD2PS_MASK,
31711 IX86_BUILTIN_VPERMVARSI256_MASK,
31712 IX86_BUILTIN_VPERMVARDI256_MASK,
31713 IX86_BUILTIN_VPERMDI256_MASK,
31714 IX86_BUILTIN_CMPQ256,
31715 IX86_BUILTIN_CMPD256,
31716 IX86_BUILTIN_UCMPQ256,
31717 IX86_BUILTIN_UCMPD256,
31718 IX86_BUILTIN_CMPB256,
31719 IX86_BUILTIN_CMPW256,
31720 IX86_BUILTIN_UCMPB256,
31721 IX86_BUILTIN_UCMPW256,
31722 IX86_BUILTIN_CMPPD256_MASK,
31723 IX86_BUILTIN_CMPPS256_MASK,
31724 IX86_BUILTIN_CMPQ128,
31725 IX86_BUILTIN_CMPD128,
31726 IX86_BUILTIN_UCMPQ128,
31727 IX86_BUILTIN_UCMPD128,
31728 IX86_BUILTIN_CMPB128,
31729 IX86_BUILTIN_CMPW128,
31730 IX86_BUILTIN_UCMPB128,
31731 IX86_BUILTIN_UCMPW128,
31732 IX86_BUILTIN_CMPPD128_MASK,
31733 IX86_BUILTIN_CMPPS128_MASK,
31735 IX86_BUILTIN_GATHER3SIV8SF,
31736 IX86_BUILTIN_GATHER3SIV4SF,
31737 IX86_BUILTIN_GATHER3SIV4DF,
31738 IX86_BUILTIN_GATHER3SIV2DF,
31739 IX86_BUILTIN_GATHER3DIV8SF,
31740 IX86_BUILTIN_GATHER3DIV4SF,
31741 IX86_BUILTIN_GATHER3DIV4DF,
31742 IX86_BUILTIN_GATHER3DIV2DF,
31743 IX86_BUILTIN_GATHER3SIV8SI,
31744 IX86_BUILTIN_GATHER3SIV4SI,
31745 IX86_BUILTIN_GATHER3SIV4DI,
31746 IX86_BUILTIN_GATHER3SIV2DI,
31747 IX86_BUILTIN_GATHER3DIV8SI,
31748 IX86_BUILTIN_GATHER3DIV4SI,
31749 IX86_BUILTIN_GATHER3DIV4DI,
31750 IX86_BUILTIN_GATHER3DIV2DI,
31751 IX86_BUILTIN_SCATTERSIV8SF,
31752 IX86_BUILTIN_SCATTERSIV4SF,
31753 IX86_BUILTIN_SCATTERSIV4DF,
31754 IX86_BUILTIN_SCATTERSIV2DF,
31755 IX86_BUILTIN_SCATTERDIV8SF,
31756 IX86_BUILTIN_SCATTERDIV4SF,
31757 IX86_BUILTIN_SCATTERDIV4DF,
31758 IX86_BUILTIN_SCATTERDIV2DF,
31759 IX86_BUILTIN_SCATTERSIV8SI,
31760 IX86_BUILTIN_SCATTERSIV4SI,
31761 IX86_BUILTIN_SCATTERSIV4DI,
31762 IX86_BUILTIN_SCATTERSIV2DI,
31763 IX86_BUILTIN_SCATTERDIV8SI,
31764 IX86_BUILTIN_SCATTERDIV4SI,
31765 IX86_BUILTIN_SCATTERDIV4DI,
31766 IX86_BUILTIN_SCATTERDIV2DI,
31769 IX86_BUILTIN_RANGESD128,
31770 IX86_BUILTIN_RANGESS128,
31771 IX86_BUILTIN_KUNPCKWD,
31772 IX86_BUILTIN_KUNPCKDQ,
31773 IX86_BUILTIN_BROADCASTF32x2_512,
31774 IX86_BUILTIN_BROADCASTI32x2_512,
31775 IX86_BUILTIN_BROADCASTF64X2_512,
31776 IX86_BUILTIN_BROADCASTI64X2_512,
31777 IX86_BUILTIN_BROADCASTF32X8_512,
31778 IX86_BUILTIN_BROADCASTI32X8_512,
31779 IX86_BUILTIN_EXTRACTF64X2_512,
31780 IX86_BUILTIN_EXTRACTF32X8,
31781 IX86_BUILTIN_EXTRACTI64X2_512,
31782 IX86_BUILTIN_EXTRACTI32X8,
31783 IX86_BUILTIN_REDUCEPD512_MASK,
31784 IX86_BUILTIN_REDUCEPS512_MASK,
31785 IX86_BUILTIN_PMULLQ512,
31786 IX86_BUILTIN_XORPD512,
31787 IX86_BUILTIN_XORPS512,
31788 IX86_BUILTIN_ORPD512,
31789 IX86_BUILTIN_ORPS512,
31790 IX86_BUILTIN_ANDPD512,
31791 IX86_BUILTIN_ANDPS512,
31792 IX86_BUILTIN_ANDNPD512,
31793 IX86_BUILTIN_ANDNPS512,
31794 IX86_BUILTIN_INSERTF32X8,
31795 IX86_BUILTIN_INSERTI32X8,
31796 IX86_BUILTIN_INSERTF64X2_512,
31797 IX86_BUILTIN_INSERTI64X2_512,
31798 IX86_BUILTIN_FPCLASSPD512,
31799 IX86_BUILTIN_FPCLASSPS512,
31800 IX86_BUILTIN_CVTD2MASK512,
31801 IX86_BUILTIN_CVTQ2MASK512,
31802 IX86_BUILTIN_CVTMASK2D512,
31803 IX86_BUILTIN_CVTMASK2Q512,
31804 IX86_BUILTIN_CVTPD2QQ512,
31805 IX86_BUILTIN_CVTPS2QQ512,
31806 IX86_BUILTIN_CVTPD2UQQ512,
31807 IX86_BUILTIN_CVTPS2UQQ512,
31808 IX86_BUILTIN_CVTQQ2PS512,
31809 IX86_BUILTIN_CVTUQQ2PS512,
31810 IX86_BUILTIN_CVTQQ2PD512,
31811 IX86_BUILTIN_CVTUQQ2PD512,
31812 IX86_BUILTIN_CVTTPS2QQ512,
31813 IX86_BUILTIN_CVTTPS2UQQ512,
31814 IX86_BUILTIN_CVTTPD2QQ512,
31815 IX86_BUILTIN_CVTTPD2UQQ512,
31816 IX86_BUILTIN_RANGEPS512,
31817 IX86_BUILTIN_RANGEPD512,
31820 IX86_BUILTIN_PACKUSDW512,
31821 IX86_BUILTIN_PACKSSDW512,
31822 IX86_BUILTIN_LOADDQUHI512_MASK,
31823 IX86_BUILTIN_LOADDQUQI512_MASK,
31824 IX86_BUILTIN_PSLLDQ512,
31825 IX86_BUILTIN_PSRLDQ512,
31826 IX86_BUILTIN_STOREDQUHI512_MASK,
31827 IX86_BUILTIN_STOREDQUQI512_MASK,
31828 IX86_BUILTIN_PALIGNR512,
31829 IX86_BUILTIN_PALIGNR512_MASK,
31830 IX86_BUILTIN_MOVDQUHI512_MASK,
31831 IX86_BUILTIN_MOVDQUQI512_MASK,
31832 IX86_BUILTIN_PSADBW512,
31833 IX86_BUILTIN_DBPSADBW512,
31834 IX86_BUILTIN_PBROADCASTB512,
31835 IX86_BUILTIN_PBROADCASTB512_GPR,
31836 IX86_BUILTIN_PBROADCASTW512,
31837 IX86_BUILTIN_PBROADCASTW512_GPR,
31838 IX86_BUILTIN_PMOVSXBW512_MASK,
31839 IX86_BUILTIN_PMOVZXBW512_MASK,
31840 IX86_BUILTIN_VPERMVARHI512_MASK,
31841 IX86_BUILTIN_VPERMT2VARHI512,
31842 IX86_BUILTIN_VPERMT2VARHI512_MASKZ,
31843 IX86_BUILTIN_VPERMI2VARHI512,
31844 IX86_BUILTIN_PAVGB512,
31845 IX86_BUILTIN_PAVGW512,
31846 IX86_BUILTIN_PADDB512,
31847 IX86_BUILTIN_PSUBB512,
31848 IX86_BUILTIN_PSUBSB512,
31849 IX86_BUILTIN_PADDSB512,
31850 IX86_BUILTIN_PSUBUSB512,
31851 IX86_BUILTIN_PADDUSB512,
31852 IX86_BUILTIN_PSUBW512,
31853 IX86_BUILTIN_PADDW512,
31854 IX86_BUILTIN_PSUBSW512,
31855 IX86_BUILTIN_PADDSW512,
31856 IX86_BUILTIN_PSUBUSW512,
31857 IX86_BUILTIN_PADDUSW512,
31858 IX86_BUILTIN_PMAXUW512,
31859 IX86_BUILTIN_PMAXSW512,
31860 IX86_BUILTIN_PMINUW512,
31861 IX86_BUILTIN_PMINSW512,
31862 IX86_BUILTIN_PMAXUB512,
31863 IX86_BUILTIN_PMAXSB512,
31864 IX86_BUILTIN_PMINUB512,
31865 IX86_BUILTIN_PMINSB512,
31866 IX86_BUILTIN_PMOVWB512,
31867 IX86_BUILTIN_PMOVSWB512,
31868 IX86_BUILTIN_PMOVUSWB512,
31869 IX86_BUILTIN_PMULHRSW512_MASK,
31870 IX86_BUILTIN_PMULHUW512_MASK,
31871 IX86_BUILTIN_PMULHW512_MASK,
31872 IX86_BUILTIN_PMULLW512_MASK,
31873 IX86_BUILTIN_PSLLWI512_MASK,
31874 IX86_BUILTIN_PSLLW512_MASK,
31875 IX86_BUILTIN_PACKSSWB512,
31876 IX86_BUILTIN_PACKUSWB512,
31877 IX86_BUILTIN_PSRAVV32HI,
31878 IX86_BUILTIN_PMADDUBSW512_MASK,
31879 IX86_BUILTIN_PMADDWD512_MASK,
31880 IX86_BUILTIN_PSRLVV32HI,
31881 IX86_BUILTIN_PUNPCKHBW512,
31882 IX86_BUILTIN_PUNPCKHWD512,
31883 IX86_BUILTIN_PUNPCKLBW512,
31884 IX86_BUILTIN_PUNPCKLWD512,
31885 IX86_BUILTIN_PSHUFB512,
31886 IX86_BUILTIN_PSHUFHW512,
31887 IX86_BUILTIN_PSHUFLW512,
31888 IX86_BUILTIN_PSRAWI512,
31889 IX86_BUILTIN_PSRAW512,
31890 IX86_BUILTIN_PSRLWI512,
31891 IX86_BUILTIN_PSRLW512,
31892 IX86_BUILTIN_CVTB2MASK512,
31893 IX86_BUILTIN_CVTW2MASK512,
31894 IX86_BUILTIN_CVTMASK2B512,
31895 IX86_BUILTIN_CVTMASK2W512,
31896 IX86_BUILTIN_PCMPEQB512_MASK,
31897 IX86_BUILTIN_PCMPEQW512_MASK,
31898 IX86_BUILTIN_PCMPGTB512_MASK,
31899 IX86_BUILTIN_PCMPGTW512_MASK,
31900 IX86_BUILTIN_PTESTMB512,
31901 IX86_BUILTIN_PTESTMW512,
31902 IX86_BUILTIN_PTESTNMB512,
31903 IX86_BUILTIN_PTESTNMW512,
31904 IX86_BUILTIN_PSLLVV32HI,
31905 IX86_BUILTIN_PABSB512,
31906 IX86_BUILTIN_PABSW512,
31907 IX86_BUILTIN_BLENDMW512,
31908 IX86_BUILTIN_BLENDMB512,
31909 IX86_BUILTIN_CMPB512,
31910 IX86_BUILTIN_CMPW512,
31911 IX86_BUILTIN_UCMPB512,
31912 IX86_BUILTIN_UCMPW512,
31914 /* Alternate 4 and 8 element gather/scatter for the vectorizer
31915 where all operands are 32-byte or 64-byte wide respectively. */
31916 IX86_BUILTIN_GATHERALTSIV4DF,
31917 IX86_BUILTIN_GATHERALTDIV8SF,
31918 IX86_BUILTIN_GATHERALTSIV4DI,
31919 IX86_BUILTIN_GATHERALTDIV8SI,
31920 IX86_BUILTIN_GATHER3ALTDIV16SF,
31921 IX86_BUILTIN_GATHER3ALTDIV16SI,
31922 IX86_BUILTIN_GATHER3ALTSIV4DF,
31923 IX86_BUILTIN_GATHER3ALTDIV8SF,
31924 IX86_BUILTIN_GATHER3ALTSIV4DI,
31925 IX86_BUILTIN_GATHER3ALTDIV8SI,
31926 IX86_BUILTIN_GATHER3ALTSIV8DF,
31927 IX86_BUILTIN_GATHER3ALTSIV8DI,
31928 IX86_BUILTIN_GATHER3DIV16SF,
31929 IX86_BUILTIN_GATHER3DIV16SI,
31930 IX86_BUILTIN_GATHER3DIV8DF,
31931 IX86_BUILTIN_GATHER3DIV8DI,
31932 IX86_BUILTIN_GATHER3SIV16SF,
31933 IX86_BUILTIN_GATHER3SIV16SI,
31934 IX86_BUILTIN_GATHER3SIV8DF,
31935 IX86_BUILTIN_GATHER3SIV8DI,
31936 IX86_BUILTIN_SCATTERALTSIV8DF,
31937 IX86_BUILTIN_SCATTERALTDIV16SF,
31938 IX86_BUILTIN_SCATTERALTSIV8DI,
31939 IX86_BUILTIN_SCATTERALTDIV16SI,
31940 IX86_BUILTIN_SCATTERDIV16SF,
31941 IX86_BUILTIN_SCATTERDIV16SI,
31942 IX86_BUILTIN_SCATTERDIV8DF,
31943 IX86_BUILTIN_SCATTERDIV8DI,
31944 IX86_BUILTIN_SCATTERSIV16SF,
31945 IX86_BUILTIN_SCATTERSIV16SI,
31946 IX86_BUILTIN_SCATTERSIV8DF,
31947 IX86_BUILTIN_SCATTERSIV8DI,
31950 IX86_BUILTIN_GATHERPFQPD,
31951 IX86_BUILTIN_GATHERPFDPS,
31952 IX86_BUILTIN_GATHERPFDPD,
31953 IX86_BUILTIN_GATHERPFQPS,
31954 IX86_BUILTIN_SCATTERPFDPD,
31955 IX86_BUILTIN_SCATTERPFDPS,
31956 IX86_BUILTIN_SCATTERPFQPD,
31957 IX86_BUILTIN_SCATTERPFQPS,
31960 IX86_BUILTIN_EXP2PD_MASK,
31961 IX86_BUILTIN_EXP2PS_MASK,
31962 IX86_BUILTIN_EXP2PS,
31963 IX86_BUILTIN_RCP28PD,
31964 IX86_BUILTIN_RCP28PS,
31965 IX86_BUILTIN_RCP28SD,
31966 IX86_BUILTIN_RCP28SS,
31967 IX86_BUILTIN_RSQRT28PD,
31968 IX86_BUILTIN_RSQRT28PS,
31969 IX86_BUILTIN_RSQRT28SD,
31970 IX86_BUILTIN_RSQRT28SS,
31973 IX86_BUILTIN_VPMADD52LUQ512,
31974 IX86_BUILTIN_VPMADD52HUQ512,
31975 IX86_BUILTIN_VPMADD52LUQ256,
31976 IX86_BUILTIN_VPMADD52HUQ256,
31977 IX86_BUILTIN_VPMADD52LUQ128,
31978 IX86_BUILTIN_VPMADD52HUQ128,
31979 IX86_BUILTIN_VPMADD52LUQ512_MASKZ,
31980 IX86_BUILTIN_VPMADD52HUQ512_MASKZ,
31981 IX86_BUILTIN_VPMADD52LUQ256_MASKZ,
31982 IX86_BUILTIN_VPMADD52HUQ256_MASKZ,
31983 IX86_BUILTIN_VPMADD52LUQ128_MASKZ,
31984 IX86_BUILTIN_VPMADD52HUQ128_MASKZ,
31987 IX86_BUILTIN_VPMULTISHIFTQB512,
31988 IX86_BUILTIN_VPMULTISHIFTQB256,
31989 IX86_BUILTIN_VPMULTISHIFTQB128,
31990 IX86_BUILTIN_VPERMVARQI512_MASK,
31991 IX86_BUILTIN_VPERMT2VARQI512,
31992 IX86_BUILTIN_VPERMT2VARQI512_MASKZ,
31993 IX86_BUILTIN_VPERMI2VARQI512,
31994 IX86_BUILTIN_VPERMVARQI256_MASK,
31995 IX86_BUILTIN_VPERMVARQI128_MASK,
31996 IX86_BUILTIN_VPERMT2VARQI256,
31997 IX86_BUILTIN_VPERMT2VARQI256_MASKZ,
31998 IX86_BUILTIN_VPERMT2VARQI128,
31999 IX86_BUILTIN_VPERMT2VARQI128_MASKZ,
32000 IX86_BUILTIN_VPERMI2VARQI256,
32001 IX86_BUILTIN_VPERMI2VARQI128,
32003 /* SHA builtins. */
32004 IX86_BUILTIN_SHA1MSG1,
32005 IX86_BUILTIN_SHA1MSG2,
32006 IX86_BUILTIN_SHA1NEXTE,
32007 IX86_BUILTIN_SHA1RNDS4,
32008 IX86_BUILTIN_SHA256MSG1,
32009 IX86_BUILTIN_SHA256MSG2,
32010 IX86_BUILTIN_SHA256RNDS2,
32012 /* CLWB instructions. */
32015 /* PCOMMIT instructions. */
32016 IX86_BUILTIN_PCOMMIT,
32018 /* CLFLUSHOPT instructions. */
32019 IX86_BUILTIN_CLFLUSHOPT,
32021 /* TFmode support builtins. */
32023 IX86_BUILTIN_HUGE_VALQ,
32024 IX86_BUILTIN_FABSQ,
32025 IX86_BUILTIN_COPYSIGNQ,
32027 /* Vectorizer support builtins. */
32028 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512,
32029 IX86_BUILTIN_CPYSGNPS,
32030 IX86_BUILTIN_CPYSGNPD,
32031 IX86_BUILTIN_CPYSGNPS256,
32032 IX86_BUILTIN_CPYSGNPS512,
32033 IX86_BUILTIN_CPYSGNPD256,
32034 IX86_BUILTIN_CPYSGNPD512,
32035 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512,
32036 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512,
32039 /* FMA4 instructions. */
32040 IX86_BUILTIN_VFMADDSS,
32041 IX86_BUILTIN_VFMADDSD,
32042 IX86_BUILTIN_VFMADDPS,
32043 IX86_BUILTIN_VFMADDPD,
32044 IX86_BUILTIN_VFMADDPS256,
32045 IX86_BUILTIN_VFMADDPD256,
32046 IX86_BUILTIN_VFMADDSUBPS,
32047 IX86_BUILTIN_VFMADDSUBPD,
32048 IX86_BUILTIN_VFMADDSUBPS256,
32049 IX86_BUILTIN_VFMADDSUBPD256,
32051 /* FMA3 instructions. */
32052 IX86_BUILTIN_VFMADDSS3,
32053 IX86_BUILTIN_VFMADDSD3,
32055 /* XOP instructions. */
32056 IX86_BUILTIN_VPCMOV,
32057 IX86_BUILTIN_VPCMOV_V2DI,
32058 IX86_BUILTIN_VPCMOV_V4SI,
32059 IX86_BUILTIN_VPCMOV_V8HI,
32060 IX86_BUILTIN_VPCMOV_V16QI,
32061 IX86_BUILTIN_VPCMOV_V4SF,
32062 IX86_BUILTIN_VPCMOV_V2DF,
32063 IX86_BUILTIN_VPCMOV256,
32064 IX86_BUILTIN_VPCMOV_V4DI256,
32065 IX86_BUILTIN_VPCMOV_V8SI256,
32066 IX86_BUILTIN_VPCMOV_V16HI256,
32067 IX86_BUILTIN_VPCMOV_V32QI256,
32068 IX86_BUILTIN_VPCMOV_V8SF256,
32069 IX86_BUILTIN_VPCMOV_V4DF256,
32071 IX86_BUILTIN_VPPERM,
32073 IX86_BUILTIN_VPMACSSWW,
32074 IX86_BUILTIN_VPMACSWW,
32075 IX86_BUILTIN_VPMACSSWD,
32076 IX86_BUILTIN_VPMACSWD,
32077 IX86_BUILTIN_VPMACSSDD,
32078 IX86_BUILTIN_VPMACSDD,
32079 IX86_BUILTIN_VPMACSSDQL,
32080 IX86_BUILTIN_VPMACSSDQH,
32081 IX86_BUILTIN_VPMACSDQL,
32082 IX86_BUILTIN_VPMACSDQH,
32083 IX86_BUILTIN_VPMADCSSWD,
32084 IX86_BUILTIN_VPMADCSWD,
32086 IX86_BUILTIN_VPHADDBW,
32087 IX86_BUILTIN_VPHADDBD,
32088 IX86_BUILTIN_VPHADDBQ,
32089 IX86_BUILTIN_VPHADDWD,
32090 IX86_BUILTIN_VPHADDWQ,
32091 IX86_BUILTIN_VPHADDDQ,
32092 IX86_BUILTIN_VPHADDUBW,
32093 IX86_BUILTIN_VPHADDUBD,
32094 IX86_BUILTIN_VPHADDUBQ,
32095 IX86_BUILTIN_VPHADDUWD,
32096 IX86_BUILTIN_VPHADDUWQ,
32097 IX86_BUILTIN_VPHADDUDQ,
32098 IX86_BUILTIN_VPHSUBBW,
32099 IX86_BUILTIN_VPHSUBWD,
32100 IX86_BUILTIN_VPHSUBDQ,
32102 IX86_BUILTIN_VPROTB,
32103 IX86_BUILTIN_VPROTW,
32104 IX86_BUILTIN_VPROTD,
32105 IX86_BUILTIN_VPROTQ,
32106 IX86_BUILTIN_VPROTB_IMM,
32107 IX86_BUILTIN_VPROTW_IMM,
32108 IX86_BUILTIN_VPROTD_IMM,
32109 IX86_BUILTIN_VPROTQ_IMM,
32111 IX86_BUILTIN_VPSHLB,
32112 IX86_BUILTIN_VPSHLW,
32113 IX86_BUILTIN_VPSHLD,
32114 IX86_BUILTIN_VPSHLQ,
32115 IX86_BUILTIN_VPSHAB,
32116 IX86_BUILTIN_VPSHAW,
32117 IX86_BUILTIN_VPSHAD,
32118 IX86_BUILTIN_VPSHAQ,
32120 IX86_BUILTIN_VFRCZSS,
32121 IX86_BUILTIN_VFRCZSD,
32122 IX86_BUILTIN_VFRCZPS,
32123 IX86_BUILTIN_VFRCZPD,
32124 IX86_BUILTIN_VFRCZPS256,
32125 IX86_BUILTIN_VFRCZPD256,
32127 IX86_BUILTIN_VPCOMEQUB,
32128 IX86_BUILTIN_VPCOMNEUB,
32129 IX86_BUILTIN_VPCOMLTUB,
32130 IX86_BUILTIN_VPCOMLEUB,
32131 IX86_BUILTIN_VPCOMGTUB,
32132 IX86_BUILTIN_VPCOMGEUB,
32133 IX86_BUILTIN_VPCOMFALSEUB,
32134 IX86_BUILTIN_VPCOMTRUEUB,
32136 IX86_BUILTIN_VPCOMEQUW,
32137 IX86_BUILTIN_VPCOMNEUW,
32138 IX86_BUILTIN_VPCOMLTUW,
32139 IX86_BUILTIN_VPCOMLEUW,
32140 IX86_BUILTIN_VPCOMGTUW,
32141 IX86_BUILTIN_VPCOMGEUW,
32142 IX86_BUILTIN_VPCOMFALSEUW,
32143 IX86_BUILTIN_VPCOMTRUEUW,
32145 IX86_BUILTIN_VPCOMEQUD,
32146 IX86_BUILTIN_VPCOMNEUD,
32147 IX86_BUILTIN_VPCOMLTUD,
32148 IX86_BUILTIN_VPCOMLEUD,
32149 IX86_BUILTIN_VPCOMGTUD,
32150 IX86_BUILTIN_VPCOMGEUD,
32151 IX86_BUILTIN_VPCOMFALSEUD,
32152 IX86_BUILTIN_VPCOMTRUEUD,
32154 IX86_BUILTIN_VPCOMEQUQ,
32155 IX86_BUILTIN_VPCOMNEUQ,
32156 IX86_BUILTIN_VPCOMLTUQ,
32157 IX86_BUILTIN_VPCOMLEUQ,
32158 IX86_BUILTIN_VPCOMGTUQ,
32159 IX86_BUILTIN_VPCOMGEUQ,
32160 IX86_BUILTIN_VPCOMFALSEUQ,
32161 IX86_BUILTIN_VPCOMTRUEUQ,
32163 IX86_BUILTIN_VPCOMEQB,
32164 IX86_BUILTIN_VPCOMNEB,
32165 IX86_BUILTIN_VPCOMLTB,
32166 IX86_BUILTIN_VPCOMLEB,
32167 IX86_BUILTIN_VPCOMGTB,
32168 IX86_BUILTIN_VPCOMGEB,
32169 IX86_BUILTIN_VPCOMFALSEB,
32170 IX86_BUILTIN_VPCOMTRUEB,
32172 IX86_BUILTIN_VPCOMEQW,
32173 IX86_BUILTIN_VPCOMNEW,
32174 IX86_BUILTIN_VPCOMLTW,
32175 IX86_BUILTIN_VPCOMLEW,
32176 IX86_BUILTIN_VPCOMGTW,
32177 IX86_BUILTIN_VPCOMGEW,
32178 IX86_BUILTIN_VPCOMFALSEW,
32179 IX86_BUILTIN_VPCOMTRUEW,
32181 IX86_BUILTIN_VPCOMEQD,
32182 IX86_BUILTIN_VPCOMNED,
32183 IX86_BUILTIN_VPCOMLTD,
32184 IX86_BUILTIN_VPCOMLED,
32185 IX86_BUILTIN_VPCOMGTD,
32186 IX86_BUILTIN_VPCOMGED,
32187 IX86_BUILTIN_VPCOMFALSED,
32188 IX86_BUILTIN_VPCOMTRUED,
32190 IX86_BUILTIN_VPCOMEQQ,
32191 IX86_BUILTIN_VPCOMNEQ,
32192 IX86_BUILTIN_VPCOMLTQ,
32193 IX86_BUILTIN_VPCOMLEQ,
32194 IX86_BUILTIN_VPCOMGTQ,
32195 IX86_BUILTIN_VPCOMGEQ,
32196 IX86_BUILTIN_VPCOMFALSEQ,
32197 IX86_BUILTIN_VPCOMTRUEQ,
32199 /* LWP instructions. */
32200 IX86_BUILTIN_LLWPCB,
32201 IX86_BUILTIN_SLWPCB,
32202 IX86_BUILTIN_LWPVAL32,
32203 IX86_BUILTIN_LWPVAL64,
32204 IX86_BUILTIN_LWPINS32,
32205 IX86_BUILTIN_LWPINS64,
32210 IX86_BUILTIN_XBEGIN,
32212 IX86_BUILTIN_XABORT,
32213 IX86_BUILTIN_XTEST,
32216 IX86_BUILTIN_BNDMK,
32217 IX86_BUILTIN_BNDSTX,
32218 IX86_BUILTIN_BNDLDX,
32219 IX86_BUILTIN_BNDCL,
32220 IX86_BUILTIN_BNDCU,
32221 IX86_BUILTIN_BNDRET,
32222 IX86_BUILTIN_BNDNARROW,
32223 IX86_BUILTIN_BNDINT,
32224 IX86_BUILTIN_SIZEOF,
32225 IX86_BUILTIN_BNDLOWER,
32226 IX86_BUILTIN_BNDUPPER,
32228 /* BMI instructions. */
32229 IX86_BUILTIN_BEXTR32,
32230 IX86_BUILTIN_BEXTR64,
32233 /* TBM instructions. */
32234 IX86_BUILTIN_BEXTRI32,
32235 IX86_BUILTIN_BEXTRI64,
32237 /* BMI2 instructions. */
32238 IX86_BUILTIN_BZHI32,
32239 IX86_BUILTIN_BZHI64,
32240 IX86_BUILTIN_PDEP32,
32241 IX86_BUILTIN_PDEP64,
32242 IX86_BUILTIN_PEXT32,
32243 IX86_BUILTIN_PEXT64,
32245 /* ADX instructions. */
32246 IX86_BUILTIN_ADDCARRYX32,
32247 IX86_BUILTIN_ADDCARRYX64,
32249 /* SBB instructions. */
32250 IX86_BUILTIN_SBB32,
32251 IX86_BUILTIN_SBB64,
32253 /* FSGSBASE instructions. */
32254 IX86_BUILTIN_RDFSBASE32,
32255 IX86_BUILTIN_RDFSBASE64,
32256 IX86_BUILTIN_RDGSBASE32,
32257 IX86_BUILTIN_RDGSBASE64,
32258 IX86_BUILTIN_WRFSBASE32,
32259 IX86_BUILTIN_WRFSBASE64,
32260 IX86_BUILTIN_WRGSBASE32,
32261 IX86_BUILTIN_WRGSBASE64,
32263 /* RDRND instructions. */
32264 IX86_BUILTIN_RDRAND16_STEP,
32265 IX86_BUILTIN_RDRAND32_STEP,
32266 IX86_BUILTIN_RDRAND64_STEP,
32268 /* RDSEED instructions. */
32269 IX86_BUILTIN_RDSEED16_STEP,
32270 IX86_BUILTIN_RDSEED32_STEP,
32271 IX86_BUILTIN_RDSEED64_STEP,
32273 /* F16C instructions. */
32274 IX86_BUILTIN_CVTPH2PS,
32275 IX86_BUILTIN_CVTPH2PS256,
32276 IX86_BUILTIN_CVTPS2PH,
32277 IX86_BUILTIN_CVTPS2PH256,
32279 /* MONITORX and MWAITX instrucions. */
32280 IX86_BUILTIN_MONITORX,
32281 IX86_BUILTIN_MWAITX,
32283 /* CFString built-in for darwin */
32284 IX86_BUILTIN_CFSTRING,
32286 /* Builtins to get CPU type and supported features. */
32287 IX86_BUILTIN_CPU_INIT,
32288 IX86_BUILTIN_CPU_IS,
32289 IX86_BUILTIN_CPU_SUPPORTS,
32291 /* Read/write FLAGS register built-ins. */
32292 IX86_BUILTIN_READ_FLAGS,
32293 IX86_BUILTIN_WRITE_FLAGS,
32295 /* PKU instructions. */
32296 IX86_BUILTIN_RDPKRU,
32297 IX86_BUILTIN_WRPKRU,
32302 /* Table for the ix86 builtin decls.  */
/* Indexed by enum ix86_builtins; an entry stays NULL_TREE while the
   builtin is deferred (see def_builtin below) and is filled in once the
   decl is actually built.  */
32303 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
32305 /* Table of all of the builtin functions that are possible with different ISA's
32306 but are waiting to be built until a function is declared to use that
32308 struct builtin_isa {
32309 const char *name; /* function name */
32310 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
32311 HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */
32312 bool const_p; /* true if the declaration is constant */
32313 bool leaf_p; /* true if the declaration has leaf attribute */
32314 bool nothrow_p; /* true if the declaration has nothrow attribute */
32315 bool set_and_not_built_p; /* true while the builtin is registered but its
				     decl has not been built yet; cleared by
				     def_builtin / ix86_add_new_builtins when
				     the decl is created.  */
/* Deferred-builtin bookkeeping, indexed the same way as ix86_builtins.  */
32318 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
32320 /* Bits that can still enable any inclusion of a builtin.  */
/* Union of the isa masks of all builtins whose decls are still deferred;
   ix86_add_new_builtins uses it as a fast "anything to do?" filter.  */
32321 static HOST_WIDE_INT deferred_isa_values = 0;
32323 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
32324 of which isa_flags to use in the ix86_builtins_isa array. Stores the
32325 function decl in the ix86_builtins array. Returns the function decl or
32326 NULL_TREE, if the builtin was not added.
32328 If the front end has a special hook for builtin functions, delay adding
32329 builtin functions that aren't in the current ISA until the ISA is changed
32330 with function specific optimization. Doing so, can save about 300K for the
32331 default compiler. When the builtin is expanded, check at that time whether
32334 If the front end doesn't have a special hook, record all builtins, even if
32335 it isn't an instruction set in the current ISA in case the user uses
32336 function specific options for a different ISA, so that we don't get scope
32337 errors if a builtin is added in the middle of a function scope. */
32340 def_builtin (HOST_WIDE_INT mask, const char *name,
32341 enum ix86_builtin_func_type tcode,
32342 enum ix86_builtins code)
32344 tree decl = NULL_TREE;
/* Builtins tagged 64-bit-only are skipped entirely on non-64-bit
   targets; they never enter either table.  */
32346 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
32348 ix86_builtins_isa[(int) code].isa = mask;
/* NOTE(review): the 64-bit flag is stripped before MASK is tested
   against ix86_isa_flags below — presumably so that bit alone never
   forces deferral; confirm against the unelided source.  */
32350 mask &= ~OPTION_MASK_ISA_64BIT;
32352 || (mask & ix86_isa_flags) != 0
32353 || (lang_hooks.builtin_function
32354 == lang_hooks.builtin_function_ext_scope))
/* Build the decl now: the builtin is enabled in the current ISA (or
   the front end builds everything at extern scope anyway).  */
32357 tree type = ix86_get_builtin_func_type (tcode);
32358 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
32360 ix86_builtins[(int) code] = decl;
32361 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
32365 /* Just a MASK where set_and_not_built_p == true can potentially
32366 include a builtin. */
32367 deferred_isa_values |= mask;
/* Defer: record everything needed to build the decl later in
   ix86_add_new_builtins, and leave the decl slot empty.  */
32368 ix86_builtins[(int) code] = NULL_TREE;
32369 ix86_builtins_isa[(int) code].tcode = tcode;
32370 ix86_builtins_isa[(int) code].name = name;
32371 ix86_builtins_isa[(int) code].leaf_p = false;
32372 ix86_builtins_isa[(int) code].nothrow_p = false;
32373 ix86_builtins_isa[(int) code].const_p = false;
32374 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
32381 /* Like def_builtin, but also marks the function decl "const". */
/* NOTE(review): the extract skips lines 32382-32383, 32386, 32388 and
   32390 -- the return type, braces and the `if (decl)` / `else` that
   selects between the two statements below are not visible here.  */
32384 def_builtin_const (HOST_WIDE_INT mask, const char *name,
32385 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
32387 tree decl = def_builtin (mask, name, tcode, code);
/* Decl was built immediately: mark it const via TREE_READONLY.  */
32389 TREE_READONLY (decl) = 1;
/* Decl was deferred (def_builtin returned NULL_TREE): remember the
   const-ness so ix86_add_new_builtins can apply it later.  */
32391 ix86_builtins_isa[(int) code].const_p = true;
32396 /* Add any new builtin functions for a given ISA that may not have been
32397 declared. This saves a bit of space compared to adding all of the
32398 declarations to the tree, even if we didn't use them. */
/* NOTE(review): physical lines are missing from this extract (the
   embedded numbering jumps at 32399-32400, 32402, 32404-32405,
   32408-32409, 32412, 32414, 32417-32419, 32422, 32426-32427, 32433,
   32436-32438) -- return type, braces, local declarations and an
   early return are not visible here.  */
32401 ix86_add_new_builtins (HOST_WIDE_INT isa)
/* Nothing deferred under these ISA bits: nothing to do.  */
32403 if ((isa & deferred_isa_values) == 0)
32406 /* Bits in ISA value can be removed from potential isa values. */
32407 deferred_isa_values &= ~isa;
/* Temporarily clear any active target pragma so the decls created
   below are not given function-specific target attributes; restored
   at the end.  */
32410 tree saved_current_target_pragma = current_target_pragma;
32411 current_target_pragma = NULL_TREE;
32413 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
/* Materialize only builtins that were deferred and are enabled by
   the newly-activated ISA bits.  */
32415 if ((ix86_builtins_isa[i].isa & isa) != 0
32416 && ix86_builtins_isa[i].set_and_not_built_p)
32420 /* Don't define the builtin again. */
32421 ix86_builtins_isa[i].set_and_not_built_p = false;
32423 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
/* Use the ext-scope variant so the decl is visible even when the
   ISA change happens inside a function scope.  */
32424 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
32425 type, i, BUILT_IN_MD, NULL,
32428 ix86_builtins[i] = decl;
/* Re-apply the attributes recorded at def_builtin time.  */
32429 if (ix86_builtins_isa[i].const_p)
32430 TREE_READONLY (decl) = 1;
32431 if (ix86_builtins_isa[i].leaf_p)
32432 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
32434 if (ix86_builtins_isa[i].nothrow_p)
32435 TREE_NOTHROW (decl) = 1;
32439 current_target_pragma = saved_current_target_pragma;
32442 /* Bits for builtin_description.flag. */
32444 /* Set when we don't support the comparison natively, and should
32445 swap_comparison in order to support it. */
32446 #define BUILTIN_DESC_SWAP_OPERANDS 1
/* One row of the bdesc_* tables below, describing a single target
   builtin.  NOTE(review): the extract drops the struct's trailing
   member(s) and closing brace (lines 32455-32457 are not visible).  */
32448 struct builtin_description
/* ISA option mask that must be enabled for this builtin.  */
32450 const HOST_WIDE_INT mask;
/* Insn pattern used to expand it (tables below also use
   CODE_FOR_nothing for entries expanded specially).  */
32451 const enum insn_code icode;
/* User-visible "__builtin_ia32_*" name.  */
32452 const char *const name;
/* Enumerator stored as the decl's function code.  */
32453 const enum ix86_builtins code;
/* Comparison code for comi-style entries; UNKNOWN otherwise.  */
32454 const enum rtx_code comparison;
/* SSE/SSE2 (u)comiss/(u)comisd scalar-compare builtins.  The rtx_code
   column (UNEQ, UNLT, ... LTGT) is the comparison the expander tests on
   the flags after the comi instruction.  NOTE(review): the table's
   opening and closing braces fall on lines dropped from this extract.  */
32458 static const struct builtin_description bdesc_comi[] =
32460 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
32461 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
32462 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
32463 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
32464 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
32465 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
32466 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
32467 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
32468 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
32469 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
32470 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
32471 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
32472 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
32473 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
32474 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
32475 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
32476 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
32477 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
32478 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
32479 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
32480 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
32481 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
32482 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
32483 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
/* SSE4.2 pcmpestri/pcmpestrm builtins (explicit-length string compare).
   The last column is 0 for the index/mask-producing variants and a CC
   mode for the variants that test a single flag (CF, OF, SF, ZF, CCA).
   NOTE(review): the table's braces fall on lines dropped from this
   extract.  */
32486 static const struct builtin_description bdesc_pcmpestr[] =
32489 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
32490 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
32491 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
32492 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
32493 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
32494 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
32495 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
/* SSE4.2 pcmpistri/pcmpistrm builtins (implicit-length string compare);
   same column layout as bdesc_pcmpestr above: CC-mode column selects
   the flag-testing variants.  NOTE(review): the table's braces fall on
   lines dropped from this extract.  */
32498 static const struct builtin_description bdesc_pcmpistr[] =
32501 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
32502 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
32503 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
32504 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
32505 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
32506 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
32507 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
32510 /* Special builtins with variable number of arguments. */
32511 static const struct builtin_description bdesc_special_args[] =
32513 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
32514 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
32515 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },
32517 /* 80387 (for use internally for atomic compound assignment). */
32518 { 0, CODE_FOR_fnstenv, "__builtin_ia32_fnstenv", IX86_BUILTIN_FNSTENV, UNKNOWN, (int) VOID_FTYPE_PVOID },
32519 { 0, CODE_FOR_fldenv, "__builtin_ia32_fldenv", IX86_BUILTIN_FLDENV, UNKNOWN, (int) VOID_FTYPE_PCVOID },
32520 { 0, CODE_FOR_fnstsw, "__builtin_ia32_fnstsw", IX86_BUILTIN_FNSTSW, UNKNOWN, (int) USHORT_FTYPE_VOID },
32521 { 0, CODE_FOR_fnclex, "__builtin_ia32_fnclex", IX86_BUILTIN_FNCLEX, UNKNOWN, (int) VOID_FTYPE_VOID },
32524 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
32527 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
32529 /* FXSR, XSAVE, XSAVEOPT, XSAVEC and XSAVES. */
32530 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxsave", IX86_BUILTIN_FXSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID },
32531 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxrstor", IX86_BUILTIN_FXRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID },
32532 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xsave", IX86_BUILTIN_XSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
32533 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xrstor", IX86_BUILTIN_XRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
32534 { OPTION_MASK_ISA_XSAVEOPT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt", IX86_BUILTIN_XSAVEOPT, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
32535 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xsaves", IX86_BUILTIN_XSAVES, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
32536 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xrstors", IX86_BUILTIN_XRSTORS, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
32537 { OPTION_MASK_ISA_XSAVEC, CODE_FOR_nothing, "__builtin_ia32_xsavec", IX86_BUILTIN_XSAVEC, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
32539 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxsave64", IX86_BUILTIN_FXSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID },
32540 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxrstor64", IX86_BUILTIN_FXRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID },
32541 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsave64", IX86_BUILTIN_XSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
32542 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstor64", IX86_BUILTIN_XRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
32543 { OPTION_MASK_ISA_XSAVEOPT | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt64", IX86_BUILTIN_XSAVEOPT64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
32544 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaves64", IX86_BUILTIN_XSAVES64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
32545 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstors64", IX86_BUILTIN_XRSTORS64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
32546 { OPTION_MASK_ISA_XSAVEC | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsavec64", IX86_BUILTIN_XSAVEC64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
32549 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storeups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
32550 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
32551 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
32553 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
32554 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
32555 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
32556 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
32558 /* SSE or 3DNow!A */
32559 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
32560 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntq, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
32563 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
32564 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
32565 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storeupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
32566 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedquv16qi, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
32567 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
32568 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
32569 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
32570 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
32571 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
32572 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddquv16qi, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
32574 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
32575 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
32578 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
32581 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
32584 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
32585 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
32588 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
32589 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
32591 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
32592 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
32593 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
32594 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
32595 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
32597 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
32598 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
32599 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
32600 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
32601 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddquv32qi, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
32602 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedquv32qi, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
32603 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
32605 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
32606 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
32607 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
32609 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
32610 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
32611 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
32612 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
32613 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
32614 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
32615 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
32616 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
32619 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
32620 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
32621 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
32622 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
32623 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
32624 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
32625 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
32626 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
32627 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },
32630 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16sf_mask, "__builtin_ia32_compressstoresf512_mask", IX86_BUILTIN_COMPRESSPSSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_UHI },
32631 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16si_mask, "__builtin_ia32_compressstoresi512_mask", IX86_BUILTIN_PCOMPRESSDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_UHI },
32632 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8df_mask, "__builtin_ia32_compressstoredf512_mask", IX86_BUILTIN_COMPRESSPDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_UQI },
32633 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8di_mask, "__builtin_ia32_compressstoredi512_mask", IX86_BUILTIN_PCOMPRESSQSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_UQI },
32634 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandloadsf512_mask", IX86_BUILTIN_EXPANDPSLOAD512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_UHI },
32635 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandloadsf512_maskz", IX86_BUILTIN_EXPANDPSLOAD512Z, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_UHI },
32636 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandloadsi512_mask", IX86_BUILTIN_PEXPANDDLOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_UHI },
32637 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandloadsi512_maskz", IX86_BUILTIN_PEXPANDDLOAD512Z, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_UHI },
32638 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expandloaddf512_mask", IX86_BUILTIN_EXPANDPDLOAD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_UQI },
32639 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expandloaddf512_maskz", IX86_BUILTIN_EXPANDPDLOAD512Z, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_UQI },
32640 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expandloaddi512_mask", IX86_BUILTIN_PEXPANDQLOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_UQI },
32641 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expandloaddi512_maskz", IX86_BUILTIN_PEXPANDQLOAD512Z, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_UQI },
32642 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv16si_mask, "__builtin_ia32_loaddqusi512_mask", IX86_BUILTIN_LOADDQUSI512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_UHI },
32643 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv8di_mask, "__builtin_ia32_loaddqudi512_mask", IX86_BUILTIN_LOADDQUDI512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_UQI },
32644 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadupd512_mask, "__builtin_ia32_loadupd512_mask", IX86_BUILTIN_LOADUPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_UQI },
32645 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadups512_mask, "__builtin_ia32_loadups512_mask", IX86_BUILTIN_LOADUPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_UHI },
32646 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadaps512_mask", IX86_BUILTIN_LOADAPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_UHI },
32647 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32load512_mask", IX86_BUILTIN_MOVDQA32LOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_UHI },
32648 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadapd512_mask", IX86_BUILTIN_LOADAPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_UQI },
32649 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64load512_mask", IX86_BUILTIN_MOVDQA64LOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_UQI },
32650 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv16sf, "__builtin_ia32_movntps512", IX86_BUILTIN_MOVNTPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF },
32651 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8df, "__builtin_ia32_movntpd512", IX86_BUILTIN_MOVNTPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF },
32652 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8di, "__builtin_ia32_movntdq512", IX86_BUILTIN_MOVNTDQ512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI },
32653 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntdqa, "__builtin_ia32_movntdqa512", IX86_BUILTIN_MOVNTDQA512, UNKNOWN, (int) V8DI_FTYPE_PV8DI },
32654 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv16si_mask, "__builtin_ia32_storedqusi512_mask", IX86_BUILTIN_STOREDQUSI512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_UHI },
32655 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv8di_mask, "__builtin_ia32_storedqudi512_mask", IX86_BUILTIN_STOREDQUDI512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_UQI },
32656 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeupd512_mask, "__builtin_ia32_storeupd512_mask", IX86_BUILTIN_STOREUPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_UQI },
32657 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask_store, "__builtin_ia32_pmovusqd512mem_mask", IX86_BUILTIN_PMOVUSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_UQI },
32658 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask_store, "__builtin_ia32_pmovsqd512mem_mask", IX86_BUILTIN_PMOVSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_UQI },
32659 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask_store, "__builtin_ia32_pmovqd512mem_mask", IX86_BUILTIN_PMOVQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_UQI },
32660 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovusqw512mem_mask", IX86_BUILTIN_PMOVUSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_UQI },
32661 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovsqw512mem_mask", IX86_BUILTIN_PMOVSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_UQI },
32662 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovqw512mem_mask", IX86_BUILTIN_PMOVQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_UQI },
32663 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovusdw512mem_mask", IX86_BUILTIN_PMOVUSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_UHI },
32664 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovsdw512mem_mask", IX86_BUILTIN_PMOVSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_UHI },
32665 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovdw512mem_mask", IX86_BUILTIN_PMOVDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_UHI },
32666 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovqb512mem_mask", IX86_BUILTIN_PMOVQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_UQI },
32667 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovusqb512mem_mask", IX86_BUILTIN_PMOVUSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_UQI },
32668 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovsqb512mem_mask", IX86_BUILTIN_PMOVSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_UQI },
32669 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovusdb512mem_mask", IX86_BUILTIN_PMOVUSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_UHI },
32670 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovsdb512mem_mask", IX86_BUILTIN_PMOVSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_UHI },
32671 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovdb512mem_mask", IX86_BUILTIN_PMOVDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_UHI },
32672 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeups512_mask, "__builtin_ia32_storeups512_mask", IX86_BUILTIN_STOREUPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_UHI },
32673 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeaps512_mask", IX86_BUILTIN_STOREAPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_UHI },
32674 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_movdqa32store512_mask", IX86_BUILTIN_MOVDQA32STORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_UHI },
32675 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeapd512_mask", IX86_BUILTIN_STOREAPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_UQI },
32676 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_movdqa64store512_mask", IX86_BUILTIN_MOVDQA64STORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_UQI },
32678 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
32679 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
32680 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
32681 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
32682 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
32683 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
32686 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
32687 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
32688 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
32689 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
32690 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
32691 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
32692 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
32693 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
32696 { OPTION_MASK_ISA_RTM, CODE_FOR_xbegin, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
32697 { OPTION_MASK_ISA_RTM, CODE_FOR_xend, "__builtin_ia32_xend", IX86_BUILTIN_XEND, UNKNOWN, (int) VOID_FTYPE_VOID },
32698 { OPTION_MASK_ISA_RTM, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID },
32701 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loaddquv32hi_mask, "__builtin_ia32_loaddquhi512_mask", IX86_BUILTIN_LOADDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_PCV32HI_V32HI_USI },
32702 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_loaddquv64qi_mask, "__builtin_ia32_loaddquqi512_mask", IX86_BUILTIN_LOADDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_PCV64QI_V64QI_UDI },
32703 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storedquv32hi_mask, "__builtin_ia32_storedquhi512_mask", IX86_BUILTIN_STOREDQUHI512_MASK, UNKNOWN, (int) VOID_FTYPE_PV32HI_V32HI_USI },
32704 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storedquv64qi_mask, "__builtin_ia32_storedquqi512_mask", IX86_BUILTIN_STOREDQUQI512_MASK, UNKNOWN, (int) VOID_FTYPE_PV64QI_V64QI_UDI },
32707 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv16hi_mask, "__builtin_ia32_loaddquhi256_mask", IX86_BUILTIN_LOADDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_PCV16HI_V16HI_UHI },
32708 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv8hi_mask, "__builtin_ia32_loaddquhi128_mask", IX86_BUILTIN_LOADDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_PCV8HI_V8HI_UQI },
32709 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv32qi_mask, "__builtin_ia32_loaddquqi256_mask", IX86_BUILTIN_LOADDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_PCV32QI_V32QI_USI },
32710 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv16qi_mask, "__builtin_ia32_loaddquqi128_mask", IX86_BUILTIN_LOADDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_PCV16QI_V16QI_UHI },
32711 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4di_mask, "__builtin_ia32_movdqa64load256_mask", IX86_BUILTIN_MOVDQA64LOAD256_MASK, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_UQI },
32712 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2di_mask, "__builtin_ia32_movdqa64load128_mask", IX86_BUILTIN_MOVDQA64LOAD128_MASK, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_UQI },
32713 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8si_mask, "__builtin_ia32_movdqa32load256_mask", IX86_BUILTIN_MOVDQA32LOAD256_MASK, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_UQI },
32714 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4si_mask, "__builtin_ia32_movdqa32load128_mask", IX86_BUILTIN_MOVDQA32LOAD128_MASK, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_UQI },
32715 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4di_mask, "__builtin_ia32_movdqa64store256_mask", IX86_BUILTIN_MOVDQA64STORE256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_UQI },
32716 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2di_mask, "__builtin_ia32_movdqa64store128_mask", IX86_BUILTIN_MOVDQA64STORE128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_UQI },
32717 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8si_mask, "__builtin_ia32_movdqa32store256_mask", IX86_BUILTIN_MOVDQA32STORE256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_UQI },
32718 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4si_mask, "__builtin_ia32_movdqa32store128_mask", IX86_BUILTIN_MOVDQA32STORE128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_UQI },
32719 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4df_mask, "__builtin_ia32_loadapd256_mask", IX86_BUILTIN_LOADAPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_UQI },
32720 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2df_mask, "__builtin_ia32_loadapd128_mask", IX86_BUILTIN_LOADAPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_UQI },
32721 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8sf_mask, "__builtin_ia32_loadaps256_mask", IX86_BUILTIN_LOADAPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_UQI },
32722 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4sf_mask, "__builtin_ia32_loadaps128_mask", IX86_BUILTIN_LOADAPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_UQI },
32723 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4df_mask, "__builtin_ia32_storeapd256_mask", IX86_BUILTIN_STOREAPD256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_UQI },
32724 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2df_mask, "__builtin_ia32_storeapd128_mask", IX86_BUILTIN_STOREAPD128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_UQI },
32725 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8sf_mask, "__builtin_ia32_storeaps256_mask", IX86_BUILTIN_STOREAPS256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_UQI },
32726 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4sf_mask, "__builtin_ia32_storeaps128_mask", IX86_BUILTIN_STOREAPS128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_UQI },
32727 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loadupd256_mask, "__builtin_ia32_loadupd256_mask", IX86_BUILTIN_LOADUPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_UQI },
32728 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loadupd_mask, "__builtin_ia32_loadupd128_mask", IX86_BUILTIN_LOADUPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_UQI },
32729 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loadups256_mask, "__builtin_ia32_loadups256_mask", IX86_BUILTIN_LOADUPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_UQI },
32730 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_loadups_mask, "__builtin_ia32_loadups128_mask", IX86_BUILTIN_LOADUPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_UQI },
32731 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeupd256_mask, "__builtin_ia32_storeupd256_mask", IX86_BUILTIN_STOREUPD256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_UQI },
32732 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeupd_mask, "__builtin_ia32_storeupd128_mask", IX86_BUILTIN_STOREUPD128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_UQI },
32733 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeups256_mask, "__builtin_ia32_storeups256_mask", IX86_BUILTIN_STOREUPS256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_UQI },
32734 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeups_mask, "__builtin_ia32_storeups128_mask", IX86_BUILTIN_STOREUPS128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_UQI },
32735 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv4di_mask, "__builtin_ia32_loaddqudi256_mask", IX86_BUILTIN_LOADDQUDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_UQI },
32736 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv2di_mask, "__builtin_ia32_loaddqudi128_mask", IX86_BUILTIN_LOADDQUDI128_MASK, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_UQI },
32737 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv8si_mask, "__builtin_ia32_loaddqusi256_mask", IX86_BUILTIN_LOADDQUSI256_MASK, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_UQI },
32738 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv4si_mask, "__builtin_ia32_loaddqusi128_mask", IX86_BUILTIN_LOADDQUSI128_MASK, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_UQI },
32739 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv4di_mask, "__builtin_ia32_storedqudi256_mask", IX86_BUILTIN_STOREDQUDI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_UQI },
32740 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv2di_mask, "__builtin_ia32_storedqudi128_mask", IX86_BUILTIN_STOREDQUDI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_UQI },
32741 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv8si_mask, "__builtin_ia32_storedqusi256_mask", IX86_BUILTIN_STOREDQUSI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_UQI },
32742 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv4si_mask, "__builtin_ia32_storedqusi128_mask", IX86_BUILTIN_STOREDQUSI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_UQI },
32743 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv16hi_mask, "__builtin_ia32_storedquhi256_mask", IX86_BUILTIN_STOREDQUHI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16HI_UHI },
32744 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv8hi_mask, "__builtin_ia32_storedquhi128_mask", IX86_BUILTIN_STOREDQUHI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8HI_UQI },
32745 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv32qi_mask, "__builtin_ia32_storedquqi256_mask", IX86_BUILTIN_STOREDQUQI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32QI_USI },
32746 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv16qi_mask, "__builtin_ia32_storedquqi128_mask", IX86_BUILTIN_STOREDQUQI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16QI_UHI },
32747 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4df_mask, "__builtin_ia32_compressstoredf256_mask", IX86_BUILTIN_COMPRESSPDSTORE256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_UQI },
32748 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev2df_mask, "__builtin_ia32_compressstoredf128_mask", IX86_BUILTIN_COMPRESSPDSTORE128, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_UQI },
32749 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev8sf_mask, "__builtin_ia32_compressstoresf256_mask", IX86_BUILTIN_COMPRESSPSSTORE256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_UQI },
32750 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4sf_mask, "__builtin_ia32_compressstoresf128_mask", IX86_BUILTIN_COMPRESSPSSTORE128, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_UQI },
32751 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4di_mask, "__builtin_ia32_compressstoredi256_mask", IX86_BUILTIN_PCOMPRESSQSTORE256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_UQI },
32752 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev2di_mask, "__builtin_ia32_compressstoredi128_mask", IX86_BUILTIN_PCOMPRESSQSTORE128, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_UQI },
32753 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev8si_mask, "__builtin_ia32_compressstoresi256_mask", IX86_BUILTIN_PCOMPRESSDSTORE256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_UQI },
32754 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4si_mask, "__builtin_ia32_compressstoresi128_mask", IX86_BUILTIN_PCOMPRESSDSTORE128, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_UQI },
32755 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_mask, "__builtin_ia32_expandloaddf256_mask", IX86_BUILTIN_EXPANDPDLOAD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_UQI },
32756 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_mask, "__builtin_ia32_expandloaddf128_mask", IX86_BUILTIN_EXPANDPDLOAD128, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_UQI },
32757 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_mask, "__builtin_ia32_expandloadsf256_mask", IX86_BUILTIN_EXPANDPSLOAD256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_UQI },
32758 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_mask, "__builtin_ia32_expandloadsf128_mask", IX86_BUILTIN_EXPANDPSLOAD128, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_UQI },
32759 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_mask, "__builtin_ia32_expandloaddi256_mask", IX86_BUILTIN_PEXPANDQLOAD256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_UQI },
32760 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_mask, "__builtin_ia32_expandloaddi128_mask", IX86_BUILTIN_PEXPANDQLOAD128, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_UQI },
32761 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_mask, "__builtin_ia32_expandloadsi256_mask", IX86_BUILTIN_PEXPANDDLOAD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_UQI },
32762 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_mask, "__builtin_ia32_expandloadsi128_mask", IX86_BUILTIN_PEXPANDDLOAD128, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_UQI },
32763 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_maskz, "__builtin_ia32_expandloaddf256_maskz", IX86_BUILTIN_EXPANDPDLOAD256Z, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_UQI },
32764 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_maskz, "__builtin_ia32_expandloaddf128_maskz", IX86_BUILTIN_EXPANDPDLOAD128Z, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_UQI },
32765 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_maskz, "__builtin_ia32_expandloadsf256_maskz", IX86_BUILTIN_EXPANDPSLOAD256Z, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_UQI },
32766 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_maskz, "__builtin_ia32_expandloadsf128_maskz", IX86_BUILTIN_EXPANDPSLOAD128Z, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_UQI },
32767 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_maskz, "__builtin_ia32_expandloaddi256_maskz", IX86_BUILTIN_PEXPANDQLOAD256Z, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_UQI },
32768 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_maskz, "__builtin_ia32_expandloaddi128_maskz", IX86_BUILTIN_PEXPANDQLOAD128Z, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_UQI },
32769 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_maskz, "__builtin_ia32_expandloadsi256_maskz", IX86_BUILTIN_PEXPANDDLOAD256Z, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_UQI },
32770 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_maskz, "__builtin_ia32_expandloadsi128_maskz", IX86_BUILTIN_PEXPANDDLOAD128Z, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_UQI },
32771 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4si2_mask_store, "__builtin_ia32_pmovqd256mem_mask", IX86_BUILTIN_PMOVQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_UQI },
32772 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2si2_mask_store, "__builtin_ia32_pmovqd128mem_mask", IX86_BUILTIN_PMOVQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_UQI },
32773 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4si2_mask_store, "__builtin_ia32_pmovsqd256mem_mask", IX86_BUILTIN_PMOVSQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_UQI },
32774 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2si2_mask_store, "__builtin_ia32_pmovsqd128mem_mask", IX86_BUILTIN_PMOVSQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_UQI },
32775 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4si2_mask_store, "__builtin_ia32_pmovusqd256mem_mask", IX86_BUILTIN_PMOVUSQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_UQI },
32776 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2si2_mask_store, "__builtin_ia32_pmovusqd128mem_mask", IX86_BUILTIN_PMOVUSQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_UQI },
32777 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovqw256mem_mask", IX86_BUILTIN_PMOVQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_UQI },
32778 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovqw128mem_mask", IX86_BUILTIN_PMOVQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_UQI },
32779 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovsqw256mem_mask", IX86_BUILTIN_PMOVSQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_UQI },
32780 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovsqw128mem_mask", IX86_BUILTIN_PMOVSQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_UQI },
32781 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovusqw256mem_mask", IX86_BUILTIN_PMOVUSQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_UQI },
32782 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovusqw128mem_mask", IX86_BUILTIN_PMOVUSQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_UQI },
32783 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovqb256mem_mask", IX86_BUILTIN_PMOVQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_UQI },
32784 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovqb128mem_mask", IX86_BUILTIN_PMOVQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_UQI },
32785 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovsqb256mem_mask", IX86_BUILTIN_PMOVSQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_UQI },
32786 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovsqb128mem_mask", IX86_BUILTIN_PMOVSQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_UQI },
32787 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovusqb256mem_mask", IX86_BUILTIN_PMOVUSQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_UQI },
32788 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovusqb128mem_mask", IX86_BUILTIN_PMOVUSQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_UQI },
32789 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovdb256mem_mask", IX86_BUILTIN_PMOVDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_UQI },
32790 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovdb128mem_mask", IX86_BUILTIN_PMOVDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_UQI },
32791 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovsdb256mem_mask", IX86_BUILTIN_PMOVSDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_UQI },
32792 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovsdb128mem_mask", IX86_BUILTIN_PMOVSDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_UQI },
32793 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovusdb256mem_mask", IX86_BUILTIN_PMOVUSDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_UQI },
32794 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovusdb128mem_mask", IX86_BUILTIN_PMOVUSDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_UQI },
32795 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovdw256mem_mask", IX86_BUILTIN_PMOVDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_UQI },
32796 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovdw128mem_mask", IX86_BUILTIN_PMOVDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_UQI },
32797 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovsdw256mem_mask", IX86_BUILTIN_PMOVSDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_UQI },
32798 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovsdw128mem_mask", IX86_BUILTIN_PMOVSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_UQI },
32799 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovusdw256mem_mask", IX86_BUILTIN_PMOVUSDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_UQI },
32800 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovusdw128mem_mask", IX86_BUILTIN_PMOVUSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_UQI },
32803 { OPTION_MASK_ISA_PCOMMIT, CODE_FOR_pcommit, "__builtin_ia32_pcommit", IX86_BUILTIN_PCOMMIT, UNKNOWN, (int) VOID_FTYPE_VOID },
32805 /* RDPKRU and WRPKRU. */
32806 { OPTION_MASK_ISA_PKU, CODE_FOR_rdpkru, "__builtin_ia32_rdpkru", IX86_BUILTIN_RDPKRU, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
32807 { OPTION_MASK_ISA_PKU, CODE_FOR_wrpkru, "__builtin_ia32_wrpkru", IX86_BUILTIN_WRPKRU, UNKNOWN, (int) VOID_FTYPE_UNSIGNED }
32810 /* Builtins with variable number of arguments. */
32811 static const struct builtin_description bdesc_args[] =
32813 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
32814 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
32815 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
32816 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
32817 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
32818 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
32819 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
32822 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
32823 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32824 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
32825 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
32826 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32827 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
32829 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
32830 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32831 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
32832 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32833 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
32834 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32835 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
32836 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32838 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32839 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32841 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
32842 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
32843 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
32844 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
32846 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
32847 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32848 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
32849 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
32850 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32851 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
32853 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
32854 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32855 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
32856 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
32857 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI},
32858 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI},
32860 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
32861 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
32862 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
32864 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
32866 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
32867 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
32868 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
32869 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
32870 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
32871 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
32873 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
32874 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
32875 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
32876 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
32877 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
32878 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
32880 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
32881 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
32882 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
32883 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
32886 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
32887 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
32888 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
32889 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
32891 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
32892 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
32893 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
32894 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
32895 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
32896 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
32897 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
32898 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
32899 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
32900 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
32901 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
32902 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
32903 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
32904 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
32905 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32908 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
32909 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
32910 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
32911 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
32912 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
32913 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
32916 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
32917 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
32918 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
32919 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
32920 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
32921 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
32922 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
32923 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
32924 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
32925 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
32926 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
32927 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
32929 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32931 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32932 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32933 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32934 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32935 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32936 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32937 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32938 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32940 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
32941 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
32942 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
32943 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
32944 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
32945 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
32946 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
32947 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
32948 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
32949 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
32950 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP},
32951 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
32952 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
32953 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
32954 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
32955 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
32956 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
32957 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
32958 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
32959 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
32961 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32962 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32963 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32964 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32966 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32967 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32968 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32969 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32971 { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32973 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32974 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32975 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32976 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32977 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32979 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
32980 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
32981 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, V4SF_FTYPE_V4SF_DI },
32983 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
32985 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
32986 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
32987 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
32989 { OPTION_MASK_ISA_SSE, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
32990 { OPTION_MASK_ISA_SSE, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
32992 /* SSE MMX or 3Dnow!A */
32993 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
32994 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32995 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32997 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
32998 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
32999 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
33000 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
33002 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
33003 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
33005 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
33008 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33010 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
33011 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
33012 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
33013 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
33014 { OPTION_MASK_ISA_SSE2, CODE_FOR_floatv4siv4sf2, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
33016 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
33017 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
33018 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
33019 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
33020 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
33022 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
33024 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
33025 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
33026 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
33027 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
33029 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_fix_notruncv4sfv4si, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
33030 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
33031 { OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
33033 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33034 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33035 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33036 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33037 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33038 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33039 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33040 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33042 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
33043 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
33044 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
33045 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
33046 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP},
33047 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
33048 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
33049 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
33050 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
33051 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
33052 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
33053 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
33054 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
33055 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
33056 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
33057 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
33058 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
33059 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
33060 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
33061 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
33063 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33064 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33065 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33066 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33068 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33069 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33070 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33071 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33073 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33075 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33076 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33077 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33079 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
33081 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
33082 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33083 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33084 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
33085 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
33086 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33087 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33088 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
33090 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
33091 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33092 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
33093 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33094 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
33095 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33096 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
33097 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33099 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33100 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN,(int) V8HI_FTYPE_V8HI_V8HI },
33102 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
33103 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
33104 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
33105 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
33107 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
33108 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33110 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
33111 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33112 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33113 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
33114 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33115 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33117 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
33118 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33119 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
33120 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33122 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
33123 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33124 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33125 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
33126 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
33127 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33128 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33129 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
33131 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
33132 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
33133 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
33135 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33136 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
33138 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
33139 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_umult_even_v4si, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
33141 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
33143 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
33144 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
33145 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
33146 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
33148 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
33149 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
33150 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
33151 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
33152 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
33153 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
33154 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
33156 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
33157 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
33158 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
33159 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
33160 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
33161 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
33162 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
33164 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
33165 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
33166 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
33167 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
33169 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
33170 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
33171 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
33173 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
33175 { OPTION_MASK_ISA_SSE, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
33178 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
33179 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
33182 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF},
33183 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
33185 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33186 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33187 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33188 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33189 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33190 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33193 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
33194 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
33195 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
33196 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
33197 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
33198 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
33200 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33201 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
33202 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33203 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
33204 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33205 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
33206 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33207 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
33208 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33209 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
33210 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33211 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
33212 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
33213 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
33214 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33215 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
33216 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
33217 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
33218 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
33219 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
33220 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33221 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
33222 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33223 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
33226 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
33227 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
/* SSE4.1 two/three-operand builtins.  */
33230 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33231 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33232 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
33233 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
33234 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33235 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33236 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33237 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
33238 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
33239 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
33241 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
33242 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
33243 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
33244 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
33245 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
33246 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
33247 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
33248 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
33249 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
33250 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
33251 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
33252 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
33253 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
33255 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
33256 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
33257 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
33258 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33259 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33260 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33261 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
33262 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33263 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33264 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
33265 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
33266 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
/* SSE4.1 rounding and ptest builtins.  NOTE(review): these rows use
   OPTION_MASK_ISA_ROUND as the ISA mask -- presumably an alias for the
   SSE4.1 mask; confirm against the OPTION_MASK_ISA_ROUND definition
   earlier in this file.  */
33269 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
33270 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
33271 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33272 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33274 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
33275 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
33276 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
33277 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },
33279 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
33280 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
33282 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
33283 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
33285 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
33286 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
33287 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
33288 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
33290 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND },
33291 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND },
33293 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
33294 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF },
33296 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptestv2di, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
33297 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptestv2di, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
33298 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptestv2di, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
/* SSE4.2 (pcmpgtq and CRC32 builtins).  */
33301 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
33302 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
33303 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
33304 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
33305 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
/* SSE4A (extrq/insertq builtins).  */
33308 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
33309 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
33310 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
33311 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
/* AES builtins.  The name field is 0: these entries are presumably
   given their user-visible "__builtin_ia32_aes*" names by a separate
   registration path -- TODO confirm against ix86_init_mmx_sse_builtins.  */
33314 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
33315 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
33317 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
33318 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
33319 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
33320 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
/* PCLMUL builtin (name field 0 -- named via a separate registration
   path, like the AES rows above; TODO confirm).  */
33323 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
/* AVX 256-bit builtins.  */
33326 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33327 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33328 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33329 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33330 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33331 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33332 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33333 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33334 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33335 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33336 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33337 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33338 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33339 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33340 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33341 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33342 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33343 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33344 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33345 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33346 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33347 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33348 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33349 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33350 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33351 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33353 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
33354 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
33355 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
33356 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
33358 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
33359 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
33360 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
33361 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
33362 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
33363 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
33364 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
33365 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33366 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33367 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33368 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33369 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
33370 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
33371 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
33372 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
33373 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
33374 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
33375 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
33376 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
33377 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_fix_notruncv8sfv8si, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
33378 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
33379 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
33380 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
33381 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
33382 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
33383 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
33384 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
33385 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
33386 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
33387 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
33388 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
33389 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
33390 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
33391 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
33393 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
33394 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
33395 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
33397 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
33398 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
33399 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
33400 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
33401 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
33403 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
33405 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
33406 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
33408 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
33409 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
33410 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
33411 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },
33413 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
33414 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
33416 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
33417 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
33419 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
33420 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
33421 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
33422 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },
33424 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND },
33425 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND },
33427 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
33428 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
33430 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33431 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33432 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33433 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33435 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
33436 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
33437 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
33438 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
33439 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
33440 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
33442 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
33443 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
33444 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
33445 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
33446 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
33447 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
33448 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
33449 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
33450 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
33451 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
33452 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
33453 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
33454 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptestv4di, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
33455 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptestv4di, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
33456 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptestv4di, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
33458 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
33459 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
33461 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
33462 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
33464 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256 ", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
/* AVX2 256-bit integer builtins.  */
33467 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
33468 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
33469 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
33470 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
33471 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
33472 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
33473 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
33474 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
33475 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33476 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33477 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33478 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
33479 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33480 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33481 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33482 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33483 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv2ti, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
33484 { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
33485 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
33486 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33487 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33488 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
33489 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
33490 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33491 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33492 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33493 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
33494 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33495 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33496 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33497 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
33498 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33499 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33500 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33501 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33502 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33503 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33504 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
33505 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
33506 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33507 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33508 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3 , "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33509 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33510 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33511 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3 , "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33512 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33513 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33514 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3 , "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33515 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33516 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33517 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3 , "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33518 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
33519 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
33520 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2 , "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
33521 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2 , "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
33522 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2 , "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
33523 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2 , "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
33524 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2 , "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
33525 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
33526 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2 , "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
33527 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2 , "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
33528 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2 , "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
33529 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2 , "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
33530 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2 , "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
33531 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_smult_even_v8si, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
33532 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmulhrswv16hi3 , "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33533 { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256" , IX86_BUILTIN_PMULHUW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33534 { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256" , IX86_BUILTIN_PMULHW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33535 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256" , IX86_BUILTIN_PMULLW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33536 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256" , IX86_BUILTIN_PMULLD256 , UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33537 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_umult_even_v8si, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
33538 { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
33539 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
33540 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33541 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
33542 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
33543 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
33544 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33545 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33546 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3 , "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33547 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
33548 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
33549 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
33550 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
33551 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
33552 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
33553 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
33554 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
33555 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
33556 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
33557 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
33558 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
33559 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
33560 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
33561 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
33562 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
33563 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
33564 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
33565 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33566 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33567 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33568 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
33569 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33570 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33571 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33572 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33573 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33574 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33575 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33576 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
33577 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
33578 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
33579 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33580 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
33581 { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
33582 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
33583 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
33584 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
33585 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
33586 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
33587 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
33588 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
33589 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
33590 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
33591 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
33592 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
33593 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
33594 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
33595 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
33596 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33597 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
33598 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
33599 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
33600 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
33601 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vextractf128v4di, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
33602 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vinsertf128v4di, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
33603 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
33604 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
33605 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33606 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33607 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33608 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33609 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
33610 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
33611 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
33612 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33614 { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
33617 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
33618 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
33619 { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
33622 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
33623 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
33626 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
33627 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
33628 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
33629 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
33632 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
33633 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
33634 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
33635 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
33636 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
33637 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
33640 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_256si, "__builtin_ia32_si512_256si", IX86_BUILTIN_SI512_SI256, UNKNOWN, (int) V16SI_FTYPE_V8SI },
33641 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_256ps, "__builtin_ia32_ps512_256ps", IX86_BUILTIN_PS512_PS256, UNKNOWN, (int) V16SF_FTYPE_V8SF },
33642 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_256pd, "__builtin_ia32_pd512_256pd", IX86_BUILTIN_PD512_PD256, UNKNOWN, (int) V8DF_FTYPE_V4DF },
33643 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_si, "__builtin_ia32_si512_si", IX86_BUILTIN_SI512_SI, UNKNOWN, (int) V16SI_FTYPE_V4SI },
33644 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_ps, "__builtin_ia32_ps512_ps", IX86_BUILTIN_PS512_PS, UNKNOWN, (int) V16SF_FTYPE_V4SF },
33645 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_pd, "__builtin_ia32_pd512_pd", IX86_BUILTIN_PD512_PD, UNKNOWN, (int) V8DF_FTYPE_V2DF },
33646 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv16si_mask, "__builtin_ia32_alignd512_mask", IX86_BUILTIN_ALIGND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_UHI },
33647 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv8di_mask, "__builtin_ia32_alignq512_mask", IX86_BUILTIN_ALIGNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UQI },
33648 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16si, "__builtin_ia32_blendmd_512_mask", IX86_BUILTIN_BLENDMD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI },
33649 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8df, "__builtin_ia32_blendmpd_512_mask", IX86_BUILTIN_BLENDMPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI },
33650 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16sf, "__builtin_ia32_blendmps_512_mask", IX86_BUILTIN_BLENDMPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI },
33651 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8di, "__builtin_ia32_blendmq_512_mask", IX86_BUILTIN_BLENDMQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI },
33652 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x4_512", IX86_BUILTIN_BROADCASTF32X4_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_UHI },
33653 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8df_mask, "__builtin_ia32_broadcastf64x4_512", IX86_BUILTIN_BROADCASTF64X4_512, UNKNOWN, (int) V8DF_FTYPE_V4DF_V8DF_UQI },
33654 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16si_mask, "__builtin_ia32_broadcasti32x4_512", IX86_BUILTIN_BROADCASTI32X4_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_UHI },
33655 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8di_mask, "__builtin_ia32_broadcasti64x4_512", IX86_BUILTIN_BROADCASTI64X4_512, UNKNOWN, (int) V8DI_FTYPE_V4DI_V8DI_UQI },
33656 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8df_mask, "__builtin_ia32_broadcastsd512", IX86_BUILTIN_BROADCASTSD512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_UQI },
33657 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16sf_mask, "__builtin_ia32_broadcastss512", IX86_BUILTIN_BROADCASTSS512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_UHI },
33658 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16si3_mask, "__builtin_ia32_cmpd512_mask", IX86_BUILTIN_CMPD512, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_INT_UHI },
33659 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8di3_mask, "__builtin_ia32_cmpq512_mask", IX86_BUILTIN_CMPQ512, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_INT_UQI },
33660 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8df_mask, "__builtin_ia32_compressdf512_mask", IX86_BUILTIN_COMPRESSPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI },
33661 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16sf_mask, "__builtin_ia32_compresssf512_mask", IX86_BUILTIN_COMPRESSPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI },
33662 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv8siv8df2_mask, "__builtin_ia32_cvtdq2pd512_mask", IX86_BUILTIN_CVTDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_UQI },
33663 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtps2ph512_mask, "__builtin_ia32_vcvtps2ph512_mask", IX86_BUILTIN_CVTPS2PH512, UNKNOWN, (int) V16HI_FTYPE_V16SF_INT_V16HI_UHI },
33664 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv8siv8df2_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_UQI },
33665 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2sd32, "__builtin_ia32_cvtusi2sd32", IX86_BUILTIN_CVTUSI2SD32, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT },
33666 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expanddf512_mask", IX86_BUILTIN_EXPANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI },
33667 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expanddf512_maskz", IX86_BUILTIN_EXPANDPD512Z, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI },
33668 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandsf512_mask", IX86_BUILTIN_EXPANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI },
33669 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandsf512_maskz", IX86_BUILTIN_EXPANDPS512Z, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI },
33670 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf32x4_mask, "__builtin_ia32_extractf32x4_mask", IX86_BUILTIN_EXTRACTF32X4, UNKNOWN, (int) V4SF_FTYPE_V16SF_INT_V4SF_UQI },
33671 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf64x4_mask, "__builtin_ia32_extractf64x4_mask", IX86_BUILTIN_EXTRACTF64X4, UNKNOWN, (int) V4DF_FTYPE_V8DF_INT_V4DF_UQI },
33672 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti32x4_mask, "__builtin_ia32_extracti32x4_mask", IX86_BUILTIN_EXTRACTI32X4, UNKNOWN, (int) V4SI_FTYPE_V16SI_INT_V4SI_UQI },
33673 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti64x4_mask, "__builtin_ia32_extracti64x4_mask", IX86_BUILTIN_EXTRACTI64X4, UNKNOWN, (int) V4DI_FTYPE_V8DI_INT_V4DI_UQI },
33674 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf32x4_mask, "__builtin_ia32_insertf32x4_mask", IX86_BUILTIN_INSERTF32X4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V4SF_INT_V16SF_UHI },
33675 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf64x4_mask, "__builtin_ia32_insertf64x4_mask", IX86_BUILTIN_INSERTF64X4, UNKNOWN, (int) V8DF_FTYPE_V8DF_V4DF_INT_V8DF_UQI },
33676 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti32x4_mask, "__builtin_ia32_inserti32x4_mask", IX86_BUILTIN_INSERTI32X4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_INT_V16SI_UHI },
33677 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti64x4_mask, "__builtin_ia32_inserti64x4_mask", IX86_BUILTIN_INSERTI64X4, UNKNOWN, (int) V8DI_FTYPE_V8DI_V4DI_INT_V8DI_UQI },
33678 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_movapd512_mask", IX86_BUILTIN_MOVAPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI },
33679 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_movaps512_mask", IX86_BUILTIN_MOVAPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI },
33680 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movddup512_mask, "__builtin_ia32_movddup512_mask", IX86_BUILTIN_MOVDDUP512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI },
33681 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32_512_mask", IX86_BUILTIN_MOVDQA32_512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI },
33682 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64_512_mask", IX86_BUILTIN_MOVDQA64_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI },
33683 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movshdup512_mask, "__builtin_ia32_movshdup512_mask", IX86_BUILTIN_MOVSHDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI },
33684 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movsldup512_mask, "__builtin_ia32_movsldup512_mask", IX86_BUILTIN_MOVSLDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI },
33685 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv16si2_mask, "__builtin_ia32_pabsd512_mask", IX86_BUILTIN_PABSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI },
33686 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv8di2_mask, "__builtin_ia32_pabsq512_mask", IX86_BUILTIN_PABSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI },
33687 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16si3_mask, "__builtin_ia32_paddd512_mask", IX86_BUILTIN_PADDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33688 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8di3_mask, "__builtin_ia32_paddq512_mask", IX86_BUILTIN_PADDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33689 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv16si3_mask, "__builtin_ia32_pandd512_mask", IX86_BUILTIN_PANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33690 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv16si3_mask, "__builtin_ia32_pandnd512_mask", IX86_BUILTIN_PANDND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33691 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv8di3_mask, "__builtin_ia32_pandnq512_mask", IX86_BUILTIN_PANDNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33692 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv8di3_mask, "__builtin_ia32_pandq512_mask", IX86_BUILTIN_PANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33693 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16si_mask, "__builtin_ia32_pbroadcastd512", IX86_BUILTIN_PBROADCASTD512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_UHI },
33694 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv16si_mask, "__builtin_ia32_pbroadcastd512_gpr_mask", IX86_BUILTIN_PBROADCASTD512_GPR, UNKNOWN, (int) V16SI_FTYPE_SI_V16SI_UHI },
33695 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_broadcastmb512", IX86_BUILTIN_PBROADCASTMB512, UNKNOWN, (int) V8DI_FTYPE_UQI },
33696 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv16si, "__builtin_ia32_broadcastmw512", IX86_BUILTIN_PBROADCASTMW512, UNKNOWN, (int) V16SI_FTYPE_UHI },
33697 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8di_mask, "__builtin_ia32_pbroadcastq512", IX86_BUILTIN_PBROADCASTQ512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_UQI },
33698 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_UQI },
33699 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv16si3_mask, "__builtin_ia32_pcmpeqd512_mask", IX86_BUILTIN_PCMPEQD512_MASK, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_UHI },
33700 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv8di3_mask, "__builtin_ia32_pcmpeqq512_mask", IX86_BUILTIN_PCMPEQQ512_MASK, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_UQI },
33701 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv16si3_mask, "__builtin_ia32_pcmpgtd512_mask", IX86_BUILTIN_PCMPGTD512_MASK, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_UHI },
33702 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv8di3_mask, "__builtin_ia32_pcmpgtq512_mask", IX86_BUILTIN_PCMPGTQ512_MASK, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_UQI },
33703 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16si_mask, "__builtin_ia32_compresssi512_mask", IX86_BUILTIN_PCOMPRESSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI },
33704 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8di_mask, "__builtin_ia32_compressdi512_mask", IX86_BUILTIN_PCOMPRESSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI },
33705 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandsi512_mask", IX86_BUILTIN_PEXPANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI },
33706 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandsi512_maskz", IX86_BUILTIN_PEXPANDD512Z, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI },
33707 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expanddi512_mask", IX86_BUILTIN_PEXPANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI },
33708 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expanddi512_maskz", IX86_BUILTIN_PEXPANDQ512Z, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI },
33709 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16si3_mask, "__builtin_ia32_pmaxsd512_mask", IX86_BUILTIN_PMAXSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33710 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8di3_mask, "__builtin_ia32_pmaxsq512_mask", IX86_BUILTIN_PMAXSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33711 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv16si3_mask, "__builtin_ia32_pmaxud512_mask", IX86_BUILTIN_PMAXUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33712 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv8di3_mask, "__builtin_ia32_pmaxuq512_mask", IX86_BUILTIN_PMAXUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33713 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16si3_mask, "__builtin_ia32_pminsd512_mask", IX86_BUILTIN_PMINSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33714 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8di3_mask, "__builtin_ia32_pminsq512_mask", IX86_BUILTIN_PMINSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33715 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv16si3_mask, "__builtin_ia32_pminud512_mask", IX86_BUILTIN_PMINUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33716 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv8di3_mask, "__builtin_ia32_pminuq512_mask", IX86_BUILTIN_PMINUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33717 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask, "__builtin_ia32_pmovdb512_mask", IX86_BUILTIN_PMOVDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_UHI },
33718 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask, "__builtin_ia32_pmovdw512_mask", IX86_BUILTIN_PMOVDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_UHI },
33719 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask, "__builtin_ia32_pmovqb512_mask", IX86_BUILTIN_PMOVQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_UQI },
33720 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask, "__builtin_ia32_pmovqd512_mask", IX86_BUILTIN_PMOVQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_UQI },
33721 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask, "__builtin_ia32_pmovqw512_mask", IX86_BUILTIN_PMOVQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_UQI },
33722 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask, "__builtin_ia32_pmovsdb512_mask", IX86_BUILTIN_PMOVSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_UHI },
33723 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask, "__builtin_ia32_pmovsdw512_mask", IX86_BUILTIN_PMOVSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_UHI },
33724 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask, "__builtin_ia32_pmovsqb512_mask", IX86_BUILTIN_PMOVSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_UQI },
33725 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask, "__builtin_ia32_pmovsqd512_mask", IX86_BUILTIN_PMOVSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_UQI },
33726 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask, "__builtin_ia32_pmovsqw512_mask", IX86_BUILTIN_PMOVSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_UQI },
33727 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16qiv16si2_mask, "__builtin_ia32_pmovsxbd512_mask", IX86_BUILTIN_PMOVSXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_UHI },
33728 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8qiv8di2_mask, "__builtin_ia32_pmovsxbq512_mask", IX86_BUILTIN_PMOVSXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_UQI },
33729 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8siv8di2_mask, "__builtin_ia32_pmovsxdq512_mask", IX86_BUILTIN_PMOVSXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_UQI },
33730 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16hiv16si2_mask, "__builtin_ia32_pmovsxwd512_mask", IX86_BUILTIN_PMOVSXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_UHI },
33731 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8hiv8di2_mask, "__builtin_ia32_pmovsxwq512_mask", IX86_BUILTIN_PMOVSXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_UQI },
33732 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask, "__builtin_ia32_pmovusdb512_mask", IX86_BUILTIN_PMOVUSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_UHI },
33733 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask, "__builtin_ia32_pmovusdw512_mask", IX86_BUILTIN_PMOVUSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_UHI },
33734 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask, "__builtin_ia32_pmovusqb512_mask", IX86_BUILTIN_PMOVUSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_UQI },
33735 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask, "__builtin_ia32_pmovusqd512_mask", IX86_BUILTIN_PMOVUSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_UQI },
33736 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask, "__builtin_ia32_pmovusqw512_mask", IX86_BUILTIN_PMOVUSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_UQI },
33737 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16qiv16si2_mask, "__builtin_ia32_pmovzxbd512_mask", IX86_BUILTIN_PMOVZXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_UHI },
33738 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8qiv8di2_mask, "__builtin_ia32_pmovzxbq512_mask", IX86_BUILTIN_PMOVZXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_UQI },
33739 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8siv8di2_mask, "__builtin_ia32_pmovzxdq512_mask", IX86_BUILTIN_PMOVZXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_UQI },
33740 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16hiv16si2_mask, "__builtin_ia32_pmovzxwd512_mask", IX86_BUILTIN_PMOVZXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_UHI },
33741 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8hiv8di2_mask, "__builtin_ia32_pmovzxwq512_mask", IX86_BUILTIN_PMOVZXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_UQI },
33742 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_smult_even_v16si_mask, "__builtin_ia32_pmuldq512_mask", IX86_BUILTIN_PMULDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_UQI },
33743 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16si3_mask, "__builtin_ia32_pmulld512_mask" , IX86_BUILTIN_PMULLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33744 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_umult_even_v16si_mask, "__builtin_ia32_pmuludq512_mask", IX86_BUILTIN_PMULUDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_UQI },
33745 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv16si3_mask, "__builtin_ia32_pord512_mask", IX86_BUILTIN_PORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33746 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv8di3_mask, "__builtin_ia32_porq512_mask", IX86_BUILTIN_PORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33747 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv16si_mask, "__builtin_ia32_prold512_mask", IX86_BUILTIN_PROLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI },
33748 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv8di_mask, "__builtin_ia32_prolq512_mask", IX86_BUILTIN_PROLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI },
33749 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv16si_mask, "__builtin_ia32_prolvd512_mask", IX86_BUILTIN_PROLVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33750 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv8di_mask, "__builtin_ia32_prolvq512_mask", IX86_BUILTIN_PROLVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33751 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv16si_mask, "__builtin_ia32_prord512_mask", IX86_BUILTIN_PRORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI },
33752 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv8di_mask, "__builtin_ia32_prorq512_mask", IX86_BUILTIN_PRORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI },
33753 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv16si_mask, "__builtin_ia32_prorvd512_mask", IX86_BUILTIN_PRORVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33754 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv8di_mask, "__builtin_ia32_prorvq512_mask", IX86_BUILTIN_PRORVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33755 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pshufdv3_mask, "__builtin_ia32_pshufd512_mask", IX86_BUILTIN_PSHUFD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI },
33756 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslld512_mask", IX86_BUILTIN_PSLLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_UHI },
33757 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslldi512_mask", IX86_BUILTIN_PSLLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI },
33758 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllq512_mask", IX86_BUILTIN_PSLLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_UQI },
33759 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllqi512_mask", IX86_BUILTIN_PSLLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI },
33760 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv16si_mask, "__builtin_ia32_psllv16si_mask", IX86_BUILTIN_PSLLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33761 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv8di_mask, "__builtin_ia32_psllv8di_mask", IX86_BUILTIN_PSLLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33762 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psrad512_mask", IX86_BUILTIN_PSRAD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_UHI },
33763 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psradi512_mask", IX86_BUILTIN_PSRADI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI },
33764 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraq512_mask", IX86_BUILTIN_PSRAQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_UQI },
33765 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraqi512_mask", IX86_BUILTIN_PSRAQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI },
33766 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv16si_mask, "__builtin_ia32_psrav16si_mask", IX86_BUILTIN_PSRAVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33767 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv8di_mask, "__builtin_ia32_psrav8di_mask", IX86_BUILTIN_PSRAVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33768 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrld512_mask", IX86_BUILTIN_PSRLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_UHI },
33769 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrldi512_mask", IX86_BUILTIN_PSRLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI },
33770 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlq512_mask", IX86_BUILTIN_PSRLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_UQI },
33771 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlqi512_mask", IX86_BUILTIN_PSRLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI },
33772 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv16si_mask, "__builtin_ia32_psrlv16si_mask", IX86_BUILTIN_PSRLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33773 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv8di_mask, "__builtin_ia32_psrlv8di_mask", IX86_BUILTIN_PSRLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33774 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16si3_mask, "__builtin_ia32_psubd512_mask", IX86_BUILTIN_PSUBD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33775 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8di3_mask, "__builtin_ia32_psubq512_mask", IX86_BUILTIN_PSUBQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33776 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv16si3_mask, "__builtin_ia32_ptestmd512", IX86_BUILTIN_PTESTMD512, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_UHI },
33777 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv8di3_mask, "__builtin_ia32_ptestmq512", IX86_BUILTIN_PTESTMQ512, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_UQI },
33778 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv16si3_mask, "__builtin_ia32_ptestnmd512", IX86_BUILTIN_PTESTNMD512, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_UHI },
33779 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv8di3_mask, "__builtin_ia32_ptestnmq512", IX86_BUILTIN_PTESTNMQ512, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_UQI },
33780 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv16si_mask, "__builtin_ia32_punpckhdq512_mask", IX86_BUILTIN_PUNPCKHDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33781 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv8di_mask, "__builtin_ia32_punpckhqdq512_mask", IX86_BUILTIN_PUNPCKHQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33782 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv16si_mask, "__builtin_ia32_punpckldq512_mask", IX86_BUILTIN_PUNPCKLDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33783 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv8di_mask, "__builtin_ia32_punpcklqdq512_mask", IX86_BUILTIN_PUNPCKLQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33784 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv16si3_mask, "__builtin_ia32_pxord512_mask", IX86_BUILTIN_PXORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33785 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv8di3_mask, "__builtin_ia32_pxorq512_mask", IX86_BUILTIN_PXORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33786 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v8df_mask, "__builtin_ia32_rcp14pd512_mask", IX86_BUILTIN_RCP14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI },
33787 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v16sf_mask, "__builtin_ia32_rcp14ps512_mask", IX86_BUILTIN_RCP14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI },
33788 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v2df, "__builtin_ia32_rcp14sd", IX86_BUILTIN_RCP14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33789 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v4sf, "__builtin_ia32_rcp14ss", IX86_BUILTIN_RCP14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33790 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v8df_mask, "__builtin_ia32_rsqrt14pd512_mask", IX86_BUILTIN_RSQRT14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI },
33791 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v16sf_mask, "__builtin_ia32_rsqrt14ps512_mask", IX86_BUILTIN_RSQRT14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI },
33792 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v2df, "__builtin_ia32_rsqrt14sd", IX86_BUILTIN_RSQRT14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
33793 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v4sf, "__builtin_ia32_rsqrt14ss", IX86_BUILTIN_RSQRT14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
33794 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufpd512_mask, "__builtin_ia32_shufpd512_mask", IX86_BUILTIN_SHUFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_UQI },
33795 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufps512_mask, "__builtin_ia32_shufps512_mask", IX86_BUILTIN_SHUFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_UHI },
33796 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_mask", IX86_BUILTIN_SHUF_F32x4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_UHI },
33797 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_mask", IX86_BUILTIN_SHUF_F64x2, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_UQI },
33798 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_mask", IX86_BUILTIN_SHUF_I32x4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_UHI },
33799 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_mask", IX86_BUILTIN_SHUF_I64x2, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UQI },
33800 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv16si3_mask, "__builtin_ia32_ucmpd512_mask", IX86_BUILTIN_UCMPD512, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_INT_UHI },
33801 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv8di3_mask, "__builtin_ia32_ucmpq512_mask", IX86_BUILTIN_UCMPQ512, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_INT_UQI },
33802 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhpd512_mask, "__builtin_ia32_unpckhpd512_mask", IX86_BUILTIN_UNPCKHPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI },
33803 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhps512_mask, "__builtin_ia32_unpckhps512_mask", IX86_BUILTIN_UNPCKHPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI },
33804 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklpd512_mask, "__builtin_ia32_unpcklpd512_mask", IX86_BUILTIN_UNPCKLPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI },
33805 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklps512_mask, "__builtin_ia32_unpcklps512_mask", IX86_BUILTIN_UNPCKLPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI },
33806 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv16si2_mask, "__builtin_ia32_vplzcntd_512_mask", IX86_BUILTIN_VPCLZCNTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI },
33807 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv8di2_mask, "__builtin_ia32_vplzcntq_512_mask", IX86_BUILTIN_VPCLZCNTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI },
33808 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv16si_mask, "__builtin_ia32_vpconflictsi_512_mask", IX86_BUILTIN_VPCONFLICTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI },
33809 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv8di_mask, "__builtin_ia32_vpconflictdi_512_mask", IX86_BUILTIN_VPCONFLICTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI },
33810 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8df_mask, "__builtin_ia32_permdf512_mask", IX86_BUILTIN_VPERMDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_UQI },
33811 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8di_mask, "__builtin_ia32_permdi512_mask", IX86_BUILTIN_VPERMDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI },
33812 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16si3_mask, "__builtin_ia32_vpermi2vard512_mask", IX86_BUILTIN_VPERMI2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33813 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8df3_mask, "__builtin_ia32_vpermi2varpd512_mask", IX86_BUILTIN_VPERMI2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_UQI },
33814 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16sf3_mask, "__builtin_ia32_vpermi2varps512_mask", IX86_BUILTIN_VPERMI2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_UHI },
33815 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8di3_mask, "__builtin_ia32_vpermi2varq512_mask", IX86_BUILTIN_VPERMI2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33816 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv8df_mask, "__builtin_ia32_vpermilpd512_mask", IX86_BUILTIN_VPERMILPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_UQI },
33817 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv16sf_mask, "__builtin_ia32_vpermilps512_mask", IX86_BUILTIN_VPERMILPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_UHI },
33818 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv8df3_mask, "__builtin_ia32_vpermilvarpd512_mask", IX86_BUILTIN_VPERMILVARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_UQI },
33819 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv16sf3_mask, "__builtin_ia32_vpermilvarps512_mask", IX86_BUILTIN_VPERMILVARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_UHI },
33820 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_mask, "__builtin_ia32_vpermt2vard512_mask", IX86_BUILTIN_VPERMT2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33821 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_maskz, "__builtin_ia32_vpermt2vard512_maskz", IX86_BUILTIN_VPERMT2VARD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33822 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_mask, "__builtin_ia32_vpermt2varpd512_mask", IX86_BUILTIN_VPERMT2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_UQI },
33823 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_maskz, "__builtin_ia32_vpermt2varpd512_maskz", IX86_BUILTIN_VPERMT2VARPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_UQI },
33824 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_mask, "__builtin_ia32_vpermt2varps512_mask", IX86_BUILTIN_VPERMT2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_UHI },
33825 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_maskz, "__builtin_ia32_vpermt2varps512_maskz", IX86_BUILTIN_VPERMT2VARPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_UHI },
33826 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_mask, "__builtin_ia32_vpermt2varq512_mask", IX86_BUILTIN_VPERMT2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33827 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_maskz, "__builtin_ia32_vpermt2varq512_maskz", IX86_BUILTIN_VPERMT2VARQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33828 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8df_mask, "__builtin_ia32_permvardf512_mask", IX86_BUILTIN_VPERMVARDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_UQI },
33829 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8di_mask, "__builtin_ia32_permvardi512_mask", IX86_BUILTIN_VPERMVARDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33830 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16sf_mask, "__builtin_ia32_permvarsf512_mask", IX86_BUILTIN_VPERMVARSF512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_UHI },
33831 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16si_mask, "__builtin_ia32_permvarsi512_mask", IX86_BUILTIN_VPERMVARSI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
33832 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_mask, "__builtin_ia32_pternlogd512_mask", IX86_BUILTIN_VTERNLOGD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_UHI },
33833 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_maskz, "__builtin_ia32_pternlogd512_maskz", IX86_BUILTIN_VTERNLOGD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_UHI },
33834 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_mask, "__builtin_ia32_pternlogq512_mask", IX86_BUILTIN_VTERNLOGQ512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_UQI },
33835 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_maskz, "__builtin_ia32_pternlogq512_maskz", IX86_BUILTIN_VTERNLOGQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_UQI },
33837 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv16sf3, "__builtin_ia32_copysignps512", IX86_BUILTIN_CPYSGNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF },
33838 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv8df3, "__builtin_ia32_copysignpd512", IX86_BUILTIN_CPYSGNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF },
33839 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2, "__builtin_ia32_sqrtpd512", IX86_BUILTIN_SQRTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF },
33840 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sqrtv16sf2, "__builtin_ia32_sqrtps512", IX86_BUILTIN_SQRTPS_NR512, UNKNOWN, (int) V16SF_FTYPE_V16SF },
33841 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf, "__builtin_ia32_exp2ps", IX86_BUILTIN_EXP2PS, UNKNOWN, (int) V16SF_FTYPE_V16SF },
33842 { OPTION_MASK_ISA_AVX512F, CODE_FOR_roundv8df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix512", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF },
33843 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_floorpd_vec_pack_sfix512", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
33844 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_ceilpd_vec_pack_sfix512", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
33846 /* Mask arithmetic operations */
33847 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andhi3, "__builtin_ia32_kandhi", IX86_BUILTIN_KAND16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI },
33848 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kandnhi, "__builtin_ia32_kandnhi", IX86_BUILTIN_KANDN16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI },
33849 { OPTION_MASK_ISA_AVX512F, CODE_FOR_one_cmplhi2, "__builtin_ia32_knothi", IX86_BUILTIN_KNOT16, UNKNOWN, (int) UHI_FTYPE_UHI },
33850 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorhi3, "__builtin_ia32_korhi", IX86_BUILTIN_KOR16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI },
33851 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestchi, "__builtin_ia32_kortestchi", IX86_BUILTIN_KORTESTC16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI },
33852 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestzhi, "__builtin_ia32_kortestzhi", IX86_BUILTIN_KORTESTZ16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI },
33853 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kunpckhi, "__builtin_ia32_kunpckhi", IX86_BUILTIN_KUNPCKBW, UNKNOWN, (int) UHI_FTYPE_UHI_UHI },
33854 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kxnorhi, "__builtin_ia32_kxnorhi", IX86_BUILTIN_KXNOR16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI },
33855 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorhi3, "__builtin_ia32_kxorhi", IX86_BUILTIN_KXOR16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI },
33856 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kmovw, "__builtin_ia32_kmov16", IX86_BUILTIN_KMOV16, UNKNOWN, (int) UHI_FTYPE_UHI },
33859 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg1, 0, IX86_BUILTIN_SHA1MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33860 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg2, 0, IX86_BUILTIN_SHA1MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33861 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1nexte, 0, IX86_BUILTIN_SHA1NEXTE, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33862 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1rnds4, 0, IX86_BUILTIN_SHA1RNDS4, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
33863 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg1, 0, IX86_BUILTIN_SHA256MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33864 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg2, 0, IX86_BUILTIN_SHA256MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
33865 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256rnds2, 0, IX86_BUILTIN_SHA256RNDS2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
33868 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_palignrv32qi_mask, "__builtin_ia32_palignr256_mask", IX86_BUILTIN_PALIGNR256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_USI_CONVERT },
33869 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_palignrv16qi_mask, "__builtin_ia32_palignr128_mask", IX86_BUILTIN_PALIGNR128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_UHI_CONVERT },
33870 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4di_mask, "__builtin_ia32_movdqa64_256_mask", IX86_BUILTIN_MOVDQA64_256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI },
33871 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2di_mask, "__builtin_ia32_movdqa64_128_mask", IX86_BUILTIN_MOVDQA64_128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI },
33872 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8si_mask, "__builtin_ia32_movdqa32_256_mask", IX86_BUILTIN_MOVDQA32_256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UQI },
33873 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4si_mask, "__builtin_ia32_movdqa32_128_mask", IX86_BUILTIN_MOVDQA32_128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
33874 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4df_mask, "__builtin_ia32_movapd256_mask", IX86_BUILTIN_MOVAPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
33875 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2df_mask, "__builtin_ia32_movapd128_mask", IX86_BUILTIN_MOVAPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
33876 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8sf_mask, "__builtin_ia32_movaps256_mask", IX86_BUILTIN_MOVAPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
33877 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4sf_mask, "__builtin_ia32_movaps128_mask", IX86_BUILTIN_MOVAPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
33878 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv16hi_mask, "__builtin_ia32_movdquhi256_mask", IX86_BUILTIN_MOVDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_UHI },
33879 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv8hi_mask, "__builtin_ia32_movdquhi128_mask", IX86_BUILTIN_MOVDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_UQI },
33880 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv32qi_mask, "__builtin_ia32_movdquqi256_mask", IX86_BUILTIN_MOVDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_USI },
33881 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv16qi_mask, "__builtin_ia32_movdquqi128_mask", IX86_BUILTIN_MOVDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_UHI },
33882 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4sf3_mask, "__builtin_ia32_minps_mask", IX86_BUILTIN_MINPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
33883 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4sf3_mask, "__builtin_ia32_maxps_mask", IX86_BUILTIN_MAXPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
33884 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv2df3_mask, "__builtin_ia32_minpd_mask", IX86_BUILTIN_MINPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
33885 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv2df3_mask, "__builtin_ia32_maxpd_mask", IX86_BUILTIN_MAXPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
33886 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4df3_mask, "__builtin_ia32_maxpd256_mask", IX86_BUILTIN_MAXPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
33887 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8sf3_mask, "__builtin_ia32_maxps256_mask", IX86_BUILTIN_MAXPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
33888 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4df3_mask, "__builtin_ia32_minpd256_mask", IX86_BUILTIN_MINPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
33889 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8sf3_mask, "__builtin_ia32_minps256_mask", IX86_BUILTIN_MINPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
33890 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4sf3_mask, "__builtin_ia32_mulps_mask", IX86_BUILTIN_MULPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
33891 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_divv4sf3_mask, "__builtin_ia32_divps_mask", IX86_BUILTIN_DIVPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
33892 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv2df3_mask, "__builtin_ia32_mulpd_mask", IX86_BUILTIN_MULPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
33893 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_divv2df3_mask, "__builtin_ia32_divpd_mask", IX86_BUILTIN_DIVPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
33894 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_divv4df3_mask, "__builtin_ia32_divpd256_mask", IX86_BUILTIN_DIVPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
33895 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_divv8sf3_mask, "__builtin_ia32_divps256_mask", IX86_BUILTIN_DIVPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
33896 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4df3_mask, "__builtin_ia32_mulpd256_mask", IX86_BUILTIN_MULPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
33897 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8sf3_mask, "__builtin_ia32_mulps256_mask", IX86_BUILTIN_MULPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
33898 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv2df3_mask, "__builtin_ia32_addpd128_mask", IX86_BUILTIN_ADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
33899 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4df3_mask, "__builtin_ia32_addpd256_mask", IX86_BUILTIN_ADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
33900 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4sf3_mask, "__builtin_ia32_addps128_mask", IX86_BUILTIN_ADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
33901 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8sf3_mask, "__builtin_ia32_addps256_mask", IX86_BUILTIN_ADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
33902 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv2df3_mask, "__builtin_ia32_subpd128_mask", IX86_BUILTIN_SUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
33903 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4df3_mask, "__builtin_ia32_subpd256_mask", IX86_BUILTIN_SUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
33904 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4sf3_mask, "__builtin_ia32_subps128_mask", IX86_BUILTIN_SUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
33905 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8sf3_mask, "__builtin_ia32_subps256_mask", IX86_BUILTIN_SUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
33906 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4df3_mask, "__builtin_ia32_xorpd256_mask", IX86_BUILTIN_XORPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
33907 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv2df3_mask, "__builtin_ia32_xorpd128_mask", IX86_BUILTIN_XORPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
33908 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv8sf3_mask, "__builtin_ia32_xorps256_mask", IX86_BUILTIN_XORPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
33909 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4sf3_mask, "__builtin_ia32_xorps128_mask", IX86_BUILTIN_XORPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
33910 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4df3_mask, "__builtin_ia32_orpd256_mask", IX86_BUILTIN_ORPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
33911 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv2df3_mask, "__builtin_ia32_orpd128_mask", IX86_BUILTIN_ORPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
33912 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv8sf3_mask, "__builtin_ia32_orps256_mask", IX86_BUILTIN_ORPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
33913 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4sf3_mask, "__builtin_ia32_orps128_mask", IX86_BUILTIN_ORPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
33914 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv8sf_mask, "__builtin_ia32_broadcastf32x2_256_mask", IX86_BUILTIN_BROADCASTF32x2_256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_UQI },
33915 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv8si_mask, "__builtin_ia32_broadcasti32x2_256_mask", IX86_BUILTIN_BROADCASTI32x2_256, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_UQI },
33916 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4si_mask, "__builtin_ia32_broadcasti32x2_128_mask", IX86_BUILTIN_BROADCASTI32x2_128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
33917 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4df_mask_1, "__builtin_ia32_broadcastf64x2_256_mask", IX86_BUILTIN_BROADCASTF64X2_256, UNKNOWN, (int) V4DF_FTYPE_V2DF_V4DF_UQI },
33918 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4di_mask_1, "__builtin_ia32_broadcasti64x2_256_mask", IX86_BUILTIN_BROADCASTI64X2_256, UNKNOWN, (int) V4DI_FTYPE_V2DI_V4DI_UQI },
33919 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_broadcastv8sf_mask_1, "__builtin_ia32_broadcastf32x4_256_mask", IX86_BUILTIN_BROADCASTF32X4_256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_UQI },
33920 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_broadcastv8si_mask_1, "__builtin_ia32_broadcasti32x4_256_mask", IX86_BUILTIN_BROADCASTI32X4_256, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_UQI },
33921 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v8sf, "__builtin_ia32_extractf32x4_256_mask", IX86_BUILTIN_EXTRACTF32X4_256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT_V4SF_UQI },
33922 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v8si, "__builtin_ia32_extracti32x4_256_mask", IX86_BUILTIN_EXTRACTI32X4_256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT_V4SI_UQI },
33923 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_dbpsadbwv16hi_mask, "__builtin_ia32_dbpsadbw256_mask", IX86_BUILTIN_DBPSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI_INT_V16HI_UHI },
33924 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_dbpsadbwv8hi_mask, "__builtin_ia32_dbpsadbw128_mask", IX86_BUILTIN_DBPSADBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI_INT_V8HI_UQI },
33925 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4dfv4di2_mask, "__builtin_ia32_cvttpd2qq256_mask", IX86_BUILTIN_CVTTPD2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_UQI },
33926 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv2dfv2di2_mask, "__builtin_ia32_cvttpd2qq128_mask", IX86_BUILTIN_CVTTPD2QQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_UQI },
33927 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4dfv4di2_mask, "__builtin_ia32_cvttpd2uqq256_mask", IX86_BUILTIN_CVTTPD2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_UQI },
33928 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2dfv2di2_mask, "__builtin_ia32_cvttpd2uqq128_mask", IX86_BUILTIN_CVTTPD2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_UQI },
33929 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_notruncv4dfv4di2_mask, "__builtin_ia32_cvtpd2qq256_mask", IX86_BUILTIN_CVTPD2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_UQI },
33930 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_notruncv2dfv2di2_mask, "__builtin_ia32_cvtpd2qq128_mask", IX86_BUILTIN_CVTPD2QQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_UQI },
33931 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv4dfv4di2_mask, "__builtin_ia32_cvtpd2uqq256_mask", IX86_BUILTIN_CVTPD2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_UQI },
33932 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv2dfv2di2_mask, "__builtin_ia32_cvtpd2uqq128_mask", IX86_BUILTIN_CVTPD2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_UQI },
33933 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv4dfv4si2_mask, "__builtin_ia32_cvtpd2udq256_mask", IX86_BUILTIN_CVTPD2UDQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_UQI },
33934 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv2dfv2si2_mask, "__builtin_ia32_cvtpd2udq128_mask", IX86_BUILTIN_CVTPD2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_UQI },
33935 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4sfv4di2_mask, "__builtin_ia32_cvttps2qq256_mask", IX86_BUILTIN_CVTTPS2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_UQI },
33936 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2qq128_mask", IX86_BUILTIN_CVTTPS2QQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_UQI },
33937 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4sfv4di2_mask, "__builtin_ia32_cvttps2uqq256_mask", IX86_BUILTIN_CVTTPS2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_UQI },
33938 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2uqq128_mask", IX86_BUILTIN_CVTTPS2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_UQI },
33939 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv8sfv8si2_mask, "__builtin_ia32_cvttps2dq256_mask", IX86_BUILTIN_CVTTPS2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_UQI },
33940 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4sfv4si2_mask, "__builtin_ia32_cvttps2dq128_mask", IX86_BUILTIN_CVTTPS2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_UQI },
33941 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv8sfv8si2_mask, "__builtin_ia32_cvttps2udq256_mask", IX86_BUILTIN_CVTTPS2UDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_UQI },
33942 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4sfv4si2_mask, "__builtin_ia32_cvttps2udq128_mask", IX86_BUILTIN_CVTTPS2UDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_UQI },
33943 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4dfv4si2_mask, "__builtin_ia32_cvttpd2dq256_mask", IX86_BUILTIN_CVTTPD2DQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_UQI },
33944 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvttpd2dq_mask, "__builtin_ia32_cvttpd2dq128_mask", IX86_BUILTIN_CVTTPD2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_UQI },
33945 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4dfv4si2_mask, "__builtin_ia32_cvttpd2udq256_mask", IX86_BUILTIN_CVTTPD2UDQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_UQI },
33946 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2dfv2si2_mask, "__builtin_ia32_cvttpd2udq128_mask", IX86_BUILTIN_CVTTPD2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_UQI },
33947 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtpd2dq256_mask, "__builtin_ia32_cvtpd2dq256_mask", IX86_BUILTIN_CVTPD2DQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_UQI },
33948 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtpd2dq_mask, "__builtin_ia32_cvtpd2dq128_mask", IX86_BUILTIN_CVTPD2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_UQI },
33949 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4siv4df2_mask, "__builtin_ia32_cvtdq2pd256_mask", IX86_BUILTIN_CVTDQ2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SI_V4DF_UQI },
33950 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtdq2pd_mask, "__builtin_ia32_cvtdq2pd128_mask", IX86_BUILTIN_CVTDQ2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SI_V2DF_UQI },
33951 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4siv4df2_mask, "__builtin_ia32_cvtudq2pd256_mask", IX86_BUILTIN_CVTUDQ2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SI_V4DF_UQI },
33952 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2siv2df2_mask, "__builtin_ia32_cvtudq2pd128_mask", IX86_BUILTIN_CVTUDQ2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SI_V2DF_UQI },
33953 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv8siv8sf2_mask, "__builtin_ia32_cvtdq2ps256_mask", IX86_BUILTIN_CVTDQ2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_UQI },
33954 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4siv4sf2_mask, "__builtin_ia32_cvtdq2ps128_mask", IX86_BUILTIN_CVTDQ2PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_UQI },
33955 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv8siv8sf2_mask, "__builtin_ia32_cvtudq2ps256_mask", IX86_BUILTIN_CVTUDQ2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_UQI },
33956 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4siv4sf2_mask, "__builtin_ia32_cvtudq2ps128_mask", IX86_BUILTIN_CVTUDQ2PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_UQI },
33957 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtps2pd256_mask, "__builtin_ia32_cvtps2pd256_mask", IX86_BUILTIN_CVTPS2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SF_V4DF_UQI },
33958 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtps2pd_mask, "__builtin_ia32_cvtps2pd128_mask", IX86_BUILTIN_CVTPS2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SF_V2DF_UQI },
33959 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv32qi_mask, "__builtin_ia32_pbroadcastb256_mask", IX86_BUILTIN_PBROADCASTB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16QI_V32QI_USI },
33960 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv32qi_mask, "__builtin_ia32_pbroadcastb256_gpr_mask", IX86_BUILTIN_PBROADCASTB256_GPR_MASK, UNKNOWN, (int) V32QI_FTYPE_QI_V32QI_USI },
33961 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv16qi_mask, "__builtin_ia32_pbroadcastb128_mask", IX86_BUILTIN_PBROADCASTB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_UHI },
33962 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv16qi_mask, "__builtin_ia32_pbroadcastb128_gpr_mask", IX86_BUILTIN_PBROADCASTB128_GPR_MASK, UNKNOWN, (int) V16QI_FTYPE_QI_V16QI_UHI },
33963 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv16hi_mask, "__builtin_ia32_pbroadcastw256_mask", IX86_BUILTIN_PBROADCASTW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8HI_V16HI_UHI },
33964 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv16hi_mask, "__builtin_ia32_pbroadcastw256_gpr_mask", IX86_BUILTIN_PBROADCASTW256_GPR_MASK, UNKNOWN, (int) V16HI_FTYPE_HI_V16HI_UHI },
33965 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8hi_mask, "__builtin_ia32_pbroadcastw128_mask", IX86_BUILTIN_PBROADCASTW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_UQI },
33966 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv8hi_mask, "__builtin_ia32_pbroadcastw128_gpr_mask", IX86_BUILTIN_PBROADCASTW128_GPR_MASK, UNKNOWN, (int) V8HI_FTYPE_HI_V8HI_UQI },
33967 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8si_mask, "__builtin_ia32_pbroadcastd256_mask", IX86_BUILTIN_PBROADCASTD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_UQI },
33968 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv8si_mask, "__builtin_ia32_pbroadcastd256_gpr_mask", IX86_BUILTIN_PBROADCASTD256_GPR_MASK, UNKNOWN, (int) V8SI_FTYPE_SI_V8SI_UQI },
33969 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4si_mask, "__builtin_ia32_pbroadcastd128_mask", IX86_BUILTIN_PBROADCASTD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
33970 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv4si_mask, "__builtin_ia32_pbroadcastd128_gpr_mask", IX86_BUILTIN_PBROADCASTD128_GPR_MASK, UNKNOWN, (int) V4SI_FTYPE_SI_V4SI_UQI },
33971 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4di_mask, "__builtin_ia32_pbroadcastq256_mask", IX86_BUILTIN_PBROADCASTQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V2DI_V4DI_UQI },
33972 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv4di_mask, "__builtin_ia32_pbroadcastq256_gpr_mask", IX86_BUILTIN_PBROADCASTQ256_GPR_MASK, UNKNOWN, (int) V4DI_FTYPE_DI_V4DI_UQI },
33973 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv2di_mask, "__builtin_ia32_pbroadcastq128_mask", IX86_BUILTIN_PBROADCASTQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI },
33974 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv2di_mask, "__builtin_ia32_pbroadcastq128_gpr_mask", IX86_BUILTIN_PBROADCASTQ128_GPR_MASK, UNKNOWN, (int) V2DI_FTYPE_DI_V2DI_UQI },
33975 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8sf_mask, "__builtin_ia32_broadcastss256_mask", IX86_BUILTIN_BROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_UQI },
33976 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4sf_mask, "__builtin_ia32_broadcastss128_mask", IX86_BUILTIN_BROADCASTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
33977 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4df_mask, "__builtin_ia32_broadcastsd256_mask", IX86_BUILTIN_BROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_V2DF_V4DF_UQI },
33978 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v4df, "__builtin_ia32_extractf64x2_256_mask", IX86_BUILTIN_EXTRACTF64X2_256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT_V2DF_UQI },
33979 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v4di, "__builtin_ia32_extracti64x2_256_mask", IX86_BUILTIN_EXTRACTI64X2_256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT_V2DI_UQI },
33980 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv8sf, "__builtin_ia32_insertf32x4_256_mask", IX86_BUILTIN_INSERTF32X4_256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT_V8SF_UQI },
33981 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv8si, "__builtin_ia32_inserti32x4_256_mask", IX86_BUILTIN_INSERTI32X4_256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT_V8SI_UQI },
33982 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv16qiv16hi2_mask, "__builtin_ia32_pmovsxbw256_mask", IX86_BUILTIN_PMOVSXBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16QI_V16HI_UHI },
33983 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv8qiv8hi2_mask, "__builtin_ia32_pmovsxbw128_mask", IX86_BUILTIN_PMOVSXBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V8HI_UQI },
33984 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv8qiv8si2_mask, "__builtin_ia32_pmovsxbd256_mask", IX86_BUILTIN_PMOVSXBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16QI_V8SI_UQI },
33985 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv4qiv4si2_mask, "__builtin_ia32_pmovsxbd128_mask", IX86_BUILTIN_PMOVSXBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V16QI_V4SI_UQI },
33986 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4qiv4di2_mask, "__builtin_ia32_pmovsxbq256_mask", IX86_BUILTIN_PMOVSXBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V16QI_V4DI_UQI },
33987 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2qiv2di2_mask, "__builtin_ia32_pmovsxbq128_mask", IX86_BUILTIN_PMOVSXBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V16QI_V2DI_UQI },
33988 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv8hiv8si2_mask, "__builtin_ia32_pmovsxwd256_mask", IX86_BUILTIN_PMOVSXWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HI_V8SI_UQI },
33989 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv4hiv4si2_mask, "__builtin_ia32_pmovsxwd128_mask", IX86_BUILTIN_PMOVSXWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V4SI_UQI },
33990 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4hiv4di2_mask, "__builtin_ia32_pmovsxwq256_mask", IX86_BUILTIN_PMOVSXWQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HI_V4DI_UQI },
33991 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2hiv2di2_mask, "__builtin_ia32_pmovsxwq128_mask", IX86_BUILTIN_PMOVSXWQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HI_V2DI_UQI },
33992 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4siv4di2_mask, "__builtin_ia32_pmovsxdq256_mask", IX86_BUILTIN_PMOVSXDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4SI_V4DI_UQI },
33993 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2siv2di2_mask, "__builtin_ia32_pmovsxdq128_mask", IX86_BUILTIN_PMOVSXDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V2DI_UQI },
33994 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv16qiv16hi2_mask, "__builtin_ia32_pmovzxbw256_mask", IX86_BUILTIN_PMOVZXBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16QI_V16HI_UHI },
33995 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv8qiv8hi2_mask, "__builtin_ia32_pmovzxbw128_mask", IX86_BUILTIN_PMOVZXBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V8HI_UQI },
33996 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv8qiv8si2_mask, "__builtin_ia32_pmovzxbd256_mask", IX86_BUILTIN_PMOVZXBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16QI_V8SI_UQI },
33997 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv4qiv4si2_mask, "__builtin_ia32_pmovzxbd128_mask", IX86_BUILTIN_PMOVZXBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V16QI_V4SI_UQI },
33998 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4qiv4di2_mask, "__builtin_ia32_pmovzxbq256_mask", IX86_BUILTIN_PMOVZXBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V16QI_V4DI_UQI },
33999 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2qiv2di2_mask, "__builtin_ia32_pmovzxbq128_mask", IX86_BUILTIN_PMOVZXBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V16QI_V2DI_UQI },
34000 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv8hiv8si2_mask, "__builtin_ia32_pmovzxwd256_mask", IX86_BUILTIN_PMOVZXWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HI_V8SI_UQI },
34001 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv4hiv4si2_mask, "__builtin_ia32_pmovzxwd128_mask", IX86_BUILTIN_PMOVZXWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V4SI_UQI },
34002 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4hiv4di2_mask, "__builtin_ia32_pmovzxwq256_mask", IX86_BUILTIN_PMOVZXWQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HI_V4DI_UQI },
34003 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2hiv2di2_mask, "__builtin_ia32_pmovzxwq128_mask", IX86_BUILTIN_PMOVZXWQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HI_V2DI_UQI },
34004 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4siv4di2_mask, "__builtin_ia32_pmovzxdq256_mask", IX86_BUILTIN_PMOVZXDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4SI_V4DI_UQI },
34005 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2siv2di2_mask, "__builtin_ia32_pmovzxdq128_mask", IX86_BUILTIN_PMOVZXDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V2DI_UQI },
34006 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv4df_mask, "__builtin_ia32_reducepd256_mask", IX86_BUILTIN_REDUCEPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_UQI },
34007 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv2df_mask, "__builtin_ia32_reducepd128_mask", IX86_BUILTIN_REDUCEPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_UQI },
34008 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv8sf_mask, "__builtin_ia32_reduceps256_mask", IX86_BUILTIN_REDUCEPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_UQI },
34009 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv4sf_mask, "__builtin_ia32_reduceps128_mask", IX86_BUILTIN_REDUCEPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_UQI },
34010 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducesv2df, "__builtin_ia32_reducesd", IX86_BUILTIN_REDUCESD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
34011 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducesv4sf, "__builtin_ia32_reducess", IX86_BUILTIN_REDUCESS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
34012 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv16hi_mask, "__builtin_ia32_permvarhi256_mask", IX86_BUILTIN_VPERMVARHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34013 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv8hi_mask, "__builtin_ia32_permvarhi128_mask", IX86_BUILTIN_VPERMVARHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34014 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16hi3_mask, "__builtin_ia32_vpermt2varhi256_mask", IX86_BUILTIN_VPERMT2VARHI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34015 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16hi3_maskz, "__builtin_ia32_vpermt2varhi256_maskz", IX86_BUILTIN_VPERMT2VARHI256_MASKZ, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34016 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8hi3_mask, "__builtin_ia32_vpermt2varhi128_mask", IX86_BUILTIN_VPERMT2VARHI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34017 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8hi3_maskz, "__builtin_ia32_vpermt2varhi128_maskz", IX86_BUILTIN_VPERMT2VARHI128_MASKZ, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34018 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv16hi3_mask, "__builtin_ia32_vpermi2varhi256_mask", IX86_BUILTIN_VPERMI2VARHI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34019 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8hi3_mask, "__builtin_ia32_vpermi2varhi128_mask", IX86_BUILTIN_VPERMI2VARHI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34020 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v4df_mask, "__builtin_ia32_rcp14pd256_mask", IX86_BUILTIN_RCP14PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
34021 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v2df_mask, "__builtin_ia32_rcp14pd128_mask", IX86_BUILTIN_RCP14PD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
34022 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v8sf_mask, "__builtin_ia32_rcp14ps256_mask", IX86_BUILTIN_RCP14PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
34023 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v4sf_mask, "__builtin_ia32_rcp14ps128_mask", IX86_BUILTIN_RCP14PS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
34024 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v4df_mask, "__builtin_ia32_rsqrt14pd256_mask", IX86_BUILTIN_RSQRT14PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
34025 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v2df_mask, "__builtin_ia32_rsqrt14pd128_mask", IX86_BUILTIN_RSQRT14PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
34026 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v8sf_mask, "__builtin_ia32_rsqrt14ps256_mask", IX86_BUILTIN_RSQRT14PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
34027 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v4sf_mask, "__builtin_ia32_rsqrt14ps128_mask", IX86_BUILTIN_RSQRT14PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
34028 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_sqrtv4df2_mask, "__builtin_ia32_sqrtpd256_mask", IX86_BUILTIN_SQRTPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
34029 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sqrtv2df2_mask, "__builtin_ia32_sqrtpd128_mask", IX86_BUILTIN_SQRTPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
34030 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_sqrtv8sf2_mask, "__builtin_ia32_sqrtps256_mask", IX86_BUILTIN_SQRTPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
34031 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_sqrtv4sf2_mask, "__builtin_ia32_sqrtps128_mask", IX86_BUILTIN_SQRTPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
34032 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv16qi3_mask, "__builtin_ia32_paddb128_mask", IX86_BUILTIN_PADDB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34033 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8hi3_mask, "__builtin_ia32_paddw128_mask", IX86_BUILTIN_PADDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34034 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4si3_mask, "__builtin_ia32_paddd128_mask", IX86_BUILTIN_PADDD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34035 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv2di3_mask, "__builtin_ia32_paddq128_mask", IX86_BUILTIN_PADDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34036 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv16qi3_mask, "__builtin_ia32_psubb128_mask", IX86_BUILTIN_PSUBB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34037 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8hi3_mask, "__builtin_ia32_psubw128_mask", IX86_BUILTIN_PSUBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34038 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4si3_mask, "__builtin_ia32_psubd128_mask", IX86_BUILTIN_PSUBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34039 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv2di3_mask, "__builtin_ia32_psubq128_mask", IX86_BUILTIN_PSUBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34040 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ssaddv16qi3_mask, "__builtin_ia32_paddsb128_mask", IX86_BUILTIN_PADDSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34041 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ssaddv8hi3_mask, "__builtin_ia32_paddsw128_mask", IX86_BUILTIN_PADDSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34042 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sssubv16qi3_mask, "__builtin_ia32_psubsb128_mask", IX86_BUILTIN_PSUBSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34043 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sssubv8hi3_mask, "__builtin_ia32_psubsw128_mask", IX86_BUILTIN_PSUBSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34044 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_usaddv16qi3_mask, "__builtin_ia32_paddusb128_mask", IX86_BUILTIN_PADDUSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34045 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_usaddv8hi3_mask, "__builtin_ia32_paddusw128_mask", IX86_BUILTIN_PADDUSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34046 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ussubv16qi3_mask, "__builtin_ia32_psubusb128_mask", IX86_BUILTIN_PSUBUSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34047 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ussubv8hi3_mask, "__builtin_ia32_psubusw128_mask", IX86_BUILTIN_PSUBUSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34048 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv32qi3_mask, "__builtin_ia32_paddb256_mask", IX86_BUILTIN_PADDB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34049 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv16hi3_mask, "__builtin_ia32_paddw256_mask", IX86_BUILTIN_PADDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34050 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8si3_mask, "__builtin_ia32_paddd256_mask", IX86_BUILTIN_PADDD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34051 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4di3_mask, "__builtin_ia32_paddq256_mask", IX86_BUILTIN_PADDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34052 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ssaddv32qi3_mask, "__builtin_ia32_paddsb256_mask", IX86_BUILTIN_PADDSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34053 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ssaddv16hi3_mask, "__builtin_ia32_paddsw256_mask", IX86_BUILTIN_PADDSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34054 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_usaddv32qi3_mask, "__builtin_ia32_paddusb256_mask", IX86_BUILTIN_PADDUSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34055 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_usaddv16hi3_mask, "__builtin_ia32_paddusw256_mask", IX86_BUILTIN_PADDUSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34056 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv32qi3_mask, "__builtin_ia32_psubb256_mask", IX86_BUILTIN_PSUBB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34057 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv16hi3_mask, "__builtin_ia32_psubw256_mask", IX86_BUILTIN_PSUBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34058 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8si3_mask, "__builtin_ia32_psubd256_mask", IX86_BUILTIN_PSUBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34059 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4di3_mask, "__builtin_ia32_psubq256_mask", IX86_BUILTIN_PSUBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34060 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sssubv32qi3_mask, "__builtin_ia32_psubsb256_mask", IX86_BUILTIN_PSUBSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34061 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sssubv16hi3_mask, "__builtin_ia32_psubsw256_mask", IX86_BUILTIN_PSUBSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34062 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ussubv32qi3_mask, "__builtin_ia32_psubusb256_mask", IX86_BUILTIN_PSUBUSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34063 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ussubv16hi3_mask, "__builtin_ia32_psubusw256_mask", IX86_BUILTIN_PSUBUSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34064 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_256_mask", IX86_BUILTIN_SHUF_F64x2_256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_UQI },
34065 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_256_mask", IX86_BUILTIN_SHUF_I64x2_256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_UQI },
34066 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_256_mask", IX86_BUILTIN_SHUF_I32x4_256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_UQI },
34067 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_256_mask", IX86_BUILTIN_SHUF_F32x4_256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_UQI },
34068 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovwb128_mask", IX86_BUILTIN_PMOVWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_UQI },
34069 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovwb256_mask", IX86_BUILTIN_PMOVWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_UHI },
34070 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovswb128_mask", IX86_BUILTIN_PMOVSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_UQI },
34071 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovswb256_mask", IX86_BUILTIN_PMOVSWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_UHI },
34072 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovuswb128_mask", IX86_BUILTIN_PMOVUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_UQI },
34073 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovuswb256_mask", IX86_BUILTIN_PMOVUSWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_UHI },
34074 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4qi2_mask, "__builtin_ia32_pmovdb128_mask", IX86_BUILTIN_PMOVDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_UQI },
34075 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8qi2_mask, "__builtin_ia32_pmovdb256_mask", IX86_BUILTIN_PMOVDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_UQI },
34076 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4qi2_mask, "__builtin_ia32_pmovsdb128_mask", IX86_BUILTIN_PMOVSDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_UQI },
34077 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8qi2_mask, "__builtin_ia32_pmovsdb256_mask", IX86_BUILTIN_PMOVSDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_UQI },
34078 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4qi2_mask, "__builtin_ia32_pmovusdb128_mask", IX86_BUILTIN_PMOVUSDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_UQI },
34079 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8qi2_mask, "__builtin_ia32_pmovusdb256_mask", IX86_BUILTIN_PMOVUSDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_UQI },
34080 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4hi2_mask, "__builtin_ia32_pmovdw128_mask", IX86_BUILTIN_PMOVDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_UQI },
34081 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8hi2_mask, "__builtin_ia32_pmovdw256_mask", IX86_BUILTIN_PMOVDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_UQI },
34082 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4hi2_mask, "__builtin_ia32_pmovsdw128_mask", IX86_BUILTIN_PMOVSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_UQI },
34083 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8hi2_mask, "__builtin_ia32_pmovsdw256_mask", IX86_BUILTIN_PMOVSDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_UQI },
34084 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4hi2_mask, "__builtin_ia32_pmovusdw128_mask", IX86_BUILTIN_PMOVUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_UQI },
34085 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8hi2_mask, "__builtin_ia32_pmovusdw256_mask", IX86_BUILTIN_PMOVUSDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_UQI },
34086 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2qi2_mask, "__builtin_ia32_pmovqb128_mask", IX86_BUILTIN_PMOVQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_UQI },
34087 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4qi2_mask, "__builtin_ia32_pmovqb256_mask", IX86_BUILTIN_PMOVQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_UQI },
34088 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2qi2_mask, "__builtin_ia32_pmovsqb128_mask", IX86_BUILTIN_PMOVSQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_UQI },
34089 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4qi2_mask, "__builtin_ia32_pmovsqb256_mask", IX86_BUILTIN_PMOVSQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_UQI },
34090 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2qi2_mask, "__builtin_ia32_pmovusqb128_mask", IX86_BUILTIN_PMOVUSQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_UQI },
34091 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4qi2_mask, "__builtin_ia32_pmovusqb256_mask", IX86_BUILTIN_PMOVUSQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_UQI },
34092 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2hi2_mask, "__builtin_ia32_pmovqw128_mask", IX86_BUILTIN_PMOVQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_UQI },
34093 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4hi2_mask, "__builtin_ia32_pmovqw256_mask", IX86_BUILTIN_PMOVQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_UQI },
34094 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2hi2_mask, "__builtin_ia32_pmovsqw128_mask", IX86_BUILTIN_PMOVSQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_UQI },
34095 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4hi2_mask, "__builtin_ia32_pmovsqw256_mask", IX86_BUILTIN_PMOVSQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_UQI },
34096 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2hi2_mask, "__builtin_ia32_pmovusqw128_mask", IX86_BUILTIN_PMOVUSQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_UQI },
34097 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4hi2_mask, "__builtin_ia32_pmovusqw256_mask", IX86_BUILTIN_PMOVUSQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_UQI },
34098 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2si2_mask, "__builtin_ia32_pmovqd128_mask", IX86_BUILTIN_PMOVQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_UQI },
34099 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4si2_mask, "__builtin_ia32_pmovqd256_mask", IX86_BUILTIN_PMOVQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_UQI },
34100 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2si2_mask, "__builtin_ia32_pmovsqd128_mask", IX86_BUILTIN_PMOVSQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_UQI },
34101 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4si2_mask, "__builtin_ia32_pmovsqd256_mask", IX86_BUILTIN_PMOVSQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_UQI },
34102 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2si2_mask, "__builtin_ia32_pmovusqd128_mask", IX86_BUILTIN_PMOVUSQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_UQI },
34103 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4si2_mask, "__builtin_ia32_pmovusqd256_mask", IX86_BUILTIN_PMOVUSQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_UQI },
34104 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv4df_mask, "__builtin_ia32_rangepd256_mask", IX86_BUILTIN_RANGEPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_UQI },
34105 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv2df_mask, "__builtin_ia32_rangepd128_mask", IX86_BUILTIN_RANGEPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI },
34106 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv8sf_mask, "__builtin_ia32_rangeps256_mask", IX86_BUILTIN_RANGEPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_UQI },
34107 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv4sf_mask, "__builtin_ia32_rangeps128_mask", IX86_BUILTIN_RANGEPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI },
34108 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv8sf_mask, "__builtin_ia32_getexpps256_mask", IX86_BUILTIN_GETEXPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
34109 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv4df_mask, "__builtin_ia32_getexppd256_mask", IX86_BUILTIN_GETEXPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
34110 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv4sf_mask, "__builtin_ia32_getexpps128_mask", IX86_BUILTIN_GETEXPPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
34111 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv2df_mask, "__builtin_ia32_getexppd128_mask", IX86_BUILTIN_GETEXPPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
34112 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4df_mask, "__builtin_ia32_fixupimmpd256_mask", IX86_BUILTIN_FIXUPIMMPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT_UQI },
34113 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4df_maskz, "__builtin_ia32_fixupimmpd256_maskz", IX86_BUILTIN_FIXUPIMMPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT_UQI },
34114 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv8sf_mask, "__builtin_ia32_fixupimmps256_mask", IX86_BUILTIN_FIXUPIMMPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT_UQI },
34115 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv8sf_maskz, "__builtin_ia32_fixupimmps256_maskz", IX86_BUILTIN_FIXUPIMMPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT_UQI },
34116 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv2df_mask, "__builtin_ia32_fixupimmpd128_mask", IX86_BUILTIN_FIXUPIMMPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_UQI },
34117 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv2df_maskz, "__builtin_ia32_fixupimmpd128_maskz", IX86_BUILTIN_FIXUPIMMPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_UQI },
34118 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4sf_mask, "__builtin_ia32_fixupimmps128_mask", IX86_BUILTIN_FIXUPIMMPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_UQI },
34119 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4sf_maskz, "__builtin_ia32_fixupimmps128_maskz", IX86_BUILTIN_FIXUPIMMPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_UQI },
34120 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv4di2_mask, "__builtin_ia32_pabsq256_mask", IX86_BUILTIN_PABSQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI },
34121 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv2di2_mask, "__builtin_ia32_pabsq128_mask", IX86_BUILTIN_PABSQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI },
34122 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv8si2_mask, "__builtin_ia32_pabsd256_mask", IX86_BUILTIN_PABSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UQI },
34123 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv4si2_mask, "__builtin_ia32_pabsd128_mask", IX86_BUILTIN_PABSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
34124 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_pmulhrswv16hi3_mask , "__builtin_ia32_pmulhrsw256_mask", IX86_BUILTIN_PMULHRSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34125 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_pmulhrswv8hi3_mask, "__builtin_ia32_pmulhrsw128_mask", IX86_BUILTIN_PMULHRSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34126 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umulv8hi3_highpart_mask, "__builtin_ia32_pmulhuw128_mask", IX86_BUILTIN_PMULHUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34127 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umulv16hi3_highpart_mask, "__builtin_ia32_pmulhuw256_mask" , IX86_BUILTIN_PMULHUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34128 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smulv16hi3_highpart_mask, "__builtin_ia32_pmulhw256_mask" , IX86_BUILTIN_PMULHW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34129 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smulv8hi3_highpart_mask, "__builtin_ia32_pmulhw128_mask", IX86_BUILTIN_PMULHW128_MASK, UNKNOWN,(int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34130 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv16hi3_mask, "__builtin_ia32_pmullw256_mask" , IX86_BUILTIN_PMULLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34131 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8hi3_mask, "__builtin_ia32_pmullw128_mask", IX86_BUILTIN_PMULLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34132 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_mulv4di3_mask, "__builtin_ia32_pmullq256_mask", IX86_BUILTIN_PMULLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34133 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_mulv2di3_mask, "__builtin_ia32_pmullq128_mask", IX86_BUILTIN_PMULLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34134 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4df3_mask, "__builtin_ia32_andpd256_mask", IX86_BUILTIN_ANDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34135 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv2df3_mask, "__builtin_ia32_andpd128_mask", IX86_BUILTIN_ANDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34136 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv8sf3_mask, "__builtin_ia32_andps256_mask", IX86_BUILTIN_ANDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34137 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4sf3_mask, "__builtin_ia32_andps128_mask", IX86_BUILTIN_ANDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34138 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_andnotv4df3_mask, "__builtin_ia32_andnpd256_mask", IX86_BUILTIN_ANDNPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34139 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv2df3_mask, "__builtin_ia32_andnpd128_mask", IX86_BUILTIN_ANDNPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34140 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_andnotv8sf3_mask, "__builtin_ia32_andnps256_mask", IX86_BUILTIN_ANDNPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34141 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_andnotv4sf3_mask, "__builtin_ia32_andnps128_mask", IX86_BUILTIN_ANDNPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34142 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8hi3_mask, "__builtin_ia32_psllwi128_mask", IX86_BUILTIN_PSLLWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_UQI },
34143 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4si3_mask, "__builtin_ia32_pslldi128_mask", IX86_BUILTIN_PSLLDI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_UQI },
34144 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv2di3_mask, "__builtin_ia32_psllqi128_mask", IX86_BUILTIN_PSLLQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_UQI },
34145 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8hi3_mask, "__builtin_ia32_psllw128_mask", IX86_BUILTIN_PSLLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34146 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4si3_mask, "__builtin_ia32_pslld128_mask", IX86_BUILTIN_PSLLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34147 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv2di3_mask, "__builtin_ia32_psllq128_mask", IX86_BUILTIN_PSLLQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34148 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv16hi3_mask, "__builtin_ia32_psllwi256_mask", IX86_BUILTIN_PSLLWI256_MASK , UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_UHI },
34149 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv16hi3_mask, "__builtin_ia32_psllw256_mask", IX86_BUILTIN_PSLLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_UHI },
34150 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8si3_mask, "__builtin_ia32_pslldi256_mask", IX86_BUILTIN_PSLLDI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_UQI },
34151 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8si3_mask, "__builtin_ia32_pslld256_mask", IX86_BUILTIN_PSLLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_UQI },
34152 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4di3_mask, "__builtin_ia32_psllqi256_mask", IX86_BUILTIN_PSLLQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_UQI },
34153 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4di3_mask, "__builtin_ia32_psllq256_mask", IX86_BUILTIN_PSLLQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_UQI },
34154 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4si3_mask, "__builtin_ia32_psradi128_mask", IX86_BUILTIN_PSRADI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_UQI },
34155 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4si3_mask, "__builtin_ia32_psrad128_mask", IX86_BUILTIN_PSRAD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34156 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8si3_mask, "__builtin_ia32_psradi256_mask", IX86_BUILTIN_PSRADI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_UQI },
34157 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8si3_mask, "__builtin_ia32_psrad256_mask", IX86_BUILTIN_PSRAD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_UQI },
34158 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv2di3_mask, "__builtin_ia32_psraqi128_mask", IX86_BUILTIN_PSRAQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_UQI },
34159 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv2di3_mask, "__builtin_ia32_psraq128_mask", IX86_BUILTIN_PSRAQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34160 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4di3_mask, "__builtin_ia32_psraqi256_mask", IX86_BUILTIN_PSRAQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_UQI },
34161 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4di3_mask, "__builtin_ia32_psraq256_mask", IX86_BUILTIN_PSRAQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_UQI },
34162 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv8si3_mask, "__builtin_ia32_pandd256_mask", IX86_BUILTIN_PANDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34163 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4si3_mask, "__builtin_ia32_pandd128_mask", IX86_BUILTIN_PANDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34164 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4si3_mask, "__builtin_ia32_psrldi128_mask", IX86_BUILTIN_PSRLDI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_UQI },
34165 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4si3_mask, "__builtin_ia32_psrld128_mask", IX86_BUILTIN_PSRLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34166 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8si3_mask, "__builtin_ia32_psrldi256_mask", IX86_BUILTIN_PSRLDI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_UQI },
34167 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8si3_mask, "__builtin_ia32_psrld256_mask", IX86_BUILTIN_PSRLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_UQI },
34168 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv2di3_mask, "__builtin_ia32_psrlqi128_mask", IX86_BUILTIN_PSRLQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_UQI },
34169 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv2di3_mask, "__builtin_ia32_psrlq128_mask", IX86_BUILTIN_PSRLQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34170 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4di3_mask, "__builtin_ia32_psrlqi256_mask", IX86_BUILTIN_PSRLQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_UQI },
34171 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4di3_mask, "__builtin_ia32_psrlq256_mask", IX86_BUILTIN_PSRLQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_UQI },
34172 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4di3_mask, "__builtin_ia32_pandq256_mask", IX86_BUILTIN_PANDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34173 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv2di3_mask, "__builtin_ia32_pandq128_mask", IX86_BUILTIN_PANDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34174 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_andnotv8si3_mask, "__builtin_ia32_pandnd256_mask", IX86_BUILTIN_PANDND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34175 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv4si3_mask, "__builtin_ia32_pandnd128_mask", IX86_BUILTIN_PANDND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34176 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_andnotv4di3_mask, "__builtin_ia32_pandnq256_mask", IX86_BUILTIN_PANDNQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34177 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv2di3_mask, "__builtin_ia32_pandnq128_mask", IX86_BUILTIN_PANDNQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34178 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv8si3_mask, "__builtin_ia32_pord256_mask", IX86_BUILTIN_PORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34179 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4si3_mask, "__builtin_ia32_pord128_mask", IX86_BUILTIN_PORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34180 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4di3_mask, "__builtin_ia32_porq256_mask", IX86_BUILTIN_PORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34181 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv2di3_mask, "__builtin_ia32_porq128_mask", IX86_BUILTIN_PORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34182 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv8si3_mask, "__builtin_ia32_pxord256_mask", IX86_BUILTIN_PXORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34183 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4si3_mask, "__builtin_ia32_pxord128_mask", IX86_BUILTIN_PXORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34184 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4di3_mask, "__builtin_ia32_pxorq256_mask", IX86_BUILTIN_PXORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34185 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv2di3_mask, "__builtin_ia32_pxorq128_mask", IX86_BUILTIN_PXORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34186 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packsswb_mask, "__builtin_ia32_packsswb256_mask", IX86_BUILTIN_PACKSSWB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI_V32QI_USI },
34187 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packsswb_mask, "__builtin_ia32_packsswb128_mask", IX86_BUILTIN_PACKSSWB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI_V16QI_UHI },
34188 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packuswb_mask, "__builtin_ia32_packuswb256_mask", IX86_BUILTIN_PACKUSWB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI_V32QI_USI },
34189 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packuswb_mask, "__builtin_ia32_packuswb128_mask", IX86_BUILTIN_PACKUSWB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI_V16QI_UHI },
34190 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev8sf_mask, "__builtin_ia32_rndscaleps_256_mask", IX86_BUILTIN_RNDSCALEPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_UQI },
34191 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev4df_mask, "__builtin_ia32_rndscalepd_256_mask", IX86_BUILTIN_RNDSCALEPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_UQI },
34192 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev4sf_mask, "__builtin_ia32_rndscaleps_128_mask", IX86_BUILTIN_RNDSCALEPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_UQI },
34193 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev2df_mask, "__builtin_ia32_rndscalepd_128_mask", IX86_BUILTIN_RNDSCALEPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_UQI },
34194 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4di_mask, "__builtin_ia32_pternlogq256_mask", IX86_BUILTIN_VTERNLOGQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT_UQI },
34195 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4di_maskz, "__builtin_ia32_pternlogq256_maskz", IX86_BUILTIN_VTERNLOGQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT_UQI },
34196 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv8si_mask, "__builtin_ia32_pternlogd256_mask", IX86_BUILTIN_VTERNLOGD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT_UQI },
34197 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv8si_maskz, "__builtin_ia32_pternlogd256_maskz", IX86_BUILTIN_VTERNLOGD256_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT_UQI },
34198 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv2di_mask, "__builtin_ia32_pternlogq128_mask", IX86_BUILTIN_VTERNLOGQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT_UQI },
34199 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv2di_maskz, "__builtin_ia32_pternlogq128_maskz", IX86_BUILTIN_VTERNLOGQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT_UQI },
34200 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4si_mask, "__builtin_ia32_pternlogd128_mask", IX86_BUILTIN_VTERNLOGD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT_UQI },
34201 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4si_maskz, "__builtin_ia32_pternlogd128_maskz", IX86_BUILTIN_VTERNLOGD128_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT_UQI },
34202 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv4df_mask, "__builtin_ia32_scalefpd256_mask", IX86_BUILTIN_SCALEFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34203 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv8sf_mask, "__builtin_ia32_scalefps256_mask", IX86_BUILTIN_SCALEFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34204 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv2df_mask, "__builtin_ia32_scalefpd128_mask", IX86_BUILTIN_SCALEFPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34205 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv4sf_mask, "__builtin_ia32_scalefps128_mask", IX86_BUILTIN_SCALEFPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34206 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_mask, "__builtin_ia32_vfmaddpd256_mask", IX86_BUILTIN_VFMADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34207 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_mask3, "__builtin_ia32_vfmaddpd256_mask3", IX86_BUILTIN_VFMADDPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34208 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_maskz, "__builtin_ia32_vfmaddpd256_maskz", IX86_BUILTIN_VFMADDPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34209 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_mask, "__builtin_ia32_vfmaddpd128_mask", IX86_BUILTIN_VFMADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34210 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_mask3, "__builtin_ia32_vfmaddpd128_mask3", IX86_BUILTIN_VFMADDPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34211 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_maskz, "__builtin_ia32_vfmaddpd128_maskz", IX86_BUILTIN_VFMADDPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34212 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_mask, "__builtin_ia32_vfmaddps256_mask", IX86_BUILTIN_VFMADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34213 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_mask3, "__builtin_ia32_vfmaddps256_mask3", IX86_BUILTIN_VFMADDPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34214 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_maskz, "__builtin_ia32_vfmaddps256_maskz", IX86_BUILTIN_VFMADDPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34215 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_mask, "__builtin_ia32_vfmaddps128_mask", IX86_BUILTIN_VFMADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34216 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_mask3, "__builtin_ia32_vfmaddps128_mask3", IX86_BUILTIN_VFMADDPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34217 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_maskz, "__builtin_ia32_vfmaddps128_maskz", IX86_BUILTIN_VFMADDPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34218 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4df_mask3, "__builtin_ia32_vfmsubpd256_mask3", IX86_BUILTIN_VFMSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34219 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v2df_mask3, "__builtin_ia32_vfmsubpd128_mask3", IX86_BUILTIN_VFMSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34220 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v8sf_mask3, "__builtin_ia32_vfmsubps256_mask3", IX86_BUILTIN_VFMSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34221 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4sf_mask3, "__builtin_ia32_vfmsubps128_mask3", IX86_BUILTIN_VFMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34222 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4df_mask, "__builtin_ia32_vfnmaddpd256_mask", IX86_BUILTIN_VFNMADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34223 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v2df_mask, "__builtin_ia32_vfnmaddpd128_mask", IX86_BUILTIN_VFNMADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34224 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v8sf_mask, "__builtin_ia32_vfnmaddps256_mask", IX86_BUILTIN_VFNMADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34225 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4sf_mask, "__builtin_ia32_vfnmaddps128_mask", IX86_BUILTIN_VFNMADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34226 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask, "__builtin_ia32_vfnmsubpd256_mask", IX86_BUILTIN_VFNMSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34227 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask3, "__builtin_ia32_vfnmsubpd256_mask3", IX86_BUILTIN_VFNMSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34228 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask, "__builtin_ia32_vfnmsubpd128_mask", IX86_BUILTIN_VFNMSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34229 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask3, "__builtin_ia32_vfnmsubpd128_mask3", IX86_BUILTIN_VFNMSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34230 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_mask, "__builtin_ia32_vfnmsubps256_mask", IX86_BUILTIN_VFNMSUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34231 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_mask3, "__builtin_ia32_vfnmsubps256_mask3", IX86_BUILTIN_VFNMSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34232 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_mask, "__builtin_ia32_vfnmsubps128_mask", IX86_BUILTIN_VFNMSUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34233 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_mask3, "__builtin_ia32_vfnmsubps128_mask3", IX86_BUILTIN_VFNMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34234 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_mask, "__builtin_ia32_vfmaddsubpd256_mask", IX86_BUILTIN_VFMADDSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34235 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_mask3, "__builtin_ia32_vfmaddsubpd256_mask3", IX86_BUILTIN_VFMADDSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34236 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_maskz, "__builtin_ia32_vfmaddsubpd256_maskz", IX86_BUILTIN_VFMADDSUBPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34237 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_mask, "__builtin_ia32_vfmaddsubpd128_mask", IX86_BUILTIN_VFMADDSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34238 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_mask3, "__builtin_ia32_vfmaddsubpd128_mask3", IX86_BUILTIN_VFMADDSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34239 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_maskz, "__builtin_ia32_vfmaddsubpd128_maskz", IX86_BUILTIN_VFMADDSUBPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34240 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_mask, "__builtin_ia32_vfmaddsubps256_mask", IX86_BUILTIN_VFMADDSUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34241 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_mask3, "__builtin_ia32_vfmaddsubps256_mask3", IX86_BUILTIN_VFMADDSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34242 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_maskz, "__builtin_ia32_vfmaddsubps256_maskz", IX86_BUILTIN_VFMADDSUBPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34243 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_mask, "__builtin_ia32_vfmaddsubps128_mask", IX86_BUILTIN_VFMADDSUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34244 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_mask3, "__builtin_ia32_vfmaddsubps128_mask3", IX86_BUILTIN_VFMADDSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34245 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_maskz, "__builtin_ia32_vfmaddsubps128_maskz", IX86_BUILTIN_VFMADDSUBPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34246 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v4df_mask3, "__builtin_ia32_vfmsubaddpd256_mask3", IX86_BUILTIN_VFMSUBADDPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34247 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v2df_mask3, "__builtin_ia32_vfmsubaddpd128_mask3", IX86_BUILTIN_VFMSUBADDPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34248 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v8sf_mask3, "__builtin_ia32_vfmsubaddps256_mask3", IX86_BUILTIN_VFMSUBADDPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34249 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v4sf_mask3, "__builtin_ia32_vfmsubaddps128_mask3", IX86_BUILTIN_VFMSUBADDPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34250 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv4df, "__builtin_ia32_insertf64x2_256_mask", IX86_BUILTIN_INSERTF64X2_256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT_V4DF_UQI },
34251 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv4di, "__builtin_ia32_inserti64x2_256_mask", IX86_BUILTIN_INSERTI64X2_256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT_V4DI_UQI },
34252 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashrvv16hi_mask, "__builtin_ia32_psrav16hi_mask", IX86_BUILTIN_PSRAVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34253 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashrvv8hi_mask, "__builtin_ia32_psrav8hi_mask", IX86_BUILTIN_PSRAVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34254 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddubsw512v16hi_mask, "__builtin_ia32_pmaddubsw256_mask", IX86_BUILTIN_PMADDUBSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI_V16HI_UHI },
34255 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddubsw512v8hi_mask, "__builtin_ia32_pmaddubsw128_mask", IX86_BUILTIN_PMADDUBSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI_V8HI_UQI },
34256 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddwd512v16hi_mask, "__builtin_ia32_pmaddwd256_mask", IX86_BUILTIN_PMADDWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI_V8SI_UQI },
34257 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddwd512v8hi_mask, "__builtin_ia32_pmaddwd128_mask", IX86_BUILTIN_PMADDWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI_V4SI_UQI },
34258 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_lshrvv16hi_mask, "__builtin_ia32_psrlv16hi_mask", IX86_BUILTIN_PSRLVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34259 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_lshrvv8hi_mask, "__builtin_ia32_psrlv8hi_mask", IX86_BUILTIN_PSRLVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34260 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_fix_notruncv8sfv8si_mask, "__builtin_ia32_cvtps2dq256_mask", IX86_BUILTIN_CVTPS2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_UQI },
34261 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_fix_notruncv4sfv4si_mask, "__builtin_ia32_cvtps2dq128_mask", IX86_BUILTIN_CVTPS2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_UQI },
34262 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ufix_notruncv8sfv8si_mask, "__builtin_ia32_cvtps2udq256_mask", IX86_BUILTIN_CVTPS2UDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_UQI },
34263 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ufix_notruncv4sfv4si_mask, "__builtin_ia32_cvtps2udq128_mask", IX86_BUILTIN_CVTPS2UDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_UQI },
34264 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2qqv4di_mask, "__builtin_ia32_cvtps2qq256_mask", IX86_BUILTIN_CVTPS2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_UQI },
34265 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2qqv2di_mask, "__builtin_ia32_cvtps2qq128_mask", IX86_BUILTIN_CVTPS2QQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_UQI },
34266 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2uqqv4di_mask, "__builtin_ia32_cvtps2uqq256_mask", IX86_BUILTIN_CVTPS2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_UQI },
34267 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2uqqv2di_mask, "__builtin_ia32_cvtps2uqq128_mask", IX86_BUILTIN_CVTPS2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_UQI },
34268 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv8sf_mask, "__builtin_ia32_getmantps256_mask", IX86_BUILTIN_GETMANTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_UQI },
34269 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv4sf_mask, "__builtin_ia32_getmantps128_mask", IX86_BUILTIN_GETMANTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_UQI },
34270 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv4df_mask, "__builtin_ia32_getmantpd256_mask", IX86_BUILTIN_GETMANTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_UQI },
34271 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv2df_mask, "__builtin_ia32_getmantpd128_mask", IX86_BUILTIN_GETMANTPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_UQI },
34272 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movddup256_mask, "__builtin_ia32_movddup256_mask", IX86_BUILTIN_MOVDDUP256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
34273 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_dupv2df_mask, "__builtin_ia32_movddup128_mask", IX86_BUILTIN_MOVDDUP128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
34274 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movshdup256_mask, "__builtin_ia32_movshdup256_mask", IX86_BUILTIN_MOVSHDUP256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
34275 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse3_movshdup_mask, "__builtin_ia32_movshdup128_mask", IX86_BUILTIN_MOVSHDUP128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
34276 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movsldup256_mask, "__builtin_ia32_movsldup256_mask", IX86_BUILTIN_MOVSLDUP256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
34277 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse3_movsldup_mask, "__builtin_ia32_movsldup128_mask", IX86_BUILTIN_MOVSLDUP128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
34278 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4div4sf2_mask, "__builtin_ia32_cvtqq2ps256_mask", IX86_BUILTIN_CVTQQ2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DI_V4SF_UQI },
34279 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv2div2sf2_mask, "__builtin_ia32_cvtqq2ps128_mask", IX86_BUILTIN_CVTQQ2PS128, UNKNOWN, (int) V4SF_FTYPE_V2DI_V4SF_UQI },
34280 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4div4sf2_mask, "__builtin_ia32_cvtuqq2ps256_mask", IX86_BUILTIN_CVTUQQ2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DI_V4SF_UQI },
34281 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2div2sf2_mask, "__builtin_ia32_cvtuqq2ps128_mask", IX86_BUILTIN_CVTUQQ2PS128, UNKNOWN, (int) V4SF_FTYPE_V2DI_V4SF_UQI },
34282 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4div4df2_mask, "__builtin_ia32_cvtqq2pd256_mask", IX86_BUILTIN_CVTQQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_UQI },
34283 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv2div2df2_mask, "__builtin_ia32_cvtqq2pd128_mask", IX86_BUILTIN_CVTQQ2PD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_UQI },
34284 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4div4df2_mask, "__builtin_ia32_cvtuqq2pd256_mask", IX86_BUILTIN_CVTUQQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_UQI },
34285 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2div2df2_mask, "__builtin_ia32_cvtuqq2pd128_mask", IX86_BUILTIN_CVTUQQ2PD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_UQI },
34286 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4di3_mask, "__builtin_ia32_vpermt2varq256_mask", IX86_BUILTIN_VPERMT2VARQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34287 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4di3_maskz, "__builtin_ia32_vpermt2varq256_maskz", IX86_BUILTIN_VPERMT2VARQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34288 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8si3_mask, "__builtin_ia32_vpermt2vard256_mask", IX86_BUILTIN_VPERMT2VARD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34289 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8si3_maskz, "__builtin_ia32_vpermt2vard256_maskz", IX86_BUILTIN_VPERMT2VARD256_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34290 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4di3_mask, "__builtin_ia32_vpermi2varq256_mask", IX86_BUILTIN_VPERMI2VARQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34291 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8si3_mask, "__builtin_ia32_vpermi2vard256_mask", IX86_BUILTIN_VPERMI2VARD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34292 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4df3_mask, "__builtin_ia32_vpermt2varpd256_mask", IX86_BUILTIN_VPERMT2VARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_V4DF_UQI },
34293 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4df3_maskz, "__builtin_ia32_vpermt2varpd256_maskz", IX86_BUILTIN_VPERMT2VARPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_V4DF_UQI },
34294 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8sf3_mask, "__builtin_ia32_vpermt2varps256_mask", IX86_BUILTIN_VPERMT2VARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_V8SF_UQI },
34295 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8sf3_maskz, "__builtin_ia32_vpermt2varps256_maskz", IX86_BUILTIN_VPERMT2VARPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_V8SF_UQI },
34296 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4df3_mask, "__builtin_ia32_vpermi2varpd256_mask", IX86_BUILTIN_VPERMI2VARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_UQI },
34297 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8sf3_mask, "__builtin_ia32_vpermi2varps256_mask", IX86_BUILTIN_VPERMI2VARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_UQI },
34298 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2di3_mask, "__builtin_ia32_vpermt2varq128_mask", IX86_BUILTIN_VPERMT2VARQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34299 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2di3_maskz, "__builtin_ia32_vpermt2varq128_maskz", IX86_BUILTIN_VPERMT2VARQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34300 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4si3_mask, "__builtin_ia32_vpermt2vard128_mask", IX86_BUILTIN_VPERMT2VARD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34301 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4si3_maskz, "__builtin_ia32_vpermt2vard128_maskz", IX86_BUILTIN_VPERMT2VARD128_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34302 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv2di3_mask, "__builtin_ia32_vpermi2varq128_mask", IX86_BUILTIN_VPERMI2VARQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34303 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4si3_mask, "__builtin_ia32_vpermi2vard128_mask", IX86_BUILTIN_VPERMI2VARD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34304 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2df3_mask, "__builtin_ia32_vpermt2varpd128_mask", IX86_BUILTIN_VPERMT2VARPD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_V2DF_UQI },
34305 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2df3_maskz, "__builtin_ia32_vpermt2varpd128_maskz", IX86_BUILTIN_VPERMT2VARPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_V2DF_UQI },
34306 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4sf3_mask, "__builtin_ia32_vpermt2varps128_mask", IX86_BUILTIN_VPERMT2VARPS128, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_V4SF_UQI },
34307 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4sf3_maskz, "__builtin_ia32_vpermt2varps128_maskz", IX86_BUILTIN_VPERMT2VARPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_V4SF_UQI },
34308 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv2df3_mask, "__builtin_ia32_vpermi2varpd128_mask", IX86_BUILTIN_VPERMI2VARPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI_V2DF_UQI },
34309 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4sf3_mask, "__builtin_ia32_vpermi2varps128_mask", IX86_BUILTIN_VPERMI2VARPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI_V4SF_UQI },
34310 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_pshufbv32qi3_mask, "__builtin_ia32_pshufb256_mask", IX86_BUILTIN_PSHUFB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34311 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_pshufbv16qi3_mask, "__builtin_ia32_pshufb128_mask", IX86_BUILTIN_PSHUFB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34312 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufhwv3_mask, "__builtin_ia32_pshufhw256_mask", IX86_BUILTIN_PSHUFHW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_UHI },
34313 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufhw_mask, "__builtin_ia32_pshufhw128_mask", IX86_BUILTIN_PSHUFHW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_UQI },
34314 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshuflwv3_mask, "__builtin_ia32_pshuflw256_mask", IX86_BUILTIN_PSHUFLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_UHI },
34315 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshuflw_mask, "__builtin_ia32_pshuflw128_mask", IX86_BUILTIN_PSHUFLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_UQI },
34316 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufdv3_mask, "__builtin_ia32_pshufd256_mask", IX86_BUILTIN_PSHUFD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_UQI },
34317 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufd_mask, "__builtin_ia32_pshufd128_mask", IX86_BUILTIN_PSHUFD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_UQI },
34318 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_shufpd256_mask, "__builtin_ia32_shufpd256_mask", IX86_BUILTIN_SHUFPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_UQI },
34319 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_shufpd_mask, "__builtin_ia32_shufpd128_mask", IX86_BUILTIN_SHUFPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI },
34320 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_shufps256_mask, "__builtin_ia32_shufps256_mask", IX86_BUILTIN_SHUFPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_UQI },
34321 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_shufps_mask, "__builtin_ia32_shufps128_mask", IX86_BUILTIN_SHUFPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI },
34322 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv4di_mask, "__builtin_ia32_prolvq256_mask", IX86_BUILTIN_PROLVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34323 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv2di_mask, "__builtin_ia32_prolvq128_mask", IX86_BUILTIN_PROLVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34324 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv4di_mask, "__builtin_ia32_prolq256_mask", IX86_BUILTIN_PROLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_UQI },
34325 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv2di_mask, "__builtin_ia32_prolq128_mask", IX86_BUILTIN_PROLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_UQI },
34326 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv4di_mask, "__builtin_ia32_prorvq256_mask", IX86_BUILTIN_PRORVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34327 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv2di_mask, "__builtin_ia32_prorvq128_mask", IX86_BUILTIN_PRORVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34328 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv4di_mask, "__builtin_ia32_prorq256_mask", IX86_BUILTIN_PRORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_UQI },
34329 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv2di_mask, "__builtin_ia32_prorq128_mask", IX86_BUILTIN_PRORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_UQI },
34330 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv2di_mask, "__builtin_ia32_psravq128_mask", IX86_BUILTIN_PSRAVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34331 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv4di_mask, "__builtin_ia32_psravq256_mask", IX86_BUILTIN_PSRAVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34332 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv4di_mask, "__builtin_ia32_psllv4di_mask", IX86_BUILTIN_PSLLVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34333 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv2di_mask, "__builtin_ia32_psllv2di_mask", IX86_BUILTIN_PSLLVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34334 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv8si_mask, "__builtin_ia32_psllv8si_mask", IX86_BUILTIN_PSLLVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34335 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv4si_mask, "__builtin_ia32_psllv4si_mask", IX86_BUILTIN_PSLLVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34336 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv8si_mask, "__builtin_ia32_psrav8si_mask", IX86_BUILTIN_PSRAVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34337 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv4si_mask, "__builtin_ia32_psrav4si_mask", IX86_BUILTIN_PSRAVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34338 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv4di_mask, "__builtin_ia32_psrlv4di_mask", IX86_BUILTIN_PSRLVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34339 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv2di_mask, "__builtin_ia32_psrlv2di_mask", IX86_BUILTIN_PSRLVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34340 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv8si_mask, "__builtin_ia32_psrlv8si_mask", IX86_BUILTIN_PSRLVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34341 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv4si_mask, "__builtin_ia32_psrlv4si_mask", IX86_BUILTIN_PSRLVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34342 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv16hi3_mask, "__builtin_ia32_psrawi256_mask", IX86_BUILTIN_PSRAWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_UHI },
34343 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv16hi3_mask, "__builtin_ia32_psraw256_mask", IX86_BUILTIN_PSRAW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_UHI },
34344 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8hi3_mask, "__builtin_ia32_psrawi128_mask", IX86_BUILTIN_PSRAWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_UQI },
34345 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8hi3_mask, "__builtin_ia32_psraw128_mask", IX86_BUILTIN_PSRAW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34346 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv16hi3_mask, "__builtin_ia32_psrlwi256_mask", IX86_BUILTIN_PSRLWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_UHI },
34347 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv16hi3_mask, "__builtin_ia32_psrlw256_mask", IX86_BUILTIN_PSRLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_UHI },
34348 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8hi3_mask, "__builtin_ia32_psrlwi128_mask", IX86_BUILTIN_PSRLWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_UQI },
34349 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8hi3_mask, "__builtin_ia32_psrlw128_mask", IX86_BUILTIN_PSRLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34350 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv8si_mask, "__builtin_ia32_prorvd256_mask", IX86_BUILTIN_PRORVD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34351 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv8si_mask, "__builtin_ia32_prolvd256_mask", IX86_BUILTIN_PROLVD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34352 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv8si_mask, "__builtin_ia32_prord256_mask", IX86_BUILTIN_PRORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_UQI },
34353 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv8si_mask, "__builtin_ia32_prold256_mask", IX86_BUILTIN_PROLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_UQI },
34354 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv4si_mask, "__builtin_ia32_prorvd128_mask", IX86_BUILTIN_PRORVD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34355 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv4si_mask, "__builtin_ia32_prolvd128_mask", IX86_BUILTIN_PROLVD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34356 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv4si_mask, "__builtin_ia32_prord128_mask", IX86_BUILTIN_PRORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_UQI },
34357 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv4si_mask, "__builtin_ia32_prold128_mask", IX86_BUILTIN_PROLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_UQI },
34358 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv4df_mask, "__builtin_ia32_fpclasspd256_mask", IX86_BUILTIN_FPCLASSPD256, UNKNOWN, (int) QI_FTYPE_V4DF_INT_UQI },
34359 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv2df_mask, "__builtin_ia32_fpclasspd128_mask", IX86_BUILTIN_FPCLASSPD128, UNKNOWN, (int) QI_FTYPE_V2DF_INT_UQI },
34360 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vmfpclassv2df, "__builtin_ia32_fpclasssd", IX86_BUILTIN_FPCLASSSD, UNKNOWN, (int) QI_FTYPE_V2DF_INT },
34361 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv8sf_mask, "__builtin_ia32_fpclassps256_mask", IX86_BUILTIN_FPCLASSPS256, UNKNOWN, (int) QI_FTYPE_V8SF_INT_UQI },
34362 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv4sf_mask, "__builtin_ia32_fpclassps128_mask", IX86_BUILTIN_FPCLASSPS128, UNKNOWN, (int) QI_FTYPE_V4SF_INT_UQI },
34363 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vmfpclassv4sf, "__builtin_ia32_fpclassss", IX86_BUILTIN_FPCLASSSS, UNKNOWN, (int) QI_FTYPE_V4SF_INT },
34364 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtb2maskv16qi, "__builtin_ia32_cvtb2mask128", IX86_BUILTIN_CVTB2MASK128, UNKNOWN, (int) UHI_FTYPE_V16QI },
34365 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtb2maskv32qi, "__builtin_ia32_cvtb2mask256", IX86_BUILTIN_CVTB2MASK256, UNKNOWN, (int) USI_FTYPE_V32QI },
34366 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtw2maskv8hi, "__builtin_ia32_cvtw2mask128", IX86_BUILTIN_CVTW2MASK128, UNKNOWN, (int) UQI_FTYPE_V8HI },
34367 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtw2maskv16hi, "__builtin_ia32_cvtw2mask256", IX86_BUILTIN_CVTW2MASK256, UNKNOWN, (int) UHI_FTYPE_V16HI },
34368 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtd2maskv4si, "__builtin_ia32_cvtd2mask128", IX86_BUILTIN_CVTD2MASK128, UNKNOWN, (int) UQI_FTYPE_V4SI },
34369 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtd2maskv8si, "__builtin_ia32_cvtd2mask256", IX86_BUILTIN_CVTD2MASK256, UNKNOWN, (int) UQI_FTYPE_V8SI },
34370 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtq2maskv2di, "__builtin_ia32_cvtq2mask128", IX86_BUILTIN_CVTQ2MASK128, UNKNOWN, (int) UQI_FTYPE_V2DI },
34371 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtq2maskv4di, "__builtin_ia32_cvtq2mask256", IX86_BUILTIN_CVTQ2MASK256, UNKNOWN, (int) UQI_FTYPE_V4DI },
34372 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2bv16qi, "__builtin_ia32_cvtmask2b128", IX86_BUILTIN_CVTMASK2B128, UNKNOWN, (int) V16QI_FTYPE_UHI },
34373 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2bv32qi, "__builtin_ia32_cvtmask2b256", IX86_BUILTIN_CVTMASK2B256, UNKNOWN, (int) V32QI_FTYPE_USI },
34374 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2wv8hi, "__builtin_ia32_cvtmask2w128", IX86_BUILTIN_CVTMASK2W128, UNKNOWN, (int) V8HI_FTYPE_UQI },
34375 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2wv16hi, "__builtin_ia32_cvtmask2w256", IX86_BUILTIN_CVTMASK2W256, UNKNOWN, (int) V16HI_FTYPE_UHI },
34376 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2dv4si, "__builtin_ia32_cvtmask2d128", IX86_BUILTIN_CVTMASK2D128, UNKNOWN, (int) V4SI_FTYPE_UQI },
34377 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2dv8si, "__builtin_ia32_cvtmask2d256", IX86_BUILTIN_CVTMASK2D256, UNKNOWN, (int) V8SI_FTYPE_UQI },
34378 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2qv2di, "__builtin_ia32_cvtmask2q128", IX86_BUILTIN_CVTMASK2Q128, UNKNOWN, (int) V2DI_FTYPE_UQI },
34379 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2qv4di, "__builtin_ia32_cvtmask2q256", IX86_BUILTIN_CVTMASK2Q256, UNKNOWN, (int) V4DI_FTYPE_UQI },
34380 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv16qi3_mask, "__builtin_ia32_pcmpeqb128_mask", IX86_BUILTIN_PCMPEQB128_MASK, UNKNOWN, (int) UHI_FTYPE_V16QI_V16QI_UHI },
34381 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv32qi3_mask, "__builtin_ia32_pcmpeqb256_mask", IX86_BUILTIN_PCMPEQB256_MASK, UNKNOWN, (int) USI_FTYPE_V32QI_V32QI_USI },
34382 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv8hi3_mask, "__builtin_ia32_pcmpeqw128_mask", IX86_BUILTIN_PCMPEQW128_MASK, UNKNOWN, (int) UQI_FTYPE_V8HI_V8HI_UQI },
34383 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv16hi3_mask, "__builtin_ia32_pcmpeqw256_mask", IX86_BUILTIN_PCMPEQW256_MASK, UNKNOWN, (int) UHI_FTYPE_V16HI_V16HI_UHI },
34384 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv4si3_mask, "__builtin_ia32_pcmpeqd128_mask", IX86_BUILTIN_PCMPEQD128_MASK, UNKNOWN, (int) UQI_FTYPE_V4SI_V4SI_UQI },
34385 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv8si3_mask, "__builtin_ia32_pcmpeqd256_mask", IX86_BUILTIN_PCMPEQD256_MASK, UNKNOWN, (int) UQI_FTYPE_V8SI_V8SI_UQI },
34386 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv2di3_mask, "__builtin_ia32_pcmpeqq128_mask", IX86_BUILTIN_PCMPEQQ128_MASK, UNKNOWN, (int) UQI_FTYPE_V2DI_V2DI_UQI },
34387 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv4di3_mask, "__builtin_ia32_pcmpeqq256_mask", IX86_BUILTIN_PCMPEQQ256_MASK, UNKNOWN, (int) UQI_FTYPE_V4DI_V4DI_UQI },
34388 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv16qi3_mask, "__builtin_ia32_pcmpgtb128_mask", IX86_BUILTIN_PCMPGTB128_MASK, UNKNOWN, (int) UHI_FTYPE_V16QI_V16QI_UHI },
34389 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv32qi3_mask, "__builtin_ia32_pcmpgtb256_mask", IX86_BUILTIN_PCMPGTB256_MASK, UNKNOWN, (int) USI_FTYPE_V32QI_V32QI_USI },
34390 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv8hi3_mask, "__builtin_ia32_pcmpgtw128_mask", IX86_BUILTIN_PCMPGTW128_MASK, UNKNOWN, (int) UQI_FTYPE_V8HI_V8HI_UQI },
34391 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv16hi3_mask, "__builtin_ia32_pcmpgtw256_mask", IX86_BUILTIN_PCMPGTW256_MASK, UNKNOWN, (int) UHI_FTYPE_V16HI_V16HI_UHI },
34392 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv4si3_mask, "__builtin_ia32_pcmpgtd128_mask", IX86_BUILTIN_PCMPGTD128_MASK, UNKNOWN, (int) UQI_FTYPE_V4SI_V4SI_UQI },
34393 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv8si3_mask, "__builtin_ia32_pcmpgtd256_mask", IX86_BUILTIN_PCMPGTD256_MASK, UNKNOWN, (int) UQI_FTYPE_V8SI_V8SI_UQI },
34394 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv2di3_mask, "__builtin_ia32_pcmpgtq128_mask", IX86_BUILTIN_PCMPGTQ128_MASK, UNKNOWN, (int) UQI_FTYPE_V2DI_V2DI_UQI },
34395 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv4di3_mask, "__builtin_ia32_pcmpgtq256_mask", IX86_BUILTIN_PCMPGTQ256_MASK, UNKNOWN, (int) UQI_FTYPE_V4DI_V4DI_UQI },
34396 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv16qi3_mask, "__builtin_ia32_ptestmb128", IX86_BUILTIN_PTESTMB128, UNKNOWN, (int) UHI_FTYPE_V16QI_V16QI_UHI },
34397 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv32qi3_mask, "__builtin_ia32_ptestmb256", IX86_BUILTIN_PTESTMB256, UNKNOWN, (int) USI_FTYPE_V32QI_V32QI_USI },
34398 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv8hi3_mask, "__builtin_ia32_ptestmw128", IX86_BUILTIN_PTESTMW128, UNKNOWN, (int) UQI_FTYPE_V8HI_V8HI_UQI },
34399 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv16hi3_mask, "__builtin_ia32_ptestmw256", IX86_BUILTIN_PTESTMW256, UNKNOWN, (int) UHI_FTYPE_V16HI_V16HI_UHI },
34400 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv4si3_mask, "__builtin_ia32_ptestmd128", IX86_BUILTIN_PTESTMD128, UNKNOWN, (int) UQI_FTYPE_V4SI_V4SI_UQI },
34401 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv8si3_mask, "__builtin_ia32_ptestmd256", IX86_BUILTIN_PTESTMD256, UNKNOWN, (int) UQI_FTYPE_V8SI_V8SI_UQI },
34402 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv2di3_mask, "__builtin_ia32_ptestmq128", IX86_BUILTIN_PTESTMQ128, UNKNOWN, (int) UQI_FTYPE_V2DI_V2DI_UQI },
34403 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv4di3_mask, "__builtin_ia32_ptestmq256", IX86_BUILTIN_PTESTMQ256, UNKNOWN, (int) UQI_FTYPE_V4DI_V4DI_UQI },
34404 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv16qi3_mask, "__builtin_ia32_ptestnmb128", IX86_BUILTIN_PTESTNMB128, UNKNOWN, (int) UHI_FTYPE_V16QI_V16QI_UHI },
34405 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv32qi3_mask, "__builtin_ia32_ptestnmb256", IX86_BUILTIN_PTESTNMB256, UNKNOWN, (int) USI_FTYPE_V32QI_V32QI_USI },
34406 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv8hi3_mask, "__builtin_ia32_ptestnmw128", IX86_BUILTIN_PTESTNMW128, UNKNOWN, (int) UQI_FTYPE_V8HI_V8HI_UQI },
34407 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv16hi3_mask, "__builtin_ia32_ptestnmw256", IX86_BUILTIN_PTESTNMW256, UNKNOWN, (int) UHI_FTYPE_V16HI_V16HI_UHI },
34408 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv4si3_mask, "__builtin_ia32_ptestnmd128", IX86_BUILTIN_PTESTNMD128, UNKNOWN, (int) UQI_FTYPE_V4SI_V4SI_UQI },
34409 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv8si3_mask, "__builtin_ia32_ptestnmd256", IX86_BUILTIN_PTESTNMD256, UNKNOWN, (int) UQI_FTYPE_V8SI_V8SI_UQI },
34410 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv2di3_mask, "__builtin_ia32_ptestnmq128", IX86_BUILTIN_PTESTNMQ128, UNKNOWN, (int) UQI_FTYPE_V2DI_V2DI_UQI },
34411 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv4di3_mask, "__builtin_ia32_ptestnmq256", IX86_BUILTIN_PTESTNMQ256, UNKNOWN, (int) UQI_FTYPE_V4DI_V4DI_UQI },
34412 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv2di, "__builtin_ia32_broadcastmb128", IX86_BUILTIN_PBROADCASTMB128, UNKNOWN, (int) V2DI_FTYPE_UQI },
34413 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv4di, "__builtin_ia32_broadcastmb256", IX86_BUILTIN_PBROADCASTMB256, UNKNOWN, (int) V4DI_FTYPE_UQI },
34414 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv4si, "__builtin_ia32_broadcastmw128", IX86_BUILTIN_PBROADCASTMW128, UNKNOWN, (int) V4SI_FTYPE_UHI },
34415 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv8si, "__builtin_ia32_broadcastmw256", IX86_BUILTIN_PBROADCASTMW256, UNKNOWN, (int) V8SI_FTYPE_UHI },
34416 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4df_mask, "__builtin_ia32_compressdf256_mask", IX86_BUILTIN_COMPRESSPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
34417 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv2df_mask, "__builtin_ia32_compressdf128_mask", IX86_BUILTIN_COMPRESSPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
34418 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv8sf_mask, "__builtin_ia32_compresssf256_mask", IX86_BUILTIN_COMPRESSPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
34419 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4sf_mask, "__builtin_ia32_compresssf128_mask", IX86_BUILTIN_COMPRESSPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
34420 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4di_mask, "__builtin_ia32_compressdi256_mask", IX86_BUILTIN_PCOMPRESSQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI },
34421 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv2di_mask, "__builtin_ia32_compressdi128_mask", IX86_BUILTIN_PCOMPRESSQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI },
34422 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv8si_mask, "__builtin_ia32_compresssi256_mask", IX86_BUILTIN_PCOMPRESSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UQI },
34423 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4si_mask, "__builtin_ia32_compresssi128_mask", IX86_BUILTIN_PCOMPRESSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
34424 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_mask, "__builtin_ia32_expanddf256_mask", IX86_BUILTIN_EXPANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
34425 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_mask, "__builtin_ia32_expanddf128_mask", IX86_BUILTIN_EXPANDPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
34426 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_mask, "__builtin_ia32_expandsf256_mask", IX86_BUILTIN_EXPANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
34427 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_mask, "__builtin_ia32_expandsf128_mask", IX86_BUILTIN_EXPANDPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
34428 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_mask, "__builtin_ia32_expanddi256_mask", IX86_BUILTIN_PEXPANDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI },
34429 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_mask, "__builtin_ia32_expanddi128_mask", IX86_BUILTIN_PEXPANDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI },
34430 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_mask, "__builtin_ia32_expandsi256_mask", IX86_BUILTIN_PEXPANDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UQI },
34431 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_mask, "__builtin_ia32_expandsi128_mask", IX86_BUILTIN_PEXPANDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
34432 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_maskz, "__builtin_ia32_expanddf256_maskz", IX86_BUILTIN_EXPANDPD256Z, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
34433 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_maskz, "__builtin_ia32_expanddf128_maskz", IX86_BUILTIN_EXPANDPD128Z, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
34434 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_maskz, "__builtin_ia32_expandsf256_maskz", IX86_BUILTIN_EXPANDPS256Z, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
34435 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_maskz, "__builtin_ia32_expandsf128_maskz", IX86_BUILTIN_EXPANDPS128Z, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
34436 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_maskz, "__builtin_ia32_expanddi256_maskz", IX86_BUILTIN_PEXPANDQ256Z, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI },
34437 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_maskz, "__builtin_ia32_expanddi128_maskz", IX86_BUILTIN_PEXPANDQ128Z, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI },
34438 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_maskz, "__builtin_ia32_expandsi256_maskz", IX86_BUILTIN_PEXPANDD256Z, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UQI },
34439 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_maskz, "__builtin_ia32_expandsi128_maskz", IX86_BUILTIN_PEXPANDD128Z, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
34440 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8si3_mask, "__builtin_ia32_pmaxsd256_mask", IX86_BUILTIN_PMAXSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34441 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8si3_mask, "__builtin_ia32_pminsd256_mask", IX86_BUILTIN_PMINSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34442 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv8si3_mask, "__builtin_ia32_pmaxud256_mask", IX86_BUILTIN_PMAXUD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34443 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv8si3_mask, "__builtin_ia32_pminud256_mask", IX86_BUILTIN_PMINUD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34444 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4si3_mask, "__builtin_ia32_pmaxsd128_mask", IX86_BUILTIN_PMAXSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34445 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4si3_mask, "__builtin_ia32_pminsd128_mask", IX86_BUILTIN_PMINSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34446 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv4si3_mask, "__builtin_ia32_pmaxud128_mask", IX86_BUILTIN_PMAXUD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34447 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv4si3_mask, "__builtin_ia32_pminud128_mask", IX86_BUILTIN_PMINUD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34448 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4di3_mask, "__builtin_ia32_pmaxsq256_mask", IX86_BUILTIN_PMAXSQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34449 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4di3_mask, "__builtin_ia32_pminsq256_mask", IX86_BUILTIN_PMINSQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34450 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv4di3_mask, "__builtin_ia32_pmaxuq256_mask", IX86_BUILTIN_PMAXUQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34451 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv4di3_mask, "__builtin_ia32_pminuq256_mask", IX86_BUILTIN_PMINUQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34452 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv2di3_mask, "__builtin_ia32_pmaxsq128_mask", IX86_BUILTIN_PMAXSQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34453 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv2di3_mask, "__builtin_ia32_pminsq128_mask", IX86_BUILTIN_PMINSQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34454 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv2di3_mask, "__builtin_ia32_pmaxuq128_mask", IX86_BUILTIN_PMAXUQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34455 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv2di3_mask, "__builtin_ia32_pminuq128_mask", IX86_BUILTIN_PMINUQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34456 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv32qi3_mask, "__builtin_ia32_pminsb256_mask", IX86_BUILTIN_PMINSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34457 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv32qi3_mask, "__builtin_ia32_pminub256_mask", IX86_BUILTIN_PMINUB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34458 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv32qi3_mask, "__builtin_ia32_pmaxsb256_mask", IX86_BUILTIN_PMAXSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34459 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv32qi3_mask, "__builtin_ia32_pmaxub256_mask", IX86_BUILTIN_PMAXUB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34460 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv16qi3_mask, "__builtin_ia32_pminsb128_mask", IX86_BUILTIN_PMINSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34461 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv16qi3_mask, "__builtin_ia32_pminub128_mask", IX86_BUILTIN_PMINUB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34462 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv16qi3_mask, "__builtin_ia32_pmaxsb128_mask", IX86_BUILTIN_PMAXSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34463 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv16qi3_mask, "__builtin_ia32_pmaxub128_mask", IX86_BUILTIN_PMAXUB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34464 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv16hi3_mask, "__builtin_ia32_pminsw256_mask", IX86_BUILTIN_PMINSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34465 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv16hi3_mask, "__builtin_ia32_pminuw256_mask", IX86_BUILTIN_PMINUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34466 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv16hi3_mask, "__builtin_ia32_pmaxsw256_mask", IX86_BUILTIN_PMAXSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34467 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv16hi3_mask, "__builtin_ia32_pmaxuw256_mask", IX86_BUILTIN_PMAXUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34468 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8hi3_mask, "__builtin_ia32_pminsw128_mask", IX86_BUILTIN_PMINSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34469 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv8hi3_mask, "__builtin_ia32_pminuw128_mask", IX86_BUILTIN_PMINUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34470 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8hi3_mask, "__builtin_ia32_pmaxsw128_mask", IX86_BUILTIN_PMAXSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34471 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv8hi3_mask, "__builtin_ia32_pmaxuw128_mask", IX86_BUILTIN_PMAXUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34472 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv4di_mask, "__builtin_ia32_vpconflictdi_256_mask", IX86_BUILTIN_VPCONFLICTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI },
34473 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv8si_mask, "__builtin_ia32_vpconflictsi_256_mask", IX86_BUILTIN_VPCONFLICTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UQI },
34474 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv4di2_mask, "__builtin_ia32_vplzcntq_256_mask", IX86_BUILTIN_VPCLZCNTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI },
34475 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv8si2_mask, "__builtin_ia32_vplzcntd_256_mask", IX86_BUILTIN_VPCLZCNTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UQI },
34476 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpckhpd256_mask, "__builtin_ia32_unpckhpd256_mask", IX86_BUILTIN_UNPCKHPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34477 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_unpckhpd128_mask, "__builtin_ia32_unpckhpd128_mask", IX86_BUILTIN_UNPCKHPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34478 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpckhps256_mask, "__builtin_ia32_unpckhps256_mask", IX86_BUILTIN_UNPCKHPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34479 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv4sf_mask, "__builtin_ia32_unpckhps128_mask", IX86_BUILTIN_UNPCKHPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34480 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpcklpd256_mask, "__builtin_ia32_unpcklpd256_mask", IX86_BUILTIN_UNPCKLPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
34481 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_unpcklpd128_mask, "__builtin_ia32_unpcklpd128_mask", IX86_BUILTIN_UNPCKLPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
34482 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpcklps256_mask, "__builtin_ia32_unpcklps256_mask", IX86_BUILTIN_UNPCKLPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
34483 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv2di_mask, "__builtin_ia32_vpconflictdi_128_mask", IX86_BUILTIN_VPCONFLICTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI },
34484 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv4si_mask, "__builtin_ia32_vpconflictsi_128_mask", IX86_BUILTIN_VPCONFLICTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
34485 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv2di2_mask, "__builtin_ia32_vplzcntq_128_mask", IX86_BUILTIN_VPCLZCNTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI },
34486 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv4si2_mask, "__builtin_ia32_vplzcntd_128_mask", IX86_BUILTIN_VPCLZCNTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
34487 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_unpcklps128_mask, "__builtin_ia32_unpcklps128_mask", IX86_BUILTIN_UNPCKLPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
34488 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv8si_mask, "__builtin_ia32_alignd256_mask", IX86_BUILTIN_ALIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_UQI },
34489 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv4di_mask, "__builtin_ia32_alignq256_mask", IX86_BUILTIN_ALIGNQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_UQI },
34490 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv4si_mask, "__builtin_ia32_alignd128_mask", IX86_BUILTIN_ALIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT_V4SI_UQI },
34491 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv2di_mask, "__builtin_ia32_alignq128_mask", IX86_BUILTIN_ALIGNQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_UQI },
34492 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtps2ph256_mask, "__builtin_ia32_vcvtps2ph256_mask", IX86_BUILTIN_CVTPS2PH256_MASK, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT_V8HI_UQI },
34493 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtps2ph_mask, "__builtin_ia32_vcvtps2ph_mask", IX86_BUILTIN_CVTPS2PH_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT_V8HI_UQI },
34494 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtph2ps_mask, "__builtin_ia32_vcvtph2ps_mask", IX86_BUILTIN_CVTPH2PS_MASK, UNKNOWN, (int) V4SF_FTYPE_V8HI_V4SF_UQI },
34495 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtph2ps256_mask, "__builtin_ia32_vcvtph2ps256_mask", IX86_BUILTIN_CVTPH2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8HI_V8SF_UQI },
34496 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv4si_mask, "__builtin_ia32_punpckhdq128_mask", IX86_BUILTIN_PUNPCKHDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34497 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv8si_mask, "__builtin_ia32_punpckhdq256_mask", IX86_BUILTIN_PUNPCKHDQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34498 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv2di_mask, "__builtin_ia32_punpckhqdq128_mask", IX86_BUILTIN_PUNPCKHQDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34499 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv4di_mask, "__builtin_ia32_punpckhqdq256_mask", IX86_BUILTIN_PUNPCKHQDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34500 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv4si_mask, "__builtin_ia32_punpckldq128_mask", IX86_BUILTIN_PUNPCKLDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34501 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv8si_mask, "__builtin_ia32_punpckldq256_mask", IX86_BUILTIN_PUNPCKLDQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34502 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv2di_mask, "__builtin_ia32_punpcklqdq128_mask", IX86_BUILTIN_PUNPCKLQDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34503 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv4di_mask, "__builtin_ia32_punpcklqdq256_mask", IX86_BUILTIN_PUNPCKLQDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34504 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv16qi_mask, "__builtin_ia32_punpckhbw128_mask", IX86_BUILTIN_PUNPCKHBW128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34505 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv32qi_mask, "__builtin_ia32_punpckhbw256_mask", IX86_BUILTIN_PUNPCKHBW256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34506 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv8hi_mask, "__builtin_ia32_punpckhwd128_mask", IX86_BUILTIN_PUNPCKHWD128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34507 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv16hi_mask, "__builtin_ia32_punpckhwd256_mask", IX86_BUILTIN_PUNPCKHWD256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34508 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv16qi_mask, "__builtin_ia32_punpcklbw128_mask", IX86_BUILTIN_PUNPCKLBW128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34509 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv32qi_mask, "__builtin_ia32_punpcklbw256_mask", IX86_BUILTIN_PUNPCKLBW256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34510 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv8hi_mask, "__builtin_ia32_punpcklwd128_mask", IX86_BUILTIN_PUNPCKLWD128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34511 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv16hi_mask, "__builtin_ia32_punpcklwd256_mask", IX86_BUILTIN_PUNPCKLWD256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34512 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashlvv16hi_mask, "__builtin_ia32_psllv16hi_mask", IX86_BUILTIN_PSLLVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34513 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashlvv8hi_mask, "__builtin_ia32_psllv8hi_mask", IX86_BUILTIN_PSLLVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34514 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packssdw_mask, "__builtin_ia32_packssdw256_mask", IX86_BUILTIN_PACKSSDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI_V16HI_UHI },
34515 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packssdw_mask, "__builtin_ia32_packssdw128_mask", IX86_BUILTIN_PACKSSDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI_V8HI_UQI },
34516 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packusdw_mask, "__builtin_ia32_packusdw256_mask", IX86_BUILTIN_PACKUSDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI_V16HI_UHI },
34517 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_packusdw_mask, "__builtin_ia32_packusdw128_mask", IX86_BUILTIN_PACKUSDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI_V8HI_UQI },
34518 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_uavgv32qi3_mask, "__builtin_ia32_pavgb256_mask", IX86_BUILTIN_PAVGB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34519 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_uavgv16hi3_mask, "__builtin_ia32_pavgw256_mask", IX86_BUILTIN_PAVGW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
34520 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_uavgv16qi3_mask, "__builtin_ia32_pavgb128_mask", IX86_BUILTIN_PAVGB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34521 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_uavgv8hi3_mask, "__builtin_ia32_pavgw128_mask", IX86_BUILTIN_PAVGW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
34522 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv8sf_mask, "__builtin_ia32_permvarsf256_mask", IX86_BUILTIN_VPERMVARSF256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_UQI },
34523 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv4df_mask, "__builtin_ia32_permvardf256_mask", IX86_BUILTIN_VPERMVARDF256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_UQI },
34524 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permv4df_mask, "__builtin_ia32_permdf256_mask", IX86_BUILTIN_VPERMDF256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_UQI },
34525 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv32qi2_mask, "__builtin_ia32_pabsb256_mask", IX86_BUILTIN_PABSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_USI },
34526 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv16qi2_mask, "__builtin_ia32_pabsb128_mask", IX86_BUILTIN_PABSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_UHI },
34527 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv16hi2_mask, "__builtin_ia32_pabsw256_mask", IX86_BUILTIN_PABSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_UHI },
34528 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv8hi2_mask, "__builtin_ia32_pabsw128_mask", IX86_BUILTIN_PABSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_UQI },
34529 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv2df3_mask, "__builtin_ia32_vpermilvarpd_mask", IX86_BUILTIN_VPERMILVARPD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI_V2DF_UQI },
34530 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv4sf3_mask, "__builtin_ia32_vpermilvarps_mask", IX86_BUILTIN_VPERMILVARPS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI_V4SF_UQI },
34531 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv4df3_mask, "__builtin_ia32_vpermilvarpd256_mask", IX86_BUILTIN_VPERMILVARPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_UQI },
34532 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv8sf3_mask, "__builtin_ia32_vpermilvarps256_mask", IX86_BUILTIN_VPERMILVARPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_UQI },
34533 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv2df_mask, "__builtin_ia32_vpermilpd_mask", IX86_BUILTIN_VPERMILPD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_UQI },
34534 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv4sf_mask, "__builtin_ia32_vpermilps_mask", IX86_BUILTIN_VPERMILPS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_UQI },
34535 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv4df_mask, "__builtin_ia32_vpermilpd256_mask", IX86_BUILTIN_VPERMILPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_UQI },
34536 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv8sf_mask, "__builtin_ia32_vpermilps256_mask", IX86_BUILTIN_VPERMILPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_UQI },
34537 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4di, "__builtin_ia32_blendmq_256_mask", IX86_BUILTIN_BLENDMQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI },
34538 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8si, "__builtin_ia32_blendmd_256_mask", IX86_BUILTIN_BLENDMD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UQI },
34539 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4df, "__builtin_ia32_blendmpd_256_mask", IX86_BUILTIN_BLENDMPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
34540 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8sf, "__builtin_ia32_blendmps_256_mask", IX86_BUILTIN_BLENDMPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
34541 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv2di, "__builtin_ia32_blendmq_128_mask", IX86_BUILTIN_BLENDMQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI },
34542 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4si, "__builtin_ia32_blendmd_128_mask", IX86_BUILTIN_BLENDMD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
34543 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv2df, "__builtin_ia32_blendmpd_128_mask", IX86_BUILTIN_BLENDMPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
34544 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4sf, "__builtin_ia32_blendmps_128_mask", IX86_BUILTIN_BLENDMPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
34545 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv16hi, "__builtin_ia32_blendmw_256_mask", IX86_BUILTIN_BLENDMW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_UHI },
34546 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv32qi, "__builtin_ia32_blendmb_256_mask", IX86_BUILTIN_BLENDMB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_USI },
34547 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8hi, "__builtin_ia32_blendmw_128_mask", IX86_BUILTIN_BLENDMW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_UQI },
34548 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv16qi, "__builtin_ia32_blendmb_128_mask", IX86_BUILTIN_BLENDMB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_UHI },
34549 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8si3_mask, "__builtin_ia32_pmulld256_mask", IX86_BUILTIN_PMULLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34550 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4si3_mask, "__builtin_ia32_pmulld128_mask", IX86_BUILTIN_PMULLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
34551 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_umult_even_v8si_mask, "__builtin_ia32_pmuludq256_mask", IX86_BUILTIN_PMULUDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI_V4DI_UQI },
34552 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_smult_even_v8si_mask, "__builtin_ia32_pmuldq256_mask", IX86_BUILTIN_PMULDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI_V4DI_UQI },
34553 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_mulv2siv2di3_mask, "__builtin_ia32_pmuldq128_mask", IX86_BUILTIN_PMULDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI_V2DI_UQI },
34554 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_umult_even_v4si_mask, "__builtin_ia32_pmuludq128_mask", IX86_BUILTIN_PMULUDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI_V2DI_UQI },
34555 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtpd2ps256_mask, "__builtin_ia32_cvtpd2ps256_mask", IX86_BUILTIN_CVTPD2PS256_MASK, UNKNOWN, (int) V4SF_FTYPE_V4DF_V4SF_UQI },
34556 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtpd2ps_mask, "__builtin_ia32_cvtpd2ps_mask", IX86_BUILTIN_CVTPD2PS_MASK, UNKNOWN, (int) V4SF_FTYPE_V2DF_V4SF_UQI },
34557 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv8si_mask, "__builtin_ia32_permvarsi256_mask", IX86_BUILTIN_VPERMVARSI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
34558 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv4di_mask, "__builtin_ia32_permvardi256_mask", IX86_BUILTIN_VPERMVARDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34559 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permv4di_mask, "__builtin_ia32_permdi256_mask", IX86_BUILTIN_VPERMDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_UQI },
34560 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4di3_mask, "__builtin_ia32_cmpq256_mask", IX86_BUILTIN_CMPQ256, UNKNOWN, (int) UQI_FTYPE_V4DI_V4DI_INT_UQI },
34561 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8si3_mask, "__builtin_ia32_cmpd256_mask", IX86_BUILTIN_CMPD256, UNKNOWN, (int) UQI_FTYPE_V8SI_V8SI_INT_UQI },
34562 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv4di3_mask, "__builtin_ia32_ucmpq256_mask", IX86_BUILTIN_UCMPQ256, UNKNOWN, (int) UQI_FTYPE_V4DI_V4DI_INT_UQI },
34563 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv8si3_mask, "__builtin_ia32_ucmpd256_mask", IX86_BUILTIN_UCMPD256, UNKNOWN, (int) UQI_FTYPE_V8SI_V8SI_INT_UQI },
34564 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv32qi3_mask, "__builtin_ia32_cmpb256_mask", IX86_BUILTIN_CMPB256, UNKNOWN, (int) USI_FTYPE_V32QI_V32QI_INT_USI },
34565 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv16hi3_mask, "__builtin_ia32_cmpw256_mask", IX86_BUILTIN_CMPW256, UNKNOWN, (int) UHI_FTYPE_V16HI_V16HI_INT_UHI },
34566 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv32qi3_mask, "__builtin_ia32_ucmpb256_mask", IX86_BUILTIN_UCMPB256, UNKNOWN, (int) USI_FTYPE_V32QI_V32QI_INT_USI },
34567 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv16hi3_mask, "__builtin_ia32_ucmpw256_mask", IX86_BUILTIN_UCMPW256, UNKNOWN, (int) UHI_FTYPE_V16HI_V16HI_INT_UHI },
34568 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4df3_mask, "__builtin_ia32_cmppd256_mask", IX86_BUILTIN_CMPPD256_MASK, UNKNOWN, (int) QI_FTYPE_V4DF_V4DF_INT_UQI },
34569 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8sf3_mask, "__builtin_ia32_cmpps256_mask", IX86_BUILTIN_CMPPS256_MASK, UNKNOWN, (int) QI_FTYPE_V8SF_V8SF_INT_UQI },
34570 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv2di3_mask, "__builtin_ia32_cmpq128_mask", IX86_BUILTIN_CMPQ128, UNKNOWN, (int) UQI_FTYPE_V2DI_V2DI_INT_UQI },
34571 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4si3_mask, "__builtin_ia32_cmpd128_mask", IX86_BUILTIN_CMPD128, UNKNOWN, (int) UQI_FTYPE_V4SI_V4SI_INT_UQI },
34572 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv2di3_mask, "__builtin_ia32_ucmpq128_mask", IX86_BUILTIN_UCMPQ128, UNKNOWN, (int) UQI_FTYPE_V2DI_V2DI_INT_UQI },
34573 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv4si3_mask, "__builtin_ia32_ucmpd128_mask", IX86_BUILTIN_UCMPD128, UNKNOWN, (int) UQI_FTYPE_V4SI_V4SI_INT_UQI },
34574 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv16qi3_mask, "__builtin_ia32_cmpb128_mask", IX86_BUILTIN_CMPB128, UNKNOWN, (int) UHI_FTYPE_V16QI_V16QI_INT_UHI },
34575 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8hi3_mask, "__builtin_ia32_cmpw128_mask", IX86_BUILTIN_CMPW128, UNKNOWN, (int) UQI_FTYPE_V8HI_V8HI_INT_UQI },
34576 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv16qi3_mask, "__builtin_ia32_ucmpb128_mask", IX86_BUILTIN_UCMPB128, UNKNOWN, (int) UHI_FTYPE_V16QI_V16QI_INT_UHI },
34577 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv8hi3_mask, "__builtin_ia32_ucmpw128_mask", IX86_BUILTIN_UCMPW128, UNKNOWN, (int) UQI_FTYPE_V8HI_V8HI_INT_UQI },
34578 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv2df3_mask, "__builtin_ia32_cmppd128_mask", IX86_BUILTIN_CMPPD128_MASK, UNKNOWN, (int) UQI_FTYPE_V2DF_V2DF_INT_UQI },
34579 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4sf3_mask, "__builtin_ia32_cmpps128_mask", IX86_BUILTIN_CMPPS128_MASK, UNKNOWN, (int) UQI_FTYPE_V4SF_V4SF_INT_UQI },
  /* AVX512DQ.  */
34582 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x2_512_mask", IX86_BUILTIN_BROADCASTF32x2_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_UHI },
34583 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16si_mask, "__builtin_ia32_broadcasti32x2_512_mask", IX86_BUILTIN_BROADCASTI32x2_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_UHI },
34584 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv8df_mask_1, "__builtin_ia32_broadcastf64x2_512_mask", IX86_BUILTIN_BROADCASTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_UQI },
34585 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv8di_mask_1, "__builtin_ia32_broadcasti64x2_512_mask", IX86_BUILTIN_BROADCASTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_UQI },
34586 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16sf_mask_1, "__builtin_ia32_broadcastf32x8_512_mask", IX86_BUILTIN_BROADCASTF32X8_512, UNKNOWN, (int) V16SF_FTYPE_V8SF_V16SF_UHI },
34587 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16si_mask_1, "__builtin_ia32_broadcasti32x8_512_mask", IX86_BUILTIN_BROADCASTI32X8_512, UNKNOWN, (int) V16SI_FTYPE_V8SI_V16SI_UHI },
34588 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextractf64x2_mask, "__builtin_ia32_extractf64x2_512_mask", IX86_BUILTIN_EXTRACTF64X2_512, UNKNOWN, (int) V2DF_FTYPE_V8DF_INT_V2DF_UQI },
34589 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextractf32x8_mask, "__builtin_ia32_extractf32x8_mask", IX86_BUILTIN_EXTRACTF32X8, UNKNOWN, (int) V8SF_FTYPE_V16SF_INT_V8SF_UQI },
34590 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextracti64x2_mask, "__builtin_ia32_extracti64x2_512_mask", IX86_BUILTIN_EXTRACTI64X2_512, UNKNOWN, (int) V2DI_FTYPE_V8DI_INT_V2DI_UQI },
34591 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextracti32x8_mask, "__builtin_ia32_extracti32x8_mask", IX86_BUILTIN_EXTRACTI32X8, UNKNOWN, (int) V8SI_FTYPE_V16SI_INT_V8SI_UQI },
34592 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducepv8df_mask, "__builtin_ia32_reducepd512_mask", IX86_BUILTIN_REDUCEPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_UQI },
34593 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducepv16sf_mask, "__builtin_ia32_reduceps512_mask", IX86_BUILTIN_REDUCEPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_UHI },
34594 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_mulv8di3_mask, "__builtin_ia32_pmullq512_mask", IX86_BUILTIN_PMULLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
34595 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_xorv8df3_mask, "__builtin_ia32_xorpd512_mask", IX86_BUILTIN_XORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI },
34596 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_xorv16sf3_mask, "__builtin_ia32_xorps512_mask", IX86_BUILTIN_XORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI },
34597 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_iorv8df3_mask, "__builtin_ia32_orpd512_mask", IX86_BUILTIN_ORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI },
34598 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_iorv16sf3_mask, "__builtin_ia32_orps512_mask", IX86_BUILTIN_ORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI },
34599 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_andv8df3_mask, "__builtin_ia32_andpd512_mask", IX86_BUILTIN_ANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI },
34600 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_andv16sf3_mask, "__builtin_ia32_andps512_mask", IX86_BUILTIN_ANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI },
34601 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_andnotv8df3_mask, "__builtin_ia32_andnpd512_mask", IX86_BUILTIN_ANDNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI},
34602 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_andnotv16sf3_mask, "__builtin_ia32_andnps512_mask", IX86_BUILTIN_ANDNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI },
34603 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinsertf32x8_mask, "__builtin_ia32_insertf32x8_mask", IX86_BUILTIN_INSERTF32X8, UNKNOWN, (int) V16SF_FTYPE_V16SF_V8SF_INT_V16SF_UHI },
34604 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinserti32x8_mask, "__builtin_ia32_inserti32x8_mask", IX86_BUILTIN_INSERTI32X8, UNKNOWN, (int) V16SI_FTYPE_V16SI_V8SI_INT_V16SI_UHI },
34605 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinsertf64x2_mask, "__builtin_ia32_insertf64x2_512_mask", IX86_BUILTIN_INSERTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V2DF_INT_V8DF_UQI },
34606 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinserti64x2_mask, "__builtin_ia32_inserti64x2_512_mask", IX86_BUILTIN_INSERTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_INT_V8DI_UQI },
34607 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_fpclassv8df_mask, "__builtin_ia32_fpclasspd512_mask", IX86_BUILTIN_FPCLASSPD512, UNKNOWN, (int) QI_FTYPE_V8DF_INT_UQI },
34608 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_fpclassv16sf_mask, "__builtin_ia32_fpclassps512_mask", IX86_BUILTIN_FPCLASSPS512, UNKNOWN, (int) HI_FTYPE_V16SF_INT_UHI },
34609 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtd2maskv16si, "__builtin_ia32_cvtd2mask512", IX86_BUILTIN_CVTD2MASK512, UNKNOWN, (int) UHI_FTYPE_V16SI },
34610 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtq2maskv8di, "__builtin_ia32_cvtq2mask512", IX86_BUILTIN_CVTQ2MASK512, UNKNOWN, (int) UQI_FTYPE_V8DI },
34611 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtmask2dv16si, "__builtin_ia32_cvtmask2d512", IX86_BUILTIN_CVTMASK2D512, UNKNOWN, (int) V16SI_FTYPE_UHI },
34612 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtmask2qv8di, "__builtin_ia32_cvtmask2q512", IX86_BUILTIN_CVTMASK2Q512, UNKNOWN, (int) V8DI_FTYPE_UQI },
  /* AVX512BW.  */
34615 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_kunpcksi, "__builtin_ia32_kunpcksi", IX86_BUILTIN_KUNPCKWD, UNKNOWN, (int) USI_FTYPE_USI_USI },
34616 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_kunpckdi, "__builtin_ia32_kunpckdi", IX86_BUILTIN_KUNPCKDQ, UNKNOWN, (int) UDI_FTYPE_UDI_UDI },
34617 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packusdw_mask, "__builtin_ia32_packusdw512_mask", IX86_BUILTIN_PACKUSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_USI },
34618 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashlv4ti3, "__builtin_ia32_pslldq512", IX86_BUILTIN_PSLLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT },
34619 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_lshrv4ti3, "__builtin_ia32_psrldq512", IX86_BUILTIN_PSRLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT },
34620 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packssdw_mask, "__builtin_ia32_packssdw512_mask", IX86_BUILTIN_PACKSSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_USI },
34621 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_palignrv4ti, "__builtin_ia32_palignr512", IX86_BUILTIN_PALIGNR512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_CONVERT },
34622 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_palignrv64qi_mask, "__builtin_ia32_palignr512_mask", IX86_BUILTIN_PALIGNR512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UDI_CONVERT },
34623 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loaddquv32hi_mask, "__builtin_ia32_movdquhi512_mask", IX86_BUILTIN_MOVDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI },
34624 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_loaddquv64qi_mask, "__builtin_ia32_movdquqi512_mask", IX86_BUILTIN_MOVDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI },
34625 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_psadbw, "__builtin_ia32_psadbw512", IX86_BUILTIN_PSADBW512, UNKNOWN, (int) V8DI_FTYPE_V64QI_V64QI },
34626 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_dbpsadbwv32hi_mask, "__builtin_ia32_dbpsadbw512_mask", IX86_BUILTIN_DBPSADBW512, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_INT_V32HI_USI },
34627 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dupv64qi_mask, "__builtin_ia32_pbroadcastb512_mask", IX86_BUILTIN_PBROADCASTB512, UNKNOWN, (int) V64QI_FTYPE_V16QI_V64QI_UDI },
34628 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dup_gprv64qi_mask, "__builtin_ia32_pbroadcastb512_gpr_mask", IX86_BUILTIN_PBROADCASTB512_GPR, UNKNOWN, (int) V64QI_FTYPE_QI_V64QI_UDI },
34629 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dupv32hi_mask, "__builtin_ia32_pbroadcastw512_mask", IX86_BUILTIN_PBROADCASTW512, UNKNOWN, (int) V32HI_FTYPE_V8HI_V32HI_USI },
34630 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dup_gprv32hi_mask, "__builtin_ia32_pbroadcastw512_gpr_mask", IX86_BUILTIN_PBROADCASTW512_GPR, UNKNOWN, (int) V32HI_FTYPE_HI_V32HI_USI },
34631 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sign_extendv32qiv32hi2_mask, "__builtin_ia32_pmovsxbw512_mask", IX86_BUILTIN_PMOVSXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_USI },
34632 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_zero_extendv32qiv32hi2_mask, "__builtin_ia32_pmovzxbw512_mask", IX86_BUILTIN_PMOVZXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_USI },
34633 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_permvarv32hi_mask, "__builtin_ia32_permvarhi512_mask", IX86_BUILTIN_VPERMVARHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34634 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermt2varv32hi3_mask, "__builtin_ia32_vpermt2varhi512_mask", IX86_BUILTIN_VPERMT2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34635 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermt2varv32hi3_maskz, "__builtin_ia32_vpermt2varhi512_maskz", IX86_BUILTIN_VPERMT2VARHI512_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34636 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermi2varv32hi3_mask, "__builtin_ia32_vpermi2varhi512_mask", IX86_BUILTIN_VPERMI2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34637 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_uavgv64qi3_mask, "__builtin_ia32_pavgb512_mask", IX86_BUILTIN_PAVGB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34638 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_uavgv32hi3_mask, "__builtin_ia32_pavgw512_mask", IX86_BUILTIN_PAVGW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34639 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_addv64qi3_mask, "__builtin_ia32_paddb512_mask", IX86_BUILTIN_PADDB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34640 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_subv64qi3_mask, "__builtin_ia32_psubb512_mask", IX86_BUILTIN_PSUBB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34641 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sssubv64qi3_mask, "__builtin_ia32_psubsb512_mask", IX86_BUILTIN_PSUBSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34642 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ssaddv64qi3_mask, "__builtin_ia32_paddsb512_mask", IX86_BUILTIN_PADDSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34643 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ussubv64qi3_mask, "__builtin_ia32_psubusb512_mask", IX86_BUILTIN_PSUBUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34644 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_usaddv64qi3_mask, "__builtin_ia32_paddusb512_mask", IX86_BUILTIN_PADDUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34645 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_subv32hi3_mask, "__builtin_ia32_psubw512_mask", IX86_BUILTIN_PSUBW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34646 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_addv32hi3_mask, "__builtin_ia32_paddw512_mask", IX86_BUILTIN_PADDW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34647 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sssubv32hi3_mask, "__builtin_ia32_psubsw512_mask", IX86_BUILTIN_PSUBSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34648 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ssaddv32hi3_mask, "__builtin_ia32_paddsw512_mask", IX86_BUILTIN_PADDSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34649 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ussubv32hi3_mask, "__builtin_ia32_psubusw512_mask", IX86_BUILTIN_PSUBUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34650 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_usaddv32hi3_mask, "__builtin_ia32_paddusw512_mask", IX86_BUILTIN_PADDUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34651 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umaxv32hi3_mask, "__builtin_ia32_pmaxuw512_mask", IX86_BUILTIN_PMAXUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34652 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smaxv32hi3_mask, "__builtin_ia32_pmaxsw512_mask", IX86_BUILTIN_PMAXSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34653 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_uminv32hi3_mask, "__builtin_ia32_pminuw512_mask", IX86_BUILTIN_PMINUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34654 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_sminv32hi3_mask, "__builtin_ia32_pminsw512_mask", IX86_BUILTIN_PMINSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34655 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umaxv64qi3_mask, "__builtin_ia32_pmaxub512_mask", IX86_BUILTIN_PMAXUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34656 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smaxv64qi3_mask, "__builtin_ia32_pmaxsb512_mask", IX86_BUILTIN_PMAXSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34657 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_uminv64qi3_mask, "__builtin_ia32_pminub512_mask", IX86_BUILTIN_PMINUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34658 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_sminv64qi3_mask, "__builtin_ia32_pminsb512_mask", IX86_BUILTIN_PMINSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34659 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovwb512_mask", IX86_BUILTIN_PMOVWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_USI },
34660 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ss_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovswb512_mask", IX86_BUILTIN_PMOVSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_USI },
34661 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_us_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovuswb512_mask", IX86_BUILTIN_PMOVUSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_USI },
34662 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_umulhrswv32hi3_mask, "__builtin_ia32_pmulhrsw512_mask", IX86_BUILTIN_PMULHRSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34663 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umulv32hi3_highpart_mask, "__builtin_ia32_pmulhuw512_mask" , IX86_BUILTIN_PMULHUW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34664 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smulv32hi3_highpart_mask, "__builtin_ia32_pmulhw512_mask" , IX86_BUILTIN_PMULHW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34665 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_mulv32hi3_mask, "__builtin_ia32_pmullw512_mask", IX86_BUILTIN_PMULLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34666 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllwi512_mask", IX86_BUILTIN_PSLLWI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI },
34667 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllw512_mask", IX86_BUILTIN_PSLLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_USI },
34668 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packsswb_mask, "__builtin_ia32_packsswb512_mask", IX86_BUILTIN_PACKSSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_UDI },
34669 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packuswb_mask, "__builtin_ia32_packuswb512_mask", IX86_BUILTIN_PACKUSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_UDI },
34670 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashrvv32hi_mask, "__builtin_ia32_psrav32hi_mask", IX86_BUILTIN_PSRAVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34671 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pmaddubsw512v32hi_mask, "__builtin_ia32_pmaddubsw512_mask", IX86_BUILTIN_PMADDUBSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_V32HI_USI },
34672 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pmaddwd512v32hi_mask, "__builtin_ia32_pmaddwd512_mask", IX86_BUILTIN_PMADDWD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V32HI_V32HI_V16SI_UHI },
34673 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_lshrvv32hi_mask, "__builtin_ia32_psrlv32hi_mask", IX86_BUILTIN_PSRLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34674 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_highv64qi_mask, "__builtin_ia32_punpckhbw512_mask", IX86_BUILTIN_PUNPCKHBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34675 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_highv32hi_mask, "__builtin_ia32_punpckhwd512_mask", IX86_BUILTIN_PUNPCKHWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34676 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_lowv64qi_mask, "__builtin_ia32_punpcklbw512_mask", IX86_BUILTIN_PUNPCKLBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34677 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_lowv32hi_mask, "__builtin_ia32_punpcklwd512_mask", IX86_BUILTIN_PUNPCKLWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34678 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshufbv64qi3_mask, "__builtin_ia32_pshufb512_mask", IX86_BUILTIN_PSHUFB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34679 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshufhwv32hi_mask, "__builtin_ia32_pshufhw512_mask", IX86_BUILTIN_PSHUFHW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI },
34680 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshuflwv32hi_mask, "__builtin_ia32_pshuflw512_mask", IX86_BUILTIN_PSHUFLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI },
34681 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psrawi512_mask", IX86_BUILTIN_PSRAWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI },
34682 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psraw512_mask", IX86_BUILTIN_PSRAW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_USI },
34683 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlwi512_mask", IX86_BUILTIN_PSRLWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI },
34684 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlw512_mask", IX86_BUILTIN_PSRLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_USI },
34685 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtb2maskv64qi, "__builtin_ia32_cvtb2mask512", IX86_BUILTIN_CVTB2MASK512, UNKNOWN, (int) UDI_FTYPE_V64QI },
34686 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtw2maskv32hi, "__builtin_ia32_cvtw2mask512", IX86_BUILTIN_CVTW2MASK512, UNKNOWN, (int) USI_FTYPE_V32HI },
34687 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtmask2bv64qi, "__builtin_ia32_cvtmask2b512", IX86_BUILTIN_CVTMASK2B512, UNKNOWN, (int) V64QI_FTYPE_UDI },
34688 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtmask2wv32hi, "__builtin_ia32_cvtmask2w512", IX86_BUILTIN_CVTMASK2W512, UNKNOWN, (int) V32HI_FTYPE_USI },
34689 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_eqv64qi3_mask, "__builtin_ia32_pcmpeqb512_mask", IX86_BUILTIN_PCMPEQB512_MASK, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI },
34690 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_eqv32hi3_mask, "__builtin_ia32_pcmpeqw512_mask", IX86_BUILTIN_PCMPEQW512_MASK, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_USI },
34691 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_gtv64qi3_mask, "__builtin_ia32_pcmpgtb512_mask", IX86_BUILTIN_PCMPGTB512_MASK, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI },
34692 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_gtv32hi3_mask, "__builtin_ia32_pcmpgtw512_mask", IX86_BUILTIN_PCMPGTW512_MASK, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_USI },
34693 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testmv64qi3_mask, "__builtin_ia32_ptestmb512", IX86_BUILTIN_PTESTMB512, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI },
34694 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testmv32hi3_mask, "__builtin_ia32_ptestmw512", IX86_BUILTIN_PTESTMW512, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_USI },
34695 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testnmv64qi3_mask, "__builtin_ia32_ptestnmb512", IX86_BUILTIN_PTESTNMB512, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI },
34696 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testnmv32hi3_mask, "__builtin_ia32_ptestnmw512", IX86_BUILTIN_PTESTNMW512, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_USI },
34697 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashlvv32hi_mask, "__builtin_ia32_psllv32hi_mask", IX86_BUILTIN_PSLLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
34698 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_absv64qi2_mask, "__builtin_ia32_pabsb512_mask", IX86_BUILTIN_PABSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI },
34699 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_absv32hi2_mask, "__builtin_ia32_pabsw512_mask", IX86_BUILTIN_PABSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI },
34700 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_blendmv32hi, "__builtin_ia32_blendmw_512_mask", IX86_BUILTIN_BLENDMW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI },
34701 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_blendmv64qi, "__builtin_ia32_blendmb_512_mask", IX86_BUILTIN_BLENDMB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI },
34702 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cmpv64qi3_mask, "__builtin_ia32_cmpb512_mask", IX86_BUILTIN_CMPB512, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_INT_UDI },
34703 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cmpv32hi3_mask, "__builtin_ia32_cmpw512_mask", IX86_BUILTIN_CMPW512, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_INT_USI },
34704 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ucmpv64qi3_mask, "__builtin_ia32_ucmpb512_mask", IX86_BUILTIN_UCMPB512, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_INT_UDI },
34705 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ucmpv32hi3_mask, "__builtin_ia32_ucmpw512_mask", IX86_BUILTIN_UCMPW512, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_INT_USI },
  /* AVX512IFMA.  */
34708 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52luqv8di_mask, "__builtin_ia32_vpmadd52luq512_mask", IX86_BUILTIN_VPMADD52LUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
34709 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52luqv8di_maskz, "__builtin_ia32_vpmadd52luq512_maskz", IX86_BUILTIN_VPMADD52LUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
34710 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52huqv8di_mask, "__builtin_ia32_vpmadd52huq512_mask", IX86_BUILTIN_VPMADD52HUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
34711 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52huqv8di_maskz, "__builtin_ia32_vpmadd52huq512_maskz", IX86_BUILTIN_VPMADD52HUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
34712 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv4di_mask, "__builtin_ia32_vpmadd52luq256_mask", IX86_BUILTIN_VPMADD52LUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34713 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv4di_maskz, "__builtin_ia32_vpmadd52luq256_maskz", IX86_BUILTIN_VPMADD52LUQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34714 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv4di_mask, "__builtin_ia32_vpmadd52huq256_mask", IX86_BUILTIN_VPMADD52HUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34715 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv4di_maskz, "__builtin_ia32_vpmadd52huq256_maskz", IX86_BUILTIN_VPMADD52HUQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
34716 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv2di_mask, "__builtin_ia32_vpmadd52luq128_mask", IX86_BUILTIN_VPMADD52LUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34717 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv2di_maskz, "__builtin_ia32_vpmadd52luq128_maskz", IX86_BUILTIN_VPMADD52LUQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34718 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv2di_mask, "__builtin_ia32_vpmadd52huq128_mask", IX86_BUILTIN_VPMADD52HUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
34719 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv2di_maskz, "__builtin_ia32_vpmadd52huq128_maskz", IX86_BUILTIN_VPMADD52HUQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
  /* AVX512VBMI.  */
34722 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_vpmultishiftqbv64qi_mask, "__builtin_ia32_vpmultishiftqb512_mask", IX86_BUILTIN_VPMULTISHIFTQB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34723 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpmultishiftqbv32qi_mask, "__builtin_ia32_vpmultishiftqb256_mask", IX86_BUILTIN_VPMULTISHIFTQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34724 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpmultishiftqbv16qi_mask, "__builtin_ia32_vpmultishiftqb128_mask", IX86_BUILTIN_VPMULTISHIFTQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34725 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_permvarv64qi_mask, "__builtin_ia32_permvarqi512_mask", IX86_BUILTIN_VPERMVARQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34726 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermt2varv64qi3_mask, "__builtin_ia32_vpermt2varqi512_mask", IX86_BUILTIN_VPERMT2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34727 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermt2varv64qi3_maskz, "__builtin_ia32_vpermt2varqi512_maskz", IX86_BUILTIN_VPERMT2VARQI512_MASKZ, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34728 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermi2varv64qi3_mask, "__builtin_ia32_vpermi2varqi512_mask", IX86_BUILTIN_VPERMI2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
34729 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv32qi_mask, "__builtin_ia32_permvarqi256_mask", IX86_BUILTIN_VPERMVARQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34730 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv16qi_mask, "__builtin_ia32_permvarqi128_mask", IX86_BUILTIN_VPERMVARQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34731 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv32qi3_mask, "__builtin_ia32_vpermt2varqi256_mask", IX86_BUILTIN_VPERMT2VARQI256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34732 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv32qi3_maskz, "__builtin_ia32_vpermt2varqi256_maskz", IX86_BUILTIN_VPERMT2VARQI256_MASKZ, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34733 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16qi3_mask, "__builtin_ia32_vpermt2varqi128_mask", IX86_BUILTIN_VPERMT2VARQI128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34734 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16qi3_maskz, "__builtin_ia32_vpermt2varqi128_maskz", IX86_BUILTIN_VPERMT2VARQI128_MASKZ, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34735 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv32qi3_mask, "__builtin_ia32_vpermi2varqi256_mask", IX86_BUILTIN_VPERMI2VARQI256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
34736 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv16qi3_mask, "__builtin_ia32_vpermi2varqi128_mask", IX86_BUILTIN_VPERMI2VARQI128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
34739 /* Builtins with rounding support. */
34740 static const struct builtin_description bdesc_round_args[] =
34743 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8df3_mask_round, "__builtin_ia32_addpd512_mask", IX86_BUILTIN_ADDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34744 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16sf3_mask_round, "__builtin_ia32_addps512_mask", IX86_BUILTIN_ADDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34745 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmaddv2df3_round, "__builtin_ia32_addsd_round", IX86_BUILTIN_ADDSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
34746 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmaddv4sf3_round, "__builtin_ia32_addss_round", IX86_BUILTIN_ADDSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
34747 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8df3_mask_round, "__builtin_ia32_cmppd512_mask", IX86_BUILTIN_CMPPD512, UNKNOWN, (int) UQI_FTYPE_V8DF_V8DF_INT_UQI_INT },
34748 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16sf3_mask_round, "__builtin_ia32_cmpps512_mask", IX86_BUILTIN_CMPPS512, UNKNOWN, (int) UHI_FTYPE_V16SF_V16SF_INT_UHI_INT },
34749 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv2df3_mask_round, "__builtin_ia32_cmpsd_mask", IX86_BUILTIN_CMPSD_MASK, UNKNOWN, (int) UQI_FTYPE_V2DF_V2DF_INT_UQI_INT },
34750 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv4sf3_mask_round, "__builtin_ia32_cmpss_mask", IX86_BUILTIN_CMPSS_MASK, UNKNOWN, (int) UQI_FTYPE_V4SF_V4SF_INT_UQI_INT },
34751 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_comi_round, "__builtin_ia32_vcomisd", IX86_BUILTIN_COMIDF, UNKNOWN, (int) INT_FTYPE_V2DF_V2DF_INT_INT },
34752 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_comi_round, "__builtin_ia32_vcomiss", IX86_BUILTIN_COMISF, UNKNOWN, (int) INT_FTYPE_V4SF_V4SF_INT_INT },
34753 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv16siv16sf2_mask_round, "__builtin_ia32_cvtdq2ps512_mask", IX86_BUILTIN_CVTDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
34754 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2dq512_mask_round, "__builtin_ia32_cvtpd2dq512_mask", IX86_BUILTIN_CVTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
34755 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2ps512_mask_round, "__builtin_ia32_cvtpd2ps512_mask", IX86_BUILTIN_CVTPD2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DF_V8SF_QI_INT },
34756 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_notruncv8dfv8si2_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
34757 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtph2ps512_mask_round, "__builtin_ia32_vcvtph2ps512_mask", IX86_BUILTIN_CVTPH2PS512, UNKNOWN, (int) V16SF_FTYPE_V16HI_V16SF_HI_INT },
34758 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
34759 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT },
34760 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2udq512_mask", IX86_BUILTIN_CVTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
34761 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2ss_round, "__builtin_ia32_cvtsd2ss_round", IX86_BUILTIN_CVTSD2SS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF_INT },
34762 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq_round, "__builtin_ia32_cvtsi2sd64", IX86_BUILTIN_CVTSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT64_INT },
34763 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtsi2ss_round, "__builtin_ia32_cvtsi2ss32", IX86_BUILTIN_CVTSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_INT },
34764 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq_round, "__builtin_ia32_cvtsi2ss64", IX86_BUILTIN_CVTSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT64_INT },
34765 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtss2sd_round, "__builtin_ia32_cvtss2sd_round", IX86_BUILTIN_CVTSS2SD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_INT },
34766 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
34767 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
34768 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
34769 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
34770 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
34771 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2sd64_round, "__builtin_ia32_cvtusi2sd64", IX86_BUILTIN_CVTUSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT64_INT },
34772 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2ss32_round, "__builtin_ia32_cvtusi2ss32", IX86_BUILTIN_CVTUSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT_INT },
34773 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2ss64_round, "__builtin_ia32_cvtusi2ss64", IX86_BUILTIN_CVTUSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT64_INT },
34774 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv8df3_mask_round, "__builtin_ia32_divpd512_mask", IX86_BUILTIN_DIVPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34775 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv16sf3_mask_round, "__builtin_ia32_divps512_mask", IX86_BUILTIN_DIVPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34776 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmdivv2df3_round, "__builtin_ia32_divsd_round", IX86_BUILTIN_DIVSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
34777 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmdivv4sf3_round, "__builtin_ia32_divss_round", IX86_BUILTIN_DIVSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
34778 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_mask_round, "__builtin_ia32_fixupimmpd512_mask", IX86_BUILTIN_FIXUPIMMPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
34779 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_maskz_round, "__builtin_ia32_fixupimmpd512_maskz", IX86_BUILTIN_FIXUPIMMPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
34780 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_mask_round, "__builtin_ia32_fixupimmps512_mask", IX86_BUILTIN_FIXUPIMMPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
34781 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_maskz_round, "__builtin_ia32_fixupimmps512_maskz", IX86_BUILTIN_FIXUPIMMPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
34782 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_mask_round, "__builtin_ia32_fixupimmsd_mask", IX86_BUILTIN_FIXUPIMMSD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
34783 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_maskz_round, "__builtin_ia32_fixupimmsd_maskz", IX86_BUILTIN_FIXUPIMMSD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
34784 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_mask_round, "__builtin_ia32_fixupimmss_mask", IX86_BUILTIN_FIXUPIMMSS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
34785 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_maskz_round, "__builtin_ia32_fixupimmss_maskz", IX86_BUILTIN_FIXUPIMMSS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
34786 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv8df_mask_round, "__builtin_ia32_getexppd512_mask", IX86_BUILTIN_GETEXPPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
34787 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv16sf_mask_round, "__builtin_ia32_getexpps512_mask", IX86_BUILTIN_GETEXPPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
34788 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv2df_round, "__builtin_ia32_getexpsd128_round", IX86_BUILTIN_GETEXPSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
34789 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv4sf_round, "__builtin_ia32_getexpss128_round", IX86_BUILTIN_GETEXPSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
34790 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv8df_mask_round, "__builtin_ia32_getmantpd512_mask", IX86_BUILTIN_GETMANTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
34791 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv16sf_mask_round, "__builtin_ia32_getmantps512_mask", IX86_BUILTIN_GETMANTPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
34792 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv2df_round, "__builtin_ia32_getmantsd_round", IX86_BUILTIN_GETMANTSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
34793 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv4sf_round, "__builtin_ia32_getmantss_round", IX86_BUILTIN_GETMANTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
34794 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8df3_mask_round, "__builtin_ia32_maxpd512_mask", IX86_BUILTIN_MAXPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34795 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16sf3_mask_round, "__builtin_ia32_maxps512_mask", IX86_BUILTIN_MAXPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34796 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsmaxv2df3_round, "__builtin_ia32_maxsd_round", IX86_BUILTIN_MAXSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
34797 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsmaxv4sf3_round, "__builtin_ia32_maxss_round", IX86_BUILTIN_MAXSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
34798 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8df3_mask_round, "__builtin_ia32_minpd512_mask", IX86_BUILTIN_MINPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34799 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16sf3_mask_round, "__builtin_ia32_minps512_mask", IX86_BUILTIN_MINPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34800 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsminv2df3_round, "__builtin_ia32_minsd_round", IX86_BUILTIN_MINSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
34801 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsminv4sf3_round, "__builtin_ia32_minss_round", IX86_BUILTIN_MINSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
34802 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv8df3_mask_round, "__builtin_ia32_mulpd512_mask", IX86_BUILTIN_MULPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34803 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16sf3_mask_round, "__builtin_ia32_mulps512_mask", IX86_BUILTIN_MULPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34804 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmmulv2df3_round, "__builtin_ia32_mulsd_round", IX86_BUILTIN_MULSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
34805 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmmulv4sf3_round, "__builtin_ia32_mulss_round", IX86_BUILTIN_MULSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
34806 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev8df_mask_round, "__builtin_ia32_rndscalepd_mask", IX86_BUILTIN_RNDSCALEPD, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
34807 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev16sf_mask_round, "__builtin_ia32_rndscaleps_mask", IX86_BUILTIN_RNDSCALEPS, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
34808 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev2df_round, "__builtin_ia32_rndscalesd_round", IX86_BUILTIN_RNDSCALESD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
34809 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev4sf_round, "__builtin_ia32_rndscaless_round", IX86_BUILTIN_RNDSCALESS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
34810 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv8df_mask_round, "__builtin_ia32_scalefpd512_mask", IX86_BUILTIN_SCALEFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34811 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv16sf_mask_round, "__builtin_ia32_scalefps512_mask", IX86_BUILTIN_SCALEFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34812 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv2df_round, "__builtin_ia32_scalefsd_round", IX86_BUILTIN_SCALEFSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
34813 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv4sf_round, "__builtin_ia32_scalefss_round", IX86_BUILTIN_SCALEFSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
34814 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2_mask_round, "__builtin_ia32_sqrtpd512_mask", IX86_BUILTIN_SQRTPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
34815 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv16sf2_mask_round, "__builtin_ia32_sqrtps512_mask", IX86_BUILTIN_SQRTPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
34816 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsqrtv2df2_round, "__builtin_ia32_sqrtsd_round", IX86_BUILTIN_SQRTSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
34817 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsqrtv4sf2_round, "__builtin_ia32_sqrtss_round", IX86_BUILTIN_SQRTSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
34818 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8df3_mask_round, "__builtin_ia32_subpd512_mask", IX86_BUILTIN_SUBPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34819 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16sf3_mask_round, "__builtin_ia32_subps512_mask", IX86_BUILTIN_SUBPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34820 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsubv2df3_round, "__builtin_ia32_subsd_round", IX86_BUILTIN_SUBSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
34821 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsubv4sf3_round, "__builtin_ia32_subss_round", IX86_BUILTIN_SUBSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
34822 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2si_round, "__builtin_ia32_vcvtsd2si32", IX86_BUILTIN_VCVTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
34823 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq_round, "__builtin_ia32_vcvtsd2si64", IX86_BUILTIN_VCVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
34824 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtsd2usi_round, "__builtin_ia32_vcvtsd2usi32", IX86_BUILTIN_VCVTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
34825 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtsd2usiq_round, "__builtin_ia32_vcvtsd2usi64", IX86_BUILTIN_VCVTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
34826 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtss2si_round, "__builtin_ia32_vcvtss2si32", IX86_BUILTIN_VCVTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
34827 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq_round, "__builtin_ia32_vcvtss2si64", IX86_BUILTIN_VCVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
34828 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtss2usi_round, "__builtin_ia32_vcvtss2usi32", IX86_BUILTIN_VCVTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
34829 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtss2usiq_round, "__builtin_ia32_vcvtss2usi64", IX86_BUILTIN_VCVTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
34830 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvttsd2si_round, "__builtin_ia32_vcvttsd2si32", IX86_BUILTIN_VCVTTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
34831 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq_round, "__builtin_ia32_vcvttsd2si64", IX86_BUILTIN_VCVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
34832 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttsd2usi_round, "__builtin_ia32_vcvttsd2usi32", IX86_BUILTIN_VCVTTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
34833 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttsd2usiq_round, "__builtin_ia32_vcvttsd2usi64", IX86_BUILTIN_VCVTTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
34834 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvttss2si_round, "__builtin_ia32_vcvttss2si32", IX86_BUILTIN_VCVTTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
34835 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq_round, "__builtin_ia32_vcvttss2si64", IX86_BUILTIN_VCVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
34836 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttss2usi_round, "__builtin_ia32_vcvttss2usi32", IX86_BUILTIN_VCVTTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
34837 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttss2usiq_round, "__builtin_ia32_vcvttss2usi64", IX86_BUILTIN_VCVTTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
34838 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask_round, "__builtin_ia32_vfmaddpd512_mask", IX86_BUILTIN_VFMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34839 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask3_round, "__builtin_ia32_vfmaddpd512_mask3", IX86_BUILTIN_VFMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34840 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_maskz_round, "__builtin_ia32_vfmaddpd512_maskz", IX86_BUILTIN_VFMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34841 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask_round, "__builtin_ia32_vfmaddps512_mask", IX86_BUILTIN_VFMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34842 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask3_round, "__builtin_ia32_vfmaddps512_mask3", IX86_BUILTIN_VFMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34843 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_maskz_round, "__builtin_ia32_vfmaddps512_maskz", IX86_BUILTIN_VFMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34844 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v2df_round, "__builtin_ia32_vfmaddsd3_round", IX86_BUILTIN_VFMADDSD3_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_INT },
34845 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v4sf_round, "__builtin_ia32_vfmaddss3_round", IX86_BUILTIN_VFMADDSS3_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_INT },
34846 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask_round, "__builtin_ia32_vfmaddsubpd512_mask", IX86_BUILTIN_VFMADDSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34847 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask3_round, "__builtin_ia32_vfmaddsubpd512_mask3", IX86_BUILTIN_VFMADDSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34848 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_maskz_round, "__builtin_ia32_vfmaddsubpd512_maskz", IX86_BUILTIN_VFMADDSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34849 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask_round, "__builtin_ia32_vfmaddsubps512_mask", IX86_BUILTIN_VFMADDSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34850 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask3_round, "__builtin_ia32_vfmaddsubps512_mask3", IX86_BUILTIN_VFMADDSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34851 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_maskz_round, "__builtin_ia32_vfmaddsubps512_maskz", IX86_BUILTIN_VFMADDSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34852 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v8df_mask3_round, "__builtin_ia32_vfmsubaddpd512_mask3", IX86_BUILTIN_VFMSUBADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34853 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v16sf_mask3_round, "__builtin_ia32_vfmsubaddps512_mask3", IX86_BUILTIN_VFMSUBADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34854 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v8df_mask3_round, "__builtin_ia32_vfmsubpd512_mask3", IX86_BUILTIN_VFMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34855 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v16sf_mask3_round, "__builtin_ia32_vfmsubps512_mask3", IX86_BUILTIN_VFMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34856 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v8df_mask_round, "__builtin_ia32_vfnmaddpd512_mask", IX86_BUILTIN_VFNMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34857 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v16sf_mask_round, "__builtin_ia32_vfnmaddps512_mask", IX86_BUILTIN_VFNMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34858 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask_round, "__builtin_ia32_vfnmsubpd512_mask", IX86_BUILTIN_VFNMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34859 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask3_round, "__builtin_ia32_vfnmsubpd512_mask3", IX86_BUILTIN_VFNMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
34860 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask_round, "__builtin_ia32_vfnmsubps512_mask", IX86_BUILTIN_VFNMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34861 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask3_round, "__builtin_ia32_vfnmsubps512_mask3", IX86_BUILTIN_VFNMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
34864 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v8df_mask_round, "__builtin_ia32_exp2pd_mask", IX86_BUILTIN_EXP2PD_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
34865 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf_mask_round, "__builtin_ia32_exp2ps_mask", IX86_BUILTIN_EXP2PS_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
34866 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v8df_mask_round, "__builtin_ia32_rcp28pd_mask", IX86_BUILTIN_RCP28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
34867 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v16sf_mask_round, "__builtin_ia32_rcp28ps_mask", IX86_BUILTIN_RCP28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
34868 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v2df_round, "__builtin_ia32_rcp28sd_round", IX86_BUILTIN_RCP28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
34869 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v4sf_round, "__builtin_ia32_rcp28ss_round", IX86_BUILTIN_RCP28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
34870 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v8df_mask_round, "__builtin_ia32_rsqrt28pd_mask", IX86_BUILTIN_RSQRT28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
34871 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v16sf_mask_round, "__builtin_ia32_rsqrt28ps_mask", IX86_BUILTIN_RSQRT28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
34872 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v2df_round, "__builtin_ia32_rsqrt28sd_round", IX86_BUILTIN_RSQRT28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
34873 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v4sf_round, "__builtin_ia32_rsqrt28ss_round", IX86_BUILTIN_RSQRT28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
34876 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangesv2df_round, "__builtin_ia32_rangesd128_round", IX86_BUILTIN_RANGESD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
34877 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangesv4sf_round, "__builtin_ia32_rangess128_round", IX86_BUILTIN_RANGESS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
34878 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2qq512_mask", IX86_BUILTIN_CVTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
34879 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_cvtps2qqv8di_mask_round, "__builtin_ia32_cvtps2qq512_mask", IX86_BUILTIN_CVTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
34880 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2uqq512_mask", IX86_BUILTIN_CVTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
34881 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_cvtps2uqqv8di_mask_round, "__builtin_ia32_cvtps2uqq512_mask", IX86_BUILTIN_CVTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
34882 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_floatv8div8sf2_mask_round, "__builtin_ia32_cvtqq2ps512_mask", IX86_BUILTIN_CVTQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT },
34883 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufloatv8div8sf2_mask_round, "__builtin_ia32_cvtuqq2ps512_mask", IX86_BUILTIN_CVTUQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT },
34884 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_floatv8div8df2_mask_round, "__builtin_ia32_cvtqq2pd512_mask", IX86_BUILTIN_CVTQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT },
34885 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufloatv8div8df2_mask_round, "__builtin_ia32_cvtuqq2pd512_mask", IX86_BUILTIN_CVTUQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT },
34886 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2qq512_mask", IX86_BUILTIN_CVTTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
34887 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2uqq512_mask", IX86_BUILTIN_CVTTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
34888 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2qq512_mask", IX86_BUILTIN_CVTTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
34889 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2uqq512_mask", IX86_BUILTIN_CVTTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
34890 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangepv16sf_mask_round, "__builtin_ia32_rangeps512_mask", IX86_BUILTIN_RANGEPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT },
34891 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangepv8df_mask_round, "__builtin_ia32_rangepd512_mask", IX86_BUILTIN_RANGEPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT },
34894 /* Builtins for MPX. */
/* Side-effecting MPX (Memory Protection Extensions) builtins: bound
   store (bndstx) and lower/upper bound checks (bndcl/bndcu).  Every
   entry carries insn code 0 ((enum insn_code)0), i.e. no named insn
   pattern is attached to the table entry.
   NOTE(review): these appear to be expanded specially rather than
   through the generic table-driven expander -- confirm against the
   MPX handling in ix86_expand_builtin.  */
34895 static const struct builtin_description bdesc_mpx[] =
34897 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndstx", IX86_BUILTIN_BNDSTX, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND_PCVOID },
34898 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcl", IX86_BUILTIN_BNDCL, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
34899 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcu", IX86_BUILTIN_BNDCU, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
34902 /* Const builtins for MPX. */
/* Const (no-side-effect) MPX builtins: bound creation (bndmk), bound
   load (bndldx), bound narrowing/intersection, object size query, and
   accessors for the lower/upper bound and the returned bounds.  As in
   bdesc_mpx, every entry has insn code 0 -- no named insn pattern.
   NOTE(review): presumably expanded specially by the builtin expander
   rather than via the generic table path -- confirm in
   ix86_expand_builtin.  */
34903 static const struct builtin_description bdesc_mpx_const[] =
34905 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndmk", IX86_BUILTIN_BNDMK, UNKNOWN, (int) BND_FTYPE_PCVOID_ULONG },
34906 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndldx", IX86_BUILTIN_BNDLDX, UNKNOWN, (int) BND_FTYPE_PCVOID_PCVOID },
34907 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_narrow_bounds", IX86_BUILTIN_BNDNARROW, UNKNOWN, (int) PVOID_FTYPE_PCVOID_BND_ULONG },
34908 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndint", IX86_BUILTIN_BNDINT, UNKNOWN, (int) BND_FTYPE_BND_BND },
34909 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_sizeof", IX86_BUILTIN_SIZEOF, UNKNOWN, (int) ULONG_FTYPE_VOID },
34910 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndlower", IX86_BUILTIN_BNDLOWER, UNKNOWN, (int) PVOID_FTYPE_BND },
34911 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndupper", IX86_BUILTIN_BNDUPPER, UNKNOWN, (int) PVOID_FTYPE_BND },
34912 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndret", IX86_BUILTIN_BNDRET, UNKNOWN, (int) BND_FTYPE_PCVOID },
34915 /* FMA4 and XOP. */
/* Shorthand aliases mapping the FMA4/XOP builtin-table argument-shape
   names onto the corresponding ix86 function-type enumerators.  The
   naming scheme encodes: number of operands (1/2/3/4), element kind
   (SF/DF/SI/DI/HI/QI), a trailing "2" for the 256-bit (doubled-width)
   vector variant, and a suffix for an extra operand kind (_IMM for an
   immediate, _CMP for a comparison code, _TF, or a differing result
   element such as _SI_DI).  These are used by bdesc_multi_arg below.  */
/* Four-operand forms: vector, vector, selector vector, immediate.  */
34916 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
34917 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
34918 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
34919 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
/* Three-operand forms (e.g. fused multiply-add, pcmov, pperm).  */
34920 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
34921 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
34922 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
34923 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
34924 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
34925 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
34926 #define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
34927 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
34928 #define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
34929 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
34930 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
34931 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
34932 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
34933 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
/* Two-operand forms, including immediate-shift (_IMM), comparison
   (_CMP) and _TF variants.  */
34934 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
34935 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
34936 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
34937 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
34938 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
34939 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
34940 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
34941 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
34942 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
34943 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
34944 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
34945 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
34946 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
34947 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
34948 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
34949 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
34950 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
34951 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
34952 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
34953 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
/* One-operand forms; the _X_Y variants take element kind X and
   produce element kind Y (widening conversions).  */
34954 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
34955 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
34956 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
34957 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
34958 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
34959 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
34960 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
34961 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
34962 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
34963 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
34964 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
34965 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
34966 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
34967 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
34969 static const struct builtin_description bdesc_multi_arg[] =
34971 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
34972 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
34973 UNKNOWN, (int)MULTI_ARG_3_SF },
34974 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
34975 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
34976 UNKNOWN, (int)MULTI_ARG_3_DF },
34978 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf,
34979 "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3,
34980 UNKNOWN, (int)MULTI_ARG_3_SF },
34981 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df,
34982 "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3,
34983 UNKNOWN, (int)MULTI_ARG_3_DF },
34985 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
34986 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
34987 UNKNOWN, (int)MULTI_ARG_3_SF },
34988 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
34989 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
34990 UNKNOWN, (int)MULTI_ARG_3_DF },
34991 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
34992 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
34993 UNKNOWN, (int)MULTI_ARG_3_SF2 },
34994 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
34995 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
34996 UNKNOWN, (int)MULTI_ARG_3_DF2 },
34998 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
34999 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
35000 UNKNOWN, (int)MULTI_ARG_3_SF },
35001 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
35002 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
35003 UNKNOWN, (int)MULTI_ARG_3_DF },
35004 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
35005 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
35006 UNKNOWN, (int)MULTI_ARG_3_SF2 },
35007 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
35008 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
35009 UNKNOWN, (int)MULTI_ARG_3_DF2 },
35011 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
35012 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
35013 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
35014 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
35015 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi",IX86_BUILTIN_VPCMOV_V16QI,UNKNOWN, (int)MULTI_ARG_3_QI },
35016 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
35017 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
35019 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
35020 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
35021 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
35022 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
35023 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
35024 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
35025 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
35027 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
35029 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
35030 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
35031 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
35032 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
35033 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
35034 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
35035 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
35036 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
35037 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
35038 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
35039 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
35040 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
35042 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
35043 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
35044 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
35045 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
35046 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
35047 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
35048 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
35049 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
35050 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
35051 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
35052 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
35053 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
35054 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
35055 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
35056 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
35057 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
35059 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_1_SF },
35060 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_1_DF },
35061 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
35062 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
35063 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
35064 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
35066 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
35067 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
35068 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
35069 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
35070 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
35071 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
35072 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
35073 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
35074 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
35075 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
35076 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
35077 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
35078 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
35079 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
35080 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
35082 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
35083 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
35084 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
35085 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
35086 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
35087 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
35088 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
35090 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
35091 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
35092 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
35093 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
35094 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
35095 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
35096 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
35098 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
35099 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
35100 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
35101 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
35102 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
35103 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
35104 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
35106 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
35107 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
35108 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
35109 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
35110 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
35111 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
35112 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
35114 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
35115 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
35116 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
35117 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
35118 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
35119 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
35120 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
35122 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
35123 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
35124 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
35125 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
35126 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
35127 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
35128 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
35130 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
35131 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
35132 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
35133 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
35134 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
35135 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
35136 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
35138 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
35139 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
35140 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
35141 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
35142 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
35143 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
35144 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
35146 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
35147 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
35148 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
35149 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
35150 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
35151 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
35152 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
35153 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
35155 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
35156 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
35157 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
35158 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
35159 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
35160 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
35161 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
35162 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
35164 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
35165 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
35166 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
35167 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
35171 /* TM vector builtins. */
35173 /* Reuse the existing x86-specific `struct builtin_description' cause
35174 we're lazy. Add casts to make them fit. */
35175 static const struct builtin_description bdesc_tm[] =
35177 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
35178 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
35179 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
35180 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
35181 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
35182 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
35183 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
35185 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
35186 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
35187 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
35188 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
35189 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
35190 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
35191 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
35193 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
35194 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
35195 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
35196 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
35197 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
35198 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
35199 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
35201 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
35202 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
35203 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
35206 /* TM callbacks. */
35208 /* Return the builtin decl needed to load a vector of TYPE. */
35211 ix86_builtin_tm_load (tree type)
35213 if (TREE_CODE (type) == VECTOR_TYPE)
35215 switch (tree_to_uhwi (TYPE_SIZE (type)))
35218 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64);
35220 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128);
35222 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256);
35228 /* Return the builtin decl needed to store a vector of TYPE. */
35231 ix86_builtin_tm_store (tree type)
35233 if (TREE_CODE (type) == VECTOR_TYPE)
35235 switch (tree_to_uhwi (TYPE_SIZE (type)))
35238 return builtin_decl_explicit (BUILT_IN_TM_STORE_M64);
35240 return builtin_decl_explicit (BUILT_IN_TM_STORE_M128);
35242 return builtin_decl_explicit (BUILT_IN_TM_STORE_M256);
35248 /* Initialize the transactional memory vector load/store builtins. */
35251 ix86_init_tm_builtins (void)
35253 enum ix86_builtin_func_type ftype;
35254 const struct builtin_description *d;
35257 tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
35258 tree attrs_log, attrs_type_log;
35263 /* If there are no builtins defined, we must be compiling in a
35264 language without trans-mem support. */
35265 if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
35268 /* Use whatever attributes a normal TM load has. */
35269 decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
35270 attrs_load = DECL_ATTRIBUTES (decl);
35271 attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
35272 /* Use whatever attributes a normal TM store has. */
35273 decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
35274 attrs_store = DECL_ATTRIBUTES (decl);
35275 attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
35276 /* Use whatever attributes a normal TM log has. */
35277 decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
35278 attrs_log = DECL_ATTRIBUTES (decl);
35279 attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));
35281 for (i = 0, d = bdesc_tm;
35282 i < ARRAY_SIZE (bdesc_tm);
35285 if ((d->mask & ix86_isa_flags) != 0
35286 || (lang_hooks.builtin_function
35287 == lang_hooks.builtin_function_ext_scope))
35289 tree type, attrs, attrs_type;
35290 enum built_in_function code = (enum built_in_function) d->code;
35292 ftype = (enum ix86_builtin_func_type) d->flag;
35293 type = ix86_get_builtin_func_type (ftype);
35295 if (BUILTIN_TM_LOAD_P (code))
35297 attrs = attrs_load;
35298 attrs_type = attrs_type_load;
35300 else if (BUILTIN_TM_STORE_P (code))
35302 attrs = attrs_store;
35303 attrs_type = attrs_type_store;
35308 attrs_type = attrs_type_log;
35310 decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
35311 /* The builtin without the prefix for
35312 calling it directly. */
35313 d->name + strlen ("__builtin_"),
35315 /* add_builtin_function() will set the DECL_ATTRIBUTES, now
35316 set the TYPE_ATTRIBUTES. */
35317 decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);
35319 set_builtin_decl (code, decl, false);
35324 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
35325 in the current target ISA to allow the user to compile particular modules
35326 with different target specific options that differ from the command line
35329 ix86_init_mmx_sse_builtins (void)
35331 const struct builtin_description * d;
35332 enum ix86_builtin_func_type ftype;
35335 /* Add all special builtins with variable number of operands. */
35336 for (i = 0, d = bdesc_special_args;
35337 i < ARRAY_SIZE (bdesc_special_args);
35343 ftype = (enum ix86_builtin_func_type) d->flag;
35344 def_builtin (d->mask, d->name, ftype, d->code);
35347 /* Add all builtins with variable number of operands. */
35348 for (i = 0, d = bdesc_args;
35349 i < ARRAY_SIZE (bdesc_args);
35355 ftype = (enum ix86_builtin_func_type) d->flag;
35356 def_builtin_const (d->mask, d->name, ftype, d->code);
35359 /* Add all builtins with rounding. */
35360 for (i = 0, d = bdesc_round_args;
35361 i < ARRAY_SIZE (bdesc_round_args);
35367 ftype = (enum ix86_builtin_func_type) d->flag;
35368 def_builtin_const (d->mask, d->name, ftype, d->code);
35371 /* pcmpestr[im] insns. */
35372 for (i = 0, d = bdesc_pcmpestr;
35373 i < ARRAY_SIZE (bdesc_pcmpestr);
35376 if (d->code == IX86_BUILTIN_PCMPESTRM128)
35377 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
35379 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
35380 def_builtin_const (d->mask, d->name, ftype, d->code);
35383 /* pcmpistr[im] insns. */
35384 for (i = 0, d = bdesc_pcmpistr;
35385 i < ARRAY_SIZE (bdesc_pcmpistr);
35388 if (d->code == IX86_BUILTIN_PCMPISTRM128)
35389 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
35391 ftype = INT_FTYPE_V16QI_V16QI_INT;
35392 def_builtin_const (d->mask, d->name, ftype, d->code);
35395 /* comi/ucomi insns. */
35396 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
35398 if (d->mask == OPTION_MASK_ISA_SSE2)
35399 ftype = INT_FTYPE_V2DF_V2DF;
35401 ftype = INT_FTYPE_V4SF_V4SF;
35402 def_builtin_const (d->mask, d->name, ftype, d->code);
35406 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
35407 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
35408 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
35409 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
35411 /* SSE or 3DNow!A */
35412 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
35413 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
35414 IX86_BUILTIN_MASKMOVQ);
35417 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
35418 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
35420 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
35421 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
35422 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
35423 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
35426 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
35427 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
35428 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
35429 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
35432 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
35433 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
35434 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
35435 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
35436 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
35437 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
35438 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
35439 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
35440 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
35441 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
35442 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
35443 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
35446 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
35447 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
35450 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
35451 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
35452 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
35453 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
35454 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
35455 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
35456 IX86_BUILTIN_RDRAND64_STEP);
35459 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
35460 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
35461 IX86_BUILTIN_GATHERSIV2DF);
35463 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
35464 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
35465 IX86_BUILTIN_GATHERSIV4DF);
35467 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
35468 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
35469 IX86_BUILTIN_GATHERDIV2DF);
35471 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
35472 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
35473 IX86_BUILTIN_GATHERDIV4DF);
35475 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
35476 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
35477 IX86_BUILTIN_GATHERSIV4SF);
35479 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
35480 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
35481 IX86_BUILTIN_GATHERSIV8SF);
35483 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
35484 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
35485 IX86_BUILTIN_GATHERDIV4SF);
35487 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
35488 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
35489 IX86_BUILTIN_GATHERDIV8SF);
35491 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
35492 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
35493 IX86_BUILTIN_GATHERSIV2DI);
35495 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
35496 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
35497 IX86_BUILTIN_GATHERSIV4DI);
35499 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
35500 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
35501 IX86_BUILTIN_GATHERDIV2DI);
35503 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
35504 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
35505 IX86_BUILTIN_GATHERDIV4DI);
35507 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
35508 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
35509 IX86_BUILTIN_GATHERSIV4SI);
35511 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
35512 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
35513 IX86_BUILTIN_GATHERSIV8SI);
35515 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
35516 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
35517 IX86_BUILTIN_GATHERDIV4SI);
35519 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
35520 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
35521 IX86_BUILTIN_GATHERDIV8SI);
35523 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df ",
35524 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
35525 IX86_BUILTIN_GATHERALTSIV4DF);
35527 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256 ",
35528 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
35529 IX86_BUILTIN_GATHERALTDIV8SF);
35531 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di ",
35532 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
35533 IX86_BUILTIN_GATHERALTSIV4DI);
35535 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256 ",
35536 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
35537 IX86_BUILTIN_GATHERALTDIV8SI);
35540 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16sf",
35541 V16SF_FTYPE_V16SF_PCFLOAT_V16SI_HI_INT,
35542 IX86_BUILTIN_GATHER3SIV16SF);
35544 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8df",
35545 V8DF_FTYPE_V8DF_PCDOUBLE_V8SI_QI_INT,
35546 IX86_BUILTIN_GATHER3SIV8DF);
35548 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16sf",
35549 V8SF_FTYPE_V8SF_PCFLOAT_V8DI_QI_INT,
35550 IX86_BUILTIN_GATHER3DIV16SF);
35552 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8df",
35553 V8DF_FTYPE_V8DF_PCDOUBLE_V8DI_QI_INT,
35554 IX86_BUILTIN_GATHER3DIV8DF);
35556 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16si",
35557 V16SI_FTYPE_V16SI_PCINT_V16SI_HI_INT,
35558 IX86_BUILTIN_GATHER3SIV16SI);
35560 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8di",
35561 V8DI_FTYPE_V8DI_PCINT64_V8SI_QI_INT,
35562 IX86_BUILTIN_GATHER3SIV8DI);
35564 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16si",
35565 V8SI_FTYPE_V8SI_PCINT_V8DI_QI_INT,
35566 IX86_BUILTIN_GATHER3DIV16SI);
35568 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8di",
35569 V8DI_FTYPE_V8DI_PCINT64_V8DI_QI_INT,
35570 IX86_BUILTIN_GATHER3DIV8DI);
35572 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8df ",
35573 V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT,
35574 IX86_BUILTIN_GATHER3ALTSIV8DF);
35576 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8sf ",
35577 V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT,
35578 IX86_BUILTIN_GATHER3ALTDIV16SF);
35580 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8di ",
35581 V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT,
35582 IX86_BUILTIN_GATHER3ALTSIV8DI);
35584 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8si ",
35585 V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT,
35586 IX86_BUILTIN_GATHER3ALTDIV16SI);
35588 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16sf",
35589 VOID_FTYPE_PFLOAT_HI_V16SI_V16SF_INT,
35590 IX86_BUILTIN_SCATTERSIV16SF);
35592 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8df",
35593 VOID_FTYPE_PDOUBLE_QI_V8SI_V8DF_INT,
35594 IX86_BUILTIN_SCATTERSIV8DF);
35596 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16sf",
35597 VOID_FTYPE_PFLOAT_QI_V8DI_V8SF_INT,
35598 IX86_BUILTIN_SCATTERDIV16SF);
35600 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8df",
35601 VOID_FTYPE_PDOUBLE_QI_V8DI_V8DF_INT,
35602 IX86_BUILTIN_SCATTERDIV8DF);
35604 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16si",
35605 VOID_FTYPE_PINT_HI_V16SI_V16SI_INT,
35606 IX86_BUILTIN_SCATTERSIV16SI);
35608 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8di",
35609 VOID_FTYPE_PLONGLONG_QI_V8SI_V8DI_INT,
35610 IX86_BUILTIN_SCATTERSIV8DI);
35612 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16si",
35613 VOID_FTYPE_PINT_QI_V8DI_V8SI_INT,
35614 IX86_BUILTIN_SCATTERDIV16SI);
35616 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8di",
35617 VOID_FTYPE_PLONGLONG_QI_V8DI_V8DI_INT,
35618 IX86_BUILTIN_SCATTERDIV8DI);
35621 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2df",
35622 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_QI_INT,
35623 IX86_BUILTIN_GATHER3SIV2DF);
35625 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4df",
35626 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_QI_INT,
35627 IX86_BUILTIN_GATHER3SIV4DF);
35629 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2df",
35630 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_QI_INT,
35631 IX86_BUILTIN_GATHER3DIV2DF);
35633 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4df",
35634 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_QI_INT,
35635 IX86_BUILTIN_GATHER3DIV4DF);
35637 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4sf",
35638 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_QI_INT,
35639 IX86_BUILTIN_GATHER3SIV4SF);
35641 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8sf",
35642 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_QI_INT,
35643 IX86_BUILTIN_GATHER3SIV8SF);
35645 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4sf",
35646 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_QI_INT,
35647 IX86_BUILTIN_GATHER3DIV4SF);
35649 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8sf",
35650 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_QI_INT,
35651 IX86_BUILTIN_GATHER3DIV8SF);
35653 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2di",
35654 V2DI_FTYPE_V2DI_PCINT64_V4SI_QI_INT,
35655 IX86_BUILTIN_GATHER3SIV2DI);
35657 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4di",
35658 V4DI_FTYPE_V4DI_PCINT64_V4SI_QI_INT,
35659 IX86_BUILTIN_GATHER3SIV4DI);
35661 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2di",
35662 V2DI_FTYPE_V2DI_PCINT64_V2DI_QI_INT,
35663 IX86_BUILTIN_GATHER3DIV2DI);
35665 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4di",
35666 V4DI_FTYPE_V4DI_PCINT64_V4DI_QI_INT,
35667 IX86_BUILTIN_GATHER3DIV4DI);
35669 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4si",
35670 V4SI_FTYPE_V4SI_PCINT_V4SI_QI_INT,
35671 IX86_BUILTIN_GATHER3SIV4SI);
35673 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8si",
35674 V8SI_FTYPE_V8SI_PCINT_V8SI_QI_INT,
35675 IX86_BUILTIN_GATHER3SIV8SI);
35677 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4si",
35678 V4SI_FTYPE_V4SI_PCINT_V2DI_QI_INT,
35679 IX86_BUILTIN_GATHER3DIV4SI);
35681 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8si",
35682 V4SI_FTYPE_V4SI_PCINT_V4DI_QI_INT,
35683 IX86_BUILTIN_GATHER3DIV8SI);
35685 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4df ",
35686 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_QI_INT,
35687 IX86_BUILTIN_GATHER3ALTSIV4DF);
35689 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8sf ",
35690 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_QI_INT,
35691 IX86_BUILTIN_GATHER3ALTDIV8SF);
35693 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4di ",
35694 V4DI_FTYPE_V4DI_PCINT64_V8SI_QI_INT,
35695 IX86_BUILTIN_GATHER3ALTSIV4DI);
35697 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8si ",
35698 V8SI_FTYPE_V8SI_PCINT_V4DI_QI_INT,
35699 IX86_BUILTIN_GATHER3ALTDIV8SI);
35701 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8sf",
35702 VOID_FTYPE_PFLOAT_QI_V8SI_V8SF_INT,
35703 IX86_BUILTIN_SCATTERSIV8SF);
35705 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4sf",
35706 VOID_FTYPE_PFLOAT_QI_V4SI_V4SF_INT,
35707 IX86_BUILTIN_SCATTERSIV4SF);
35709 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4df",
35710 VOID_FTYPE_PDOUBLE_QI_V4SI_V4DF_INT,
35711 IX86_BUILTIN_SCATTERSIV4DF);
35713 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2df",
35714 VOID_FTYPE_PDOUBLE_QI_V4SI_V2DF_INT,
35715 IX86_BUILTIN_SCATTERSIV2DF);
35717 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8sf",
35718 VOID_FTYPE_PFLOAT_QI_V4DI_V4SF_INT,
35719 IX86_BUILTIN_SCATTERDIV8SF);
35721 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4sf",
35722 VOID_FTYPE_PFLOAT_QI_V2DI_V4SF_INT,
35723 IX86_BUILTIN_SCATTERDIV4SF);
35725 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4df",
35726 VOID_FTYPE_PDOUBLE_QI_V4DI_V4DF_INT,
35727 IX86_BUILTIN_SCATTERDIV4DF);
35729 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2df",
35730 VOID_FTYPE_PDOUBLE_QI_V2DI_V2DF_INT,
35731 IX86_BUILTIN_SCATTERDIV2DF);
35733 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8si",
35734 VOID_FTYPE_PINT_QI_V8SI_V8SI_INT,
35735 IX86_BUILTIN_SCATTERSIV8SI);
35737 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4si",
35738 VOID_FTYPE_PINT_QI_V4SI_V4SI_INT,
35739 IX86_BUILTIN_SCATTERSIV4SI);
35741 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4di",
35742 VOID_FTYPE_PLONGLONG_QI_V4SI_V4DI_INT,
35743 IX86_BUILTIN_SCATTERSIV4DI);
35745 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2di",
35746 VOID_FTYPE_PLONGLONG_QI_V4SI_V2DI_INT,
35747 IX86_BUILTIN_SCATTERSIV2DI);
35749 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8si",
35750 VOID_FTYPE_PINT_QI_V4DI_V4SI_INT,
35751 IX86_BUILTIN_SCATTERDIV8SI);
35753 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4si",
35754 VOID_FTYPE_PINT_QI_V2DI_V4SI_INT,
35755 IX86_BUILTIN_SCATTERDIV4SI);
35757 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4di",
35758 VOID_FTYPE_PLONGLONG_QI_V4DI_V4DI_INT,
35759 IX86_BUILTIN_SCATTERDIV4DI);
35761 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2di",
35762 VOID_FTYPE_PLONGLONG_QI_V2DI_V2DI_INT,
35763 IX86_BUILTIN_SCATTERDIV2DI);
35764 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltsiv8df ",
35765 VOID_FTYPE_PDOUBLE_QI_V16SI_V8DF_INT,
35766 IX86_BUILTIN_SCATTERALTSIV8DF);
35768 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltdiv8sf ",
35769 VOID_FTYPE_PFLOAT_HI_V8DI_V16SF_INT,
35770 IX86_BUILTIN_SCATTERALTDIV16SF);
35772 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltsiv8di ",
35773 VOID_FTYPE_PLONGLONG_QI_V16SI_V8DI_INT,
35774 IX86_BUILTIN_SCATTERALTSIV8DI);
35776 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatteraltdiv8si ",
35777 VOID_FTYPE_PINT_HI_V8DI_V16SI_INT,
35778 IX86_BUILTIN_SCATTERALTDIV16SI);
35781 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdpd",
35782 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
35783 IX86_BUILTIN_GATHERPFDPD);
35784 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdps",
35785 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
35786 IX86_BUILTIN_GATHERPFDPS);
35787 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqpd",
35788 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
35789 IX86_BUILTIN_GATHERPFQPD);
35790 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqps",
35791 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
35792 IX86_BUILTIN_GATHERPFQPS);
35793 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdpd",
35794 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
35795 IX86_BUILTIN_SCATTERPFDPD);
35796 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdps",
35797 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
35798 IX86_BUILTIN_SCATTERPFDPS);
35799 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqpd",
35800 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
35801 IX86_BUILTIN_SCATTERPFQPD);
35802 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqps",
35803 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
35804 IX86_BUILTIN_SCATTERPFQPS);
35807 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg1",
35808 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG1);
35809 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg2",
35810 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG2);
35811 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1nexte",
35812 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1NEXTE);
35813 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1rnds4",
35814 V4SI_FTYPE_V4SI_V4SI_INT, IX86_BUILTIN_SHA1RNDS4);
35815 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg1",
35816 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG1);
35817 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg2",
35818 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG2);
35819 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256rnds2",
35820 V4SI_FTYPE_V4SI_V4SI_V4SI, IX86_BUILTIN_SHA256RNDS2);
35823 def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort",
35824 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);
35826 /* MMX access to the vec_init patterns. */
35827 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
35828 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
35830 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
35831 V4HI_FTYPE_HI_HI_HI_HI,
35832 IX86_BUILTIN_VEC_INIT_V4HI);
35834 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
35835 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
35836 IX86_BUILTIN_VEC_INIT_V8QI);
35838 /* Access to the vec_extract patterns. */
35839 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
35840 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
35841 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
35842 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
35843 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
35844 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
35845 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
35846 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
35847 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
35848 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
35850 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
35851 "__builtin_ia32_vec_ext_v4hi",
35852 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
35854 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
35855 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
35857 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
35858 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
35860 /* Access to the vec_set patterns. */
35861 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
35862 "__builtin_ia32_vec_set_v2di",
35863 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
35865 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
35866 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
35868 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
35869 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
35871 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
35872 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
35874 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
35875 "__builtin_ia32_vec_set_v4hi",
35876 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
35878 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
35879 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
35882 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_hi_step",
35883 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDSEED16_STEP);
35884 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_si_step",
35885 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDSEED32_STEP);
35886 def_builtin (OPTION_MASK_ISA_RDSEED | OPTION_MASK_ISA_64BIT,
35887 "__builtin_ia32_rdseed_di_step",
35888 INT_FTYPE_PULONGLONG, IX86_BUILTIN_RDSEED64_STEP);
35891 def_builtin (0, "__builtin_ia32_addcarryx_u32",
35892 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_ADDCARRYX32);
35893 def_builtin (OPTION_MASK_ISA_64BIT,
35894 "__builtin_ia32_addcarryx_u64",
35895 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
35896 IX86_BUILTIN_ADDCARRYX64);
35899 def_builtin (0, "__builtin_ia32_sbb_u32",
35900 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_SBB32);
35901 def_builtin (OPTION_MASK_ISA_64BIT,
35902 "__builtin_ia32_sbb_u64",
35903 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
35904 IX86_BUILTIN_SBB64);
35906 /* Read/write FLAGS. */
35907 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u32",
35908 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
35909 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u64",
35910 UINT64_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
35911 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u32",
35912 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_WRITE_FLAGS);
35913 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u64",
35914 VOID_FTYPE_UINT64, IX86_BUILTIN_WRITE_FLAGS);
35917 def_builtin (OPTION_MASK_ISA_CLFLUSHOPT, "__builtin_ia32_clflushopt",
35918 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSHOPT);
35921 def_builtin (OPTION_MASK_ISA_CLWB, "__builtin_ia32_clwb",
35922 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLWB);
35924 /* MONITORX and MWAITX. */
35925 def_builtin (OPTION_MASK_ISA_MWAITX, "__builtin_ia32_monitorx",
35926 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITORX);
35927 def_builtin (OPTION_MASK_ISA_MWAITX, "__builtin_ia32_mwaitx",
35928 VOID_FTYPE_UNSIGNED_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAITX);
35931 def_builtin (OPTION_MASK_ISA_CLZERO, "__builtin_ia32_clzero",
35932 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLZERO);
35934 /* Add FMA4 multi-arg argument instructions */
35935 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
35940 ftype = (enum ix86_builtin_func_type) d->flag;
35941 def_builtin_const (d->mask, d->name, ftype, d->code);
/* Register the MPX (Memory Protection Extensions) builtins described in
   the bdesc_mpx and bdesc_mpx_const tables.  Unlike most builtins these
   are additionally marked "leaf" and nothrow — see the rationale in the
   comment inside the first loop.  */
35946 ix86_init_mpx_builtins ()
35948   const struct builtin_description * d;
35949   enum ix86_builtin_func_type ftype;
      /* First table: plain (non-const) MPX builtins.  */
35953   for (i = 0, d = bdesc_mpx;
35954        i < ARRAY_SIZE (bdesc_mpx);
35960       ftype = (enum ix86_builtin_func_type) d->flag;
35961       decl = def_builtin (d->mask, d->name, ftype, d->code);
35963       /* With no leaf and nothrow flags for MPX builtins
35964 	 abnormal edges may follow its call when setjmp
35965 	 presents in the function.   Since we may have a lot
35966 	 of MPX builtins calls it causes lots of useless
35967 	 edges and enormous PHI nodes.  To avoid this we mark
35968 	 MPX builtins as leaf and nothrow.  */
35971 	  DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
35973 	  TREE_NOTHROW (decl) = 1;
      /* NOTE(review): decl may be NULL_TREE when the required ISA is not
	 enabled yet; presumably this branch records the flags so they can
	 be applied when the builtin is materialized later — confirm against
	 the elided conditional above.  */
35977 	  ix86_builtins_isa[(int)d->code].leaf_p = true;
35978 	  ix86_builtins_isa[(int)d->code].nothrow_p = true;
      /* Second table: const MPX builtins, registered via def_builtin_const
	 and given the same leaf/nothrow treatment.  */
35982   for (i = 0, d = bdesc_mpx_const;
35983        i < ARRAY_SIZE (bdesc_mpx_const);
35989       ftype = (enum ix86_builtin_func_type) d->flag;
35990       decl = def_builtin_const (d->mask, d->name, ftype, d->code);
35994 	  DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
35996 	  TREE_NOTHROW (decl) = 1;
36000 	  ix86_builtins_isa[(int)d->code].leaf_p = true;
36001 	  ix86_builtins_isa[(int)d->code].nothrow_p = true;
36006 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
36007    to return a pointer to VERSION_DECL if the outcome of the expression
36008    formed by PREDICATE_CHAIN is true.  This function will be called during
36009    version dispatch to decide which function version to execute.  It returns
36010    the basic block at the end, to which more conditions can be added.  */
36013 add_condition_to_bb (tree function_decl, tree version_decl,
36014 		     tree predicate_chain, basic_block new_bb)
36016   gimple *return_stmt;
36017   tree convert_expr, result_var;
36018   gimple *convert_stmt;
36019   gimple *call_cond_stmt;
36020   gimple *if_else_stmt;
36022   basic_block bb1, bb2, bb3;
36025   tree cond_var, and_expr_var = NULL_TREE;
36028   tree predicate_decl, predicate_arg;
36030   push_cfun (DECL_STRUCT_FUNCTION (function_decl));
36032   gcc_assert (new_bb != NULL);
36033   gseq = bb_seq (new_bb);
      /* Build "return (void *) &version_decl;" — the value an IFUNC
	 resolver returns to select this version.  */
36036   convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
36037 			 build_fold_addr_expr (version_decl));
36038   result_var = create_tmp_var (ptr_type_node);
36039   convert_stmt = gimple_build_assign (result_var, convert_expr);
36040   return_stmt = gimple_build_return (result_var);
      /* No predicates: this is the unconditional (default) version, so
	 just emit the return into NEW_BB.  */
36042   if (predicate_chain == NULL_TREE)
36044       gimple_seq_add_stmt (&gseq, convert_stmt);
36045       gimple_seq_add_stmt (&gseq, return_stmt);
36046       set_bb_seq (new_bb, gseq);
36047       gimple_set_bb (convert_stmt, new_bb);
36048       gimple_set_bb (return_stmt, new_bb);
      /* Emit one __builtin_cpu_is/__builtin_cpu_supports call per chain
	 entry (TREE_PURPOSE = predicate decl, TREE_VALUE = its string
	 argument), accumulating the results into AND_EXPR_VAR.  */
36053   while (predicate_chain != NULL)
36055       cond_var = create_tmp_var (integer_type_node);
36056       predicate_decl = TREE_PURPOSE (predicate_chain);
36057       predicate_arg = TREE_VALUE (predicate_chain);
36058       call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
36059       gimple_call_set_lhs (call_cond_stmt, cond_var);
36061       gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
36062       gimple_set_bb (call_cond_stmt, new_bb);
36063       gimple_seq_add_stmt (&gseq, call_cond_stmt);
36065       predicate_chain = TREE_CHAIN (predicate_chain);
36067       if (and_expr_var == NULL)
36068 	and_expr_var = cond_var;
36071 	  gimple *assign_stmt;
36072 	  /* Use MIN_EXPR to check if any integer is zero?.
36073 	     and_expr_var = min_expr <cond_var, and_expr_var>  */
36074 	  assign_stmt = gimple_build_assign (and_expr_var,
36075 	    			             build2 (MIN_EXPR, integer_type_node,
36076 					             cond_var, and_expr_var));
36078 	  gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
36079 	  gimple_set_bb (assign_stmt, new_bb);
36080 	  gimple_seq_add_stmt (&gseq, assign_stmt);
      /* Branch on AND_EXPR_VAR > 0, i.e. every predicate returned
	 nonzero.  */
36084   if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
36086 				    NULL_TREE, NULL_TREE);
36087   gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
36088   gimple_set_bb (if_else_stmt, new_bb);
36089   gimple_seq_add_stmt (&gseq, if_else_stmt);
36091   gimple_seq_add_stmt (&gseq, convert_stmt);
36092   gimple_seq_add_stmt (&gseq, return_stmt);
36093   set_bb_seq (new_bb, gseq);
      /* Split after the condition: the taken (TRUE) edge falls into the
	 block holding the return of this version.  */
36096   e12 = split_block (bb1, if_else_stmt);
36098   e12->flags &= ~EDGE_FALLTHRU;
36099   e12->flags |= EDGE_TRUE_VALUE;
      /* Split again after the return; the convert/return statements now
	 live in the middle block (bb2).  */
36101   e23 = split_block (bb2, return_stmt);
36103   gimple_set_bb (convert_stmt, bb2);
36104   gimple_set_bb (return_stmt, bb2);
      /* FALSE edge skips this version's return and continues the
	 dispatch chain in bb3; the returning block exits the function.  */
36107   make_edge (bb1, bb3, EDGE_FALSE_VALUE);
36110   make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
36117 /* This parses the attribute arguments to target in DECL and determines
36118    the right builtin to use to match the platform specification.
36119    It returns the priority value for this version decl.  If PREDICATE_LIST
36120    is not NULL, it stores the list of cpu features that need to be checked
36121    before dispatching this function.  */
36123 static unsigned int
36124 get_builtin_code_for_version (tree decl, tree *predicate_list)
36127   struct cl_target_option cur_target;
36129   struct cl_target_option *new_target;
36130   const char *arg_str = NULL;
36131   const char *attrs_str = NULL;
36132   char *tok_str = NULL;
36135   /* Priority of i386 features, greater value is higher priority.   This is
36136      used to decide the order in which function dispatch must happen.  For
36137      instance, a version specialized for SSE4.2 should be checked for dispatch
36138      before a version for SSE3, as SSE4.2 implies SSE3.  */
36139   enum feature_priority
36172   enum feature_priority priority = P_ZERO;
36174   /* These are the target attribute strings for which a dispatcher is
36175      available, from fold_builtin_cpu.  */
36177   static struct _feature_list
36179       const char *const name;
36180       const enum feature_priority priority;
36182   const feature_list[] =
36188       {"sse4a", P_SSE4_A},
36189       {"ssse3", P_SSSE3},
36190       {"sse4.1", P_SSE4_1},
36191       {"sse4.2", P_SSE4_2},
36192       {"popcnt", P_POPCNT},
36194       {"pclmul", P_PCLMUL},
36202       {"avx512f", P_AVX512F}
36206   static unsigned int NUM_FEATURES
36207     = sizeof (feature_list) / sizeof (struct _feature_list);
36211   tree predicate_chain = NULL_TREE;
36212   tree predicate_decl, predicate_arg;
      /* Every versioned function must carry a "target" attribute whose
	 value is a string constant; callers guarantee this.  */
36214   attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
36215   gcc_assert (attrs != NULL);
36217   attrs = TREE_VALUE (TREE_VALUE (attrs));
36219   gcc_assert (TREE_CODE (attrs) == STRING_CST);
36220   attrs_str = TREE_STRING_POINTER (attrs);
36222   /* Return priority zero for default function.  */
36223   if (strcmp (attrs_str, "default") == 0)
36226   /* Handle arch= if specified.  For priority, set it to be 1 more than
36227      the best instruction set the processor can handle.  For instance, if
36228      there is a version for atom and a version for ssse3 (the highest ISA
36229      priority for atom), the atom version must be checked for dispatch
36230      before the ssse3 version. */
36231   if (strstr (attrs_str, "arch=") != NULL)
      /* Parsing the attribute clobbers global_options; save and restore
	 around ix86_valid_target_attribute_tree.  */
36233       cl_target_option_save (&cur_target, &global_options);
36234       target_node = ix86_valid_target_attribute_tree (attrs, &global_options,
36235 						      &global_options_set);
36237       gcc_assert (target_node);
36238       new_target = TREE_TARGET_OPTION (target_node);
36239       gcc_assert (new_target);
36241       if (new_target->arch_specified && new_target->arch > 0)
      /* Map the processor to the __builtin_cpu_is string (ARG_STR) and
	 to a dispatch priority one above its best ISA level.  Within a
	 processor family, specific ISA flags disambiguate the model.  */
36243 	  switch (new_target->arch)
36245 	    case PROCESSOR_CORE2:
36247 	      priority = P_PROC_SSSE3;
36249 	    case PROCESSOR_NEHALEM:
36250 	      if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AES)
36251 		arg_str = "westmere";
36253 		/* We translate "arch=corei7" and "arch=nehalem" to
36254 		   "corei7" so that it will be mapped to M_INTEL_COREI7
36255 		   as cpu type to cover all M_INTEL_COREI7_XXXs.  */
36256 		arg_str = "corei7";
36257 	      priority = P_PROC_SSE4_2;
36259 	    case PROCESSOR_SANDYBRIDGE:
36260 	      if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_F16C)
36261 		arg_str = "ivybridge";
36263 		arg_str = "sandybridge";
36264 	      priority = P_PROC_AVX;
36266 	    case PROCESSOR_HASWELL:
36267 	      if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AVX512VL)
36268 		arg_str = "skylake-avx512";
36269 	      else if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_XSAVES)
36270 		arg_str = "skylake";
36271 	      else if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_ADX)
36272 		arg_str = "broadwell";
36274 		arg_str = "haswell";
36275 	      priority = P_PROC_AVX2;
36277 	    case PROCESSOR_BONNELL:
36278 	      arg_str = "bonnell";
36279 	      priority = P_PROC_SSSE3;
36281 	    case PROCESSOR_KNL:
36283 	      priority = P_PROC_AVX512F;
36285 	    case PROCESSOR_SILVERMONT:
36286 	      arg_str = "silvermont";
36287 	      priority = P_PROC_SSE4_2;
36289 	    case PROCESSOR_AMDFAM10:
36290 	      arg_str = "amdfam10h";
36291 	      priority = P_PROC_SSE4_A;
36293 	    case PROCESSOR_BTVER1:
36294 	      arg_str = "btver1";
36295 	      priority = P_PROC_SSE4_A;
36297 	    case PROCESSOR_BTVER2:
36298 	      arg_str = "btver2";
36299 	      priority = P_PROC_BMI;
36301 	    case PROCESSOR_BDVER1:
36302 	      arg_str = "bdver1";
36303 	      priority = P_PROC_XOP;
36305 	    case PROCESSOR_BDVER2:
36306 	      arg_str = "bdver2";
36307 	      priority = P_PROC_FMA;
36309 	    case PROCESSOR_BDVER3:
36310 	      arg_str = "bdver3";
36311 	      priority = P_PROC_FMA;
36313 	    case PROCESSOR_BDVER4:
36314 	      arg_str = "bdver4";
36315 	      priority = P_PROC_AVX2;
36317 	    case PROCESSOR_ZNVER1:
36318 	      arg_str = "znver1";
36319 	      priority = P_PROC_AVX2;
36324       cl_target_option_restore (&global_options, &cur_target);
      /* An arch= we cannot dispatch on is only an error when the caller
	 actually wants a predicate list built.  */
36326       if (predicate_list && arg_str == NULL)
36328 	  error_at (DECL_SOURCE_LOCATION (decl),
36329 	    	    "No dispatcher found for the versioning attributes");
36333       if (predicate_list)
36335 	  predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS];
36336 	  /* For a C string literal the length includes the trailing NULL.  */
36337 	  predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
36338 	  predicate_chain = tree_cons (predicate_decl, predicate_arg,
36343   /* Process feature name.  */
      /* strtok mutates its input, so work on a writable copy of the
	 attribute string.  NOTE(review): strtok keeps static state and is
	 not reentrant; also confirm tok_str is freed on every return path
	 (allocation is via xmalloc).  */
36344   tok_str =  (char *) xmalloc (strlen (attrs_str) + 1);
36345   strcpy (tok_str, attrs_str);
36346   token = strtok (tok_str, ",");
36347   predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS];
36349   while (token != NULL)
36351       /* Do not process "arch="  */
36352       if (strncmp (token, "arch=", 5) == 0)
36354 	  token = strtok (NULL, ",");
      /* Look the token up in the dispatchable-feature table; build a
	 __builtin_cpu_supports predicate for each match.  */
36357       for (i = 0; i < NUM_FEATURES; ++i)
36359 	  if (strcmp (token, feature_list[i].name) == 0)
36361 	      if (predicate_list)
36363 		  predicate_arg = build_string_literal (
36364 				  strlen (feature_list[i].name) + 1,
36365 				  feature_list[i].name);
36366 		  predicate_chain = tree_cons (predicate_decl, predicate_arg,
36369 	      /* Find the maximum priority feature.  */
36370 	      if (feature_list[i].priority > priority)
36371 		priority = feature_list[i].priority;
      /* i == NUM_FEATURES means the for loop above fell through without
	 matching: the token is not a dispatchable feature.  */
36376       if (predicate_list && i == NUM_FEATURES)
36378 	  error_at (DECL_SOURCE_LOCATION (decl),
36379 		    "No dispatcher found for %s", token);
36382       token = strtok (NULL, ",");
36386   if (predicate_list && predicate_chain == NULL_TREE)
36388       error_at (DECL_SOURCE_LOCATION (decl),
36389 		"No dispatcher found for the versioning attributes : %s",
      /* tree_cons built the chain in reverse; restore source order before
	 handing it back.  */
36393   else if (predicate_list)
36395       predicate_chain = nreverse (predicate_chain);
36396       *predicate_list = predicate_chain;
36402 /* This compares the priority of target features in function DECL1
36403    and DECL2.  It returns positive value if DECL1 is higher priority,
36404    negative value if DECL2 is higher priority and 0 if they are the
36408 ix86_compare_version_priority (tree decl1, tree decl2)
      /* Passing NULL for the predicate list asks only for the priority,
	 without building dispatch predicates.  */
36410   unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
36411   unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);
      /* Priorities are small enum values, so the int difference cannot
	 overflow.  */
36413   return (int)priority1 - (int)priority2;
36416 /* V1 and V2 point to function versions with different priorities
36417    based on the target ISA.  This function compares their priorities.  */
36420 feature_compare (const void *v1, const void *v2)
      /* Local mirror of the struct used by dispatch_function_versions;
	 the two layouts must stay in sync.  */
36422   typedef struct _function_version_info
36425       tree predicate_chain;
36426       unsigned int dispatch_priority;
36427     } function_version_info;
36429   const function_version_info c1 = *(const function_version_info *)v1;
36430   const function_version_info c2 = *(const function_version_info *)v2;
      /* c2 - c1 sorts in DESCENDING priority order.  NOTE(review): the
	 operands are unsigned; this relies on priorities being small
	 enough that the difference fits in int — confirm.  */
36431   return (c2.dispatch_priority - c1.dispatch_priority);
36434 /* This function generates the dispatch function for
36435    multi-versioned functions.  DISPATCH_DECL is the function which will
36436    contain the dispatch logic.  FNDECLS are the function choices for
36437    dispatch, and is a tree chain.  EMPTY_BB is the basic block pointer
36438    in DISPATCH_DECL in which the dispatch code is generated.  */
36441 dispatch_function_versions (tree dispatch_decl,
36443 			    basic_block *empty_bb)
36446   gimple *ifunc_cpu_init_stmt;
36450   vec<tree> *fndecls;
36451   unsigned int num_versions = 0;
36452   unsigned int actual_versions = 0;
      /* Keep this layout in sync with the identical local struct in
	 feature_compare, which is used as the qsort comparator.  */
36455   struct _function_version_info
36458       tree predicate_chain;
36459       unsigned int dispatch_priority;
36460     }*function_version_info;
36462   gcc_assert (dispatch_decl != NULL
36463 	      && fndecls_p != NULL
36464 	      && empty_bb != NULL);
36466   /*fndecls_p is actually a vector.  */
36467   fndecls = static_cast<vec<tree> *> (fndecls_p);
36469   /* At least one more version other than the default.  */
36470   num_versions = fndecls->length ();
36471   gcc_assert (num_versions >= 2);
      /* One slot per non-default version; the default needs no
	 predicates.  */
36473   function_version_info = (struct _function_version_info *)
36474     XNEWVEC (struct _function_version_info, (num_versions - 1));
36476   /* The first version in the vector is the default decl.  */
36477   default_decl = (*fndecls)[0];
36479   push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));
36481   gseq = bb_seq (*empty_bb);
36482   /* Function version dispatch is via IFUNC.  IFUNC resolvers fire before
36483      constructors, so explicity call __builtin_cpu_init here.  */
36484   ifunc_cpu_init_stmt = gimple_build_call_vec (
36485                      ix86_builtins [(int) IX86_BUILTIN_CPU_INIT], vNULL);
36486   gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
36487   gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
36488   set_bb_seq (*empty_bb, gseq);
      /* Collect predicate chain and priority for each non-default version
	 (iteration starts at 1 to skip the default at index 0).  */
36493   for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
36495       tree version_decl = ele;
36496       tree predicate_chain = NULL_TREE;
36497       unsigned int priority;
36498       /* Get attribute string, parse it and find the right predicate decl.
36499          The predicate function could be a lengthy combination of many
36500 	 features, like arch-type and various isa-variants.  */
36501       priority = get_builtin_code_for_version (version_decl,
36504       if (predicate_chain == NULL_TREE)
36507       function_version_info [actual_versions].version_decl = version_decl;
36508       function_version_info [actual_versions].predicate_chain
36510       function_version_info [actual_versions].dispatch_priority = priority;
36514   /* Sort the versions according to descending order of dispatch priority.  The
36515      priority is based on the ISA.  This is not a perfect solution.  There
36516      could still be ambiguity.  If more than one function version is suitable
36517      to execute, which one should be dispatched?  In future, allow the user
36518      to specify a dispatch priority next to the version.  */
36519   qsort (function_version_info, actual_versions,
36520          sizeof (struct _function_version_info), feature_compare);
      /* Chain one conditional-return block per version; each call extends
	 the CFG and returns the new tail block.  */
36522   for  (i = 0; i < actual_versions; ++i)
36523     *empty_bb = add_condition_to_bb (dispatch_decl,
36524 				     function_version_info[i].version_decl,
36525 				     function_version_info[i].predicate_chain,
36528   /* dispatch default version at the end.  */
36529   *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
36532   free (function_version_info);
36536 /* Comparator function to be used in qsort routine to sort attribute
36537    specification strings to "target".  */
36540 attr_strcmp (const void *v1, const void *v2)
      /* qsort hands us pointers to the char* array elements, hence the
	 extra level of indirection.  */
36542   const char *c1 = *(char *const*)v1;
36543   const char *c2 = *(char *const*)v2;
36544   return strcmp (c1, c2);
36547 /* ARGLIST is the argument to target attribute.  This function tokenizes
36548    the comma separated arguments, sorts them and returns a string which
36549    is a unique identifier for the comma separated arguments.   It also
36550    replaces non-identifier characters "=,-" with "_".  */
36553 sorted_attr_string (tree arglist)
36556   size_t str_len_sum = 0;
36557   char **args = NULL;
36558   char *attr_str, *ret_str;
36560   unsigned int argnum = 1;
      /* Pass 1: total up the lengths (plus separators) so one buffer can
	 hold the concatenation of all attribute strings.  */
36563   for (arg = arglist; arg; arg = TREE_CHAIN (arg))
36565       const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
36566       size_t len = strlen (str);
36567       str_len_sum += len + 1;
36568       if (arg != arglist)
      /* NOTE(review): this inner scan presumably counts commas inside
	 each string to grow argnum — the loop body is not fully visible
	 here; confirm.  */
36570       for (i = 0; i < strlen (str); i++)
36575   attr_str = XNEWVEC (char, str_len_sum);
      /* Pass 2: concatenate all the strings, comma-separated, reusing
	 str_len_sum as the running write offset.  */
36577   for (arg = arglist; arg; arg = TREE_CHAIN (arg))
36579       const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
36580       size_t len = strlen (str);
36581       memcpy (attr_str + str_len_sum, str, len);
36582       attr_str[str_len_sum + len] = TREE_CHAIN (arg) ? ',' : '\0';
36583       str_len_sum += len + 1;
36586   /* Replace "=,-" with "_".  */
36587   for (i = 0; i < strlen (attr_str); i++)
36588     if (attr_str[i] == '=' || attr_str[i]== '-')
      /* Split on the remaining commas, sort the tokens, then join them
	 with '_' so equivalent attribute sets mangle identically.  */
36594   args = XNEWVEC (char *, argnum);
36597   attr = strtok (attr_str, ",");
36598   while (attr != NULL)
36602       attr = strtok (NULL, ",");
36605   qsort (args, argnum, sizeof (char *), attr_strcmp);
36607   ret_str = XNEWVEC (char, str_len_sum);
36609   for (i = 0; i < argnum; i++)
36611       size_t len = strlen (args[i]);
36612       memcpy (ret_str + str_len_sum, args[i], len);
36613       ret_str[str_len_sum + len] = i < argnum - 1 ? '_' : '\0';
36614       str_len_sum += len + 1;
      /* The token array pointed into attr_str, so freeing attr_str also
	 invalidates args[] — both are done with here.  */
36618   XDELETEVEC (attr_str);
36622 /* This function changes the assembler name for functions that are
36623 versions. If DECL is a function version and has a "target"
36624 attribute, it appends the attribute string to its assembler name. */
36627 ix86_mangle_function_version_assembler_name (tree decl, tree id)
36630 const char *orig_name, *version_string;
36631 char *attr_str, *assembler_name;
/* gnu_inline versions would never get a body emitted, which a
   dispatched version needs, so diagnose the combination.  */
36633 if (DECL_DECLARED_INLINE_P (decl)
36634 && lookup_attribute ("gnu_inline",
36635 DECL_ATTRIBUTES (decl)))
36636 error_at (DECL_SOURCE_LOCATION (decl),
36637 "Function versions cannot be marked as gnu_inline,"
36638 " bodies have to be generated");
36640 if (DECL_VIRTUAL_P (decl)
36641 || DECL_VINDEX (decl))
36642 sorry ("Virtual function multiversioning not supported");
36644 version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
36646 /* target attribute string cannot be NULL. */
36647 gcc_assert (version_attr != NULL_TREE);
36649 orig_name = IDENTIFIER_POINTER (id);
36651 = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));
/* The "default" version keeps the unsuffixed assembler name.  */
36653 if (strcmp (version_string, "default") == 0)
/* Canonicalize the attribute arguments and append them as a '.'
   separated suffix: "<orig_name>.<sorted-attrs>".  */
36656 attr_str = sorted_attr_string (TREE_VALUE (version_attr));
36657 assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);
36659 sprintf (assembler_name, "%s.%s", orig_name, attr_str);
36661 /* Allow assembler name to be modified if already set. */
36662 if (DECL_ASSEMBLER_NAME_SET_P (decl))
36663 SET_DECL_RTL (decl, NULL);
36665 tree ret = get_identifier (assembler_name);
36666 XDELETEVEC (attr_str);
36667 XDELETEVEC (assembler_name);
36671 /* This function returns true if FN1 and FN2 are versions of the same function,
36672 that is, the target strings of the function decls are different. This assumes
36673 that FN1 and FN2 have the same signature. */
36676 ix86_function_versions (tree fn1, tree fn2)
36679 char *target1, *target2;
36682 if (TREE_CODE (fn1) != FUNCTION_DECL
36683 || TREE_CODE (fn2) != FUNCTION_DECL)
36686 attr1 = lookup_attribute ("target", DECL_ATTRIBUTES (fn1));
36687 attr2 = lookup_attribute ("target", DECL_ATTRIBUTES (fn2));
36689 /* At least one function decl should have the target attribute specified. */
36690 if (attr1 == NULL_TREE && attr2 == NULL_TREE)
36693 /* Diagnose missing target attribute if one of the decls is already
36694 multi-versioned. */
36695 if (attr1 == NULL_TREE || attr2 == NULL_TREE)
36697 if (DECL_FUNCTION_VERSIONED (fn1) || DECL_FUNCTION_VERSIONED (fn2))
/* Normalize so that FN2 is the decl missing the attribute; the
   diagnostics below then point at the right declarations.  */
36699 if (attr2 != NULL_TREE)
36701 std::swap (fn1, fn2);
36704 error_at (DECL_SOURCE_LOCATION (fn2),
36705 "missing %<target%> attribute for multi-versioned %D",
36707 inform (DECL_SOURCE_LOCATION (fn1),
36708 "previous declaration of %D", fn1);
36709 /* Prevent diagnosing of the same error multiple times. */
36710 DECL_ATTRIBUTES (fn2)
36711 = tree_cons (get_identifier ("target"),
36712 copy_node (TREE_VALUE (attr1)),
36713 DECL_ATTRIBUTES (fn2))
/* Compare the canonicalized (sorted) attribute strings so argument
   order inside the attribute does not matter.  */
36718 target1 = sorted_attr_string (TREE_VALUE (attr1));
36719 target2 = sorted_attr_string (TREE_VALUE (attr2));
36721 /* The sorted target strings must be different for fn1 and fn2
36723 if (strcmp (target1, target2) == 0)
36728 XDELETEVEC (target1);
36729 XDELETEVEC (target2);
/* Implement TARGET_MANGLE_DECL_ASSEMBLER_NAME: adjust the assembler
   name ID for DECL, appending the target-version suffix for versioned
   functions and then applying any subtarget-specific mangling.  */
36735 ix86_mangle_decl_assembler_name (tree decl, tree id)
36737 /* For function version, add the target suffix to the assembler name. */
36738 if (TREE_CODE (decl) == FUNCTION_DECL
36739 && DECL_FUNCTION_VERSIONED (decl))
36740 id = ix86_mangle_function_version_assembler_name (decl, id);
36741 #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
36742 id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
36748 /* Return a new name by appending SUFFIX to the DECL name. If make_unique
36749 is true, append the full path name of the source file. */
36752 make_name (tree decl, const char *suffix, bool make_unique)
36754 char *global_var_name;
36757 const char *unique_name = NULL;
36759 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
36761 /* Get a unique name that can be used globally without any chances
36762 of collision at link time. */
36764 unique_name = IDENTIFIER_POINTER (get_file_function_name ("\0"));
/* Length budget: NAME + '.' + SUFFIX + NUL, plus '.' + UNIQUE_NAME when
   a unique name is requested.  */
36766 name_len = strlen (name) + strlen (suffix) + 2;
36769 name_len += strlen (unique_name) + 1;
36770 global_var_name = XNEWVEC (char, name_len);
36772 /* Use '.' to concatenate names as it is demangler friendly. */
36774 snprintf (global_var_name, name_len, "%s.%s.%s", name, unique_name,
36777 snprintf (global_var_name, name_len, "%s.%s", name, suffix);
/* Caller owns the returned buffer (XDELETEVEC when done).  */
36779 return global_var_name;
36782 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
36784 /* Make a dispatcher declaration for the multi-versioned function DECL.
36785 Calls to DECL function will be replaced with calls to the dispatcher
36786 by the front-end. Return the decl created. */
36789 make_dispatcher_decl (const tree decl)
36793 tree fn_type, func_type;
36794 bool is_uniq = false;
/* Non-public versions need a unique dispatcher name, since the ifunc
   itself must be externally visible (see TREE_PUBLIC below).  */
36796 if (TREE_PUBLIC (decl) == 0)
36799 func_name = make_name (decl, "ifunc", is_uniq);
/* Give the dispatcher the same signature as the versioned function.  */
36801 fn_type = TREE_TYPE (decl);
36802 func_type = build_function_type (TREE_TYPE (fn_type),
36803 TYPE_ARG_TYPES (fn_type));
36805 func_decl = build_fn_decl (func_name, func_type);
36806 XDELETEVEC (func_name);
36807 TREE_USED (func_decl) = 1;
36808 DECL_CONTEXT (func_decl) = NULL_TREE;
36809 DECL_INITIAL (func_decl) = error_mark_node;
36810 DECL_ARTIFICIAL (func_decl) = 1;
36811 /* Mark this func as external, the resolver will flip it again if
36812 it gets generated. */
36813 DECL_EXTERNAL (func_decl) = 1;
36814 /* This will be of type IFUNCs have to be externally visible. */
36815 TREE_PUBLIC (func_decl) = 1;
36822 /* Returns true if decl is multi-versioned and DECL is the default function,
36823 that is it is not tagged with target specific optimization. */
36826 is_function_default_version (const tree decl)
36828 if (TREE_CODE (decl) != FUNCTION_DECL
36829 || !DECL_FUNCTION_VERSIONED (decl))
36831 tree attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
/* The default version is spelled __attribute__((target("default"))).  */
36833 attr = TREE_VALUE (TREE_VALUE (attr));
36834 return (TREE_CODE (attr) == STRING_CST
36835 && strcmp (TREE_STRING_POINTER (attr), "default") == 0);
36838 /* Make a dispatcher declaration for the multi-versioned function DECL.
36839 Calls to DECL function will be replaced with calls to the dispatcher
36840 by the front-end. Returns the decl of the dispatcher function. */
36843 ix86_get_function_versions_dispatcher (void *decl)
36845 tree fn = (tree) decl;
36846 struct cgraph_node *node = NULL;
36847 struct cgraph_node *default_node = NULL;
36848 struct cgraph_function_version_info *node_v = NULL;
36849 struct cgraph_function_version_info *first_v = NULL;
36851 tree dispatch_decl = NULL;
36853 struct cgraph_function_version_info *default_version_info = NULL;
36855 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
36857 node = cgraph_node::get (fn);
36858 gcc_assert (node != NULL);
36860 node_v = node->function_version ();
36861 gcc_assert (node_v != NULL);
/* Dispatcher already built for this version set: reuse it.  */
36863 if (node_v->dispatcher_resolver != NULL)
36864 return node_v->dispatcher_resolver;
36866 /* Find the default version and make it the first node. */
36868 /* Go to the beginning of the chain. */
36869 while (first_v->prev != NULL)
36870 first_v = first_v->prev;
36871 default_version_info = first_v;
/* Walk the doubly-linked version chain looking for the decl carrying
   target("default").  */
36872 while (default_version_info != NULL)
36874 if (is_function_default_version
36875 (default_version_info->this_node->decl))
36877 default_version_info = default_version_info->next;
36880 /* If there is no default node, just return NULL. */
36881 if (default_version_info == NULL)
36884 /* Make default info the first node. */
/* Unlink the default node and splice it in at the head of the chain so
   the default version always comes first.  */
36885 if (first_v != default_version_info)
36887 default_version_info->prev->next = default_version_info->next;
36888 if (default_version_info->next)
36889 default_version_info->next->prev = default_version_info->prev;
36890 first_v->prev = default_version_info;
36891 default_version_info->next = first_v;
36892 default_version_info->prev = NULL;
36895 default_node = default_version_info->this_node;
36897 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
36898 if (targetm.has_ifunc_p ())
36900 struct cgraph_function_version_info *it_v = NULL;
36901 struct cgraph_node *dispatcher_node = NULL;
36902 struct cgraph_function_version_info *dispatcher_version_info = NULL;
36904 /* Right now, the dispatching is done via ifunc. */
36905 dispatch_decl = make_dispatcher_decl (default_node->decl);
36907 dispatcher_node = cgraph_node::get_create (dispatch_decl);
36908 gcc_assert (dispatcher_node != NULL);
36909 dispatcher_node->dispatcher_function = 1;
36910 dispatcher_version_info
36911 = dispatcher_node->insert_new_function_version ();
36912 dispatcher_version_info->next = default_version_info;
36913 dispatcher_node->definition = 1;
36915 /* Set the dispatcher for all the versions. */
36916 it_v = default_version_info;
36917 while (it_v != NULL)
36919 it_v->dispatcher_resolver = dispatch_decl;
/* Without ifunc support multiversioning cannot be implemented here.  */
36926 error_at (DECL_SOURCE_LOCATION (default_node->decl),
36927 "multiversioning needs ifunc which is not supported "
36931 return dispatch_decl;
36934 /* Make the resolver function decl to dispatch the versions of
36935 a multi-versioned function, DEFAULT_DECL. Create an
36936 empty basic block in the resolver and store the pointer in
36937 EMPTY_BB. Return the decl of the resolver function. */
36940 make_resolver_func (const tree default_decl,
36941 const tree dispatch_decl,
36942 basic_block *empty_bb)
36944 char *resolver_name;
36945 tree decl, type, decl_name, t;
36946 bool is_uniq = false;
36948 /* IFUNC's have to be globally visible. So, if the default_decl is
36949 not, then the name of the IFUNC should be made unique. */
36950 if (TREE_PUBLIC (default_decl) == 0)
36953 /* Append the filename to the resolver function if the versions are
36954 not externally visible. This is because the resolver function has
36955 to be externally visible for the loader to find it. So, appending
36956 the filename will prevent conflicts with a resolver function from
36957 another module which is based on the same version name. */
36958 resolver_name = make_name (default_decl, "resolver", is_uniq);
36960 /* The resolver function should return a (void *). */
36961 type = build_function_type_list (ptr_type_node, NULL_TREE);
36963 decl = build_fn_decl (resolver_name, type);
36964 decl_name = get_identifier (resolver_name);
36965 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
36967 DECL_NAME (decl) = decl_name;
36968 TREE_USED (decl) = 1;
36969 DECL_ARTIFICIAL (decl) = 1;
36970 DECL_IGNORED_P (decl) = 0;
36971 /* IFUNC resolvers have to be externally visible. */
36972 TREE_PUBLIC (decl) = 1;
/* The resolver runs before relocation is complete, so it must not be
   inlined into normal code.  NOTE(review): inferred from IFUNC usage —
   the visible code only sets the flag.  */
36973 DECL_UNINLINABLE (decl) = 1;
36975 /* Resolver is not external, body is generated. */
36976 DECL_EXTERNAL (decl) = 0;
36977 DECL_EXTERNAL (dispatch_decl) = 0;
36979 DECL_CONTEXT (decl) = NULL_TREE;
36980 DECL_INITIAL (decl) = make_node (BLOCK);
36981 DECL_STATIC_CONSTRUCTOR (decl) = 0;
36983 if (DECL_COMDAT_GROUP (default_decl)
36984 || TREE_PUBLIC (default_decl))
36986 /* In this case, each translation unit with a call to this
36987 versioned function will put out a resolver. Ensure it
36988 is comdat to keep just one copy. */
36989 DECL_COMDAT (decl) = 1;
36990 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
36992 /* Build result decl and add to function_decl. */
36993 t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
36994 DECL_ARTIFICIAL (t) = 1;
36995 DECL_IGNORED_P (t) = 1;
36996 DECL_RESULT (decl) = t;
/* Gimplify and give the resolver an empty lowered body; the caller
   fills *EMPTY_BB with the dispatch conditions.  */
36998 gimplify_function_tree (decl);
36999 push_cfun (DECL_STRUCT_FUNCTION (decl));
37000 *empty_bb = init_lowered_empty_function (decl, false, 0);
37002 cgraph_node::add_new_function (decl, true);
37003 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
37007 gcc_assert (dispatch_decl != NULL);
37008 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
37009 DECL_ATTRIBUTES (dispatch_decl)
37010 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
37012 /* Create the alias for dispatch to resolver here. */
37013 /*cgraph_create_function_alias (dispatch_decl, decl);*/
37014 cgraph_node::create_same_body_alias (dispatch_decl, decl);
37015 XDELETEVEC (resolver_name);
37019 /* Generate the dispatching code body to dispatch multi-versioned function
37020 DECL. The target hook is called to process the "target" attributes and
37021 provide the code to dispatch the right function at run-time. NODE points
37022 to the dispatcher decl whose body will be created. */
37025 ix86_generate_version_dispatcher_body (void *node_p)
37027 tree resolver_decl;
37028 basic_block empty_bb;
37029 tree default_ver_decl;
37030 struct cgraph_node *versn;
37031 struct cgraph_node *node;
37033 struct cgraph_function_version_info *node_version_info = NULL;
37034 struct cgraph_function_version_info *versn_info = NULL;
37036 node = (cgraph_node *)node_p;
37038 node_version_info = node->function_version ();
37039 gcc_assert (node->dispatcher_function
37040 && node_version_info != NULL);
/* Resolver already generated: nothing to do.  */
37042 if (node_version_info->dispatcher_resolver)
37043 return node_version_info->dispatcher_resolver;
37045 /* The first version in the chain corresponds to the default version. */
37046 default_ver_decl = node_version_info->next->this_node->decl;
37048 /* node is going to be an alias, so remove the finalized bit. */
37049 node->definition = false;
37051 resolver_decl = make_resolver_func (default_ver_decl,
37052 node->decl, &empty_bb);
37054 node_version_info->dispatcher_resolver = resolver_decl;
37056 push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));
37058 auto_vec<tree, 2> fn_ver_vec;
/* Collect every function version into FN_VER_VEC for the dispatch
   generator below.  */
37060 for (versn_info = node_version_info->next; versn_info;
37061 versn_info = versn_info->next)
37063 versn = versn_info->this_node;
37064 /* Check for virtual functions here again, as by this time it should
37065 have been determined if this function needs a vtable index or
37066 not. This happens for methods in derived classes that override
37067 virtual methods in base classes but are not explicitly marked as
37069 if (DECL_VINDEX (versn->decl))
37070 sorry ("Virtual function multiversioning not supported");
37072 fn_ver_vec.safe_push (versn->decl);
37075 dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
37076 cgraph_edge::rebuild_edges ();
37078 return resolver_decl;
37080 /* This builds the processor_model struct type defined in
37081 libgcc/config/i386/cpuinfo.c */
37084 build_processor_model_struct (void)
37086 const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
37088 tree field = NULL_TREE, field_chain = NULL_TREE;
37090 tree type = make_node (RECORD_TYPE);
37092 /* The first 3 fields are unsigned int. */
/* Fields are chained in reverse; DECL_CHAIN links each new field in
   front of the previous ones.  */
37093 for (i = 0; i < 3; ++i)
37095 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
37096 get_identifier (field_name[i]), unsigned_type_node);
37097 if (field_chain != NULL_TREE)
37098 DECL_CHAIN (field) = field_chain;
37099 field_chain = field;
37102 /* The last field is an array of unsigned integers of size one. */
/* Must stay layout-compatible with __cpu_model in libgcc's cpuinfo.c —
   keep the two definitions in sync.  */
37103 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
37104 get_identifier (field_name[3]),
37105 build_array_type (unsigned_type_node,
37106 build_index_type (size_one_node)));
37107 if (field_chain != NULL_TREE)
37108 DECL_CHAIN (field) = field_chain;
37109 field_chain = field;
37111 finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE);
37115 /* Returns a extern, comdat VAR_DECL of type TYPE and name NAME. */
37118 make_var_decl (tree type, const char *name)
37122 new_decl = build_decl (UNKNOWN_LOCATION,
37124 get_identifier(name),
/* External, one-only definition: the real object lives in libgcc
   (e.g. __cpu_model), this decl just references it.  */
37127 DECL_EXTERNAL (new_decl) = 1;
37128 TREE_STATIC (new_decl) = 1;
37129 TREE_PUBLIC (new_decl) = 1;
37130 DECL_INITIAL (new_decl) = 0;
37131 DECL_ARTIFICIAL (new_decl) = 0;
/* Keep the variable even if it looks unused to the optimizers.  */
37132 DECL_PRESERVE_P (new_decl) = 1;
37134 make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl));
37135 assemble_variable (new_decl, 0, 0, 0);
37140 /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
37141 into an integer defined in libgcc/config/i386/cpuinfo.c */
37144 fold_builtin_cpu (tree fndecl, tree *args)
37147 enum ix86_builtins fn_code = (enum ix86_builtins)
37148 DECL_FUNCTION_CODE (fndecl);
37149 tree param_string_cst = NULL;
37151 /* This is the order of bit-fields in __processor_features in cpuinfo.c */
37152 enum processor_features
37185 /* These are the values for vendor types and cpu types and subtypes
37186 in cpuinfo.c. Cpu types and subtypes should be subtracted by
37187 the corresponding start value. */
37188 enum processor_model
37198 M_INTEL_SILVERMONT,
37202 M_CPU_SUBTYPE_START,
37203 M_INTEL_COREI7_NEHALEM,
37204 M_INTEL_COREI7_WESTMERE,
37205 M_INTEL_COREI7_SANDYBRIDGE,
37206 M_AMDFAM10H_BARCELONA,
37207 M_AMDFAM10H_SHANGHAI,
37208 M_AMDFAM10H_ISTANBUL,
37209 M_AMDFAM15H_BDVER1,
37210 M_AMDFAM15H_BDVER2,
37211 M_AMDFAM15H_BDVER3,
37212 M_AMDFAM15H_BDVER4,
37213 M_AMDFAM17H_ZNVER1,
37214 M_INTEL_COREI7_IVYBRIDGE,
37215 M_INTEL_COREI7_HASWELL,
37216 M_INTEL_COREI7_BROADWELL,
37217 M_INTEL_COREI7_SKYLAKE,
37218 M_INTEL_COREI7_SKYLAKE_AVX512
/* Maps the strings accepted by __builtin_cpu_is to processor_model
   values.  Must agree with the detection code in libgcc's cpuinfo.c.  */
37221 static struct _arch_names_table
37223 const char *const name;
37224 const enum processor_model model;
37226 const arch_names_table[] =
37229 {"intel", M_INTEL},
37230 {"atom", M_INTEL_BONNELL},
37231 {"slm", M_INTEL_SILVERMONT},
37232 {"core2", M_INTEL_CORE2},
37233 {"corei7", M_INTEL_COREI7},
37234 {"nehalem", M_INTEL_COREI7_NEHALEM},
37235 {"westmere", M_INTEL_COREI7_WESTMERE},
37236 {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
37237 {"ivybridge", M_INTEL_COREI7_IVYBRIDGE},
37238 {"haswell", M_INTEL_COREI7_HASWELL},
37239 {"broadwell", M_INTEL_COREI7_BROADWELL},
37240 {"skylake", M_INTEL_COREI7_SKYLAKE},
37241 {"skylake-avx512", M_INTEL_COREI7_SKYLAKE_AVX512},
37242 {"bonnell", M_INTEL_BONNELL},
37243 {"silvermont", M_INTEL_SILVERMONT},
37244 {"knl", M_INTEL_KNL},
37245 {"amdfam10h", M_AMDFAM10H},
37246 {"barcelona", M_AMDFAM10H_BARCELONA},
37247 {"shanghai", M_AMDFAM10H_SHANGHAI},
37248 {"istanbul", M_AMDFAM10H_ISTANBUL},
37249 {"btver1", M_AMD_BTVER1},
37250 {"amdfam15h", M_AMDFAM15H},
37251 {"bdver1", M_AMDFAM15H_BDVER1},
37252 {"bdver2", M_AMDFAM15H_BDVER2},
37253 {"bdver3", M_AMDFAM15H_BDVER3},
37254 {"bdver4", M_AMDFAM15H_BDVER4},
37255 {"btver2", M_AMD_BTVER2},
37256 {"znver1", M_AMDFAM17H_ZNVER1},
/* Maps the strings accepted by __builtin_cpu_supports to feature bit
   positions in __cpu_model.__cpu_features[0].  */
37259 static struct _isa_names_table
37261 const char *const name;
37262 const enum processor_features feature;
37264 const isa_names_table[] =
37268 {"popcnt", F_POPCNT},
37272 {"ssse3", F_SSSE3},
37273 {"sse4a", F_SSE4_A},
37274 {"sse4.1", F_SSE4_1},
37275 {"sse4.2", F_SSE4_2},
37281 {"avx512f", F_AVX512F},
37285 {"pclmul", F_PCLMUL},
37286 {"avx512vl",F_AVX512VL},
37287 {"avx512bw",F_AVX512BW},
37288 {"avx512dq",F_AVX512DQ},
37289 {"avx512cd",F_AVX512CD},
37290 {"avx512er",F_AVX512ER},
37291 {"avx512pf",F_AVX512PF},
37292 {"avx512vbmi",F_AVX512VBMI},
37293 {"avx512ifma",F_AVX512IFMA},
/* Reference the __cpu_model variable defined in libgcc.  */
37296 tree __processor_model_type = build_processor_model_struct ();
37297 tree __cpu_model_var = make_var_decl (__processor_model_type,
37301 varpool_node::add (__cpu_model_var);
37303 gcc_assert ((args != NULL) && (*args != NULL));
/* Strip wrapping expressions until the STRING_CST argument itself is
   reached; anything else is a usage error.  */
37305 param_string_cst = *args;
37306 while (param_string_cst
37307 && TREE_CODE (param_string_cst) != STRING_CST)
37309 /* *args must be a expr that can contain other EXPRS leading to a
37311 if (!EXPR_P (param_string_cst))
37313 error ("Parameter to builtin must be a string constant or literal");
37314 return integer_zero_node;
37316 param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0);
37319 gcc_assert (param_string_cst);
37321 if (fn_code == IX86_BUILTIN_CPU_IS)
37327 unsigned int field_val = 0;
37328 unsigned int NUM_ARCH_NAMES
37329 = sizeof (arch_names_table) / sizeof (struct _arch_names_table);
/* Linear lookup of the requested architecture name.  */
37331 for (i = 0; i < NUM_ARCH_NAMES; i++)
37332 if (strcmp (arch_names_table[i].name,
37333 TREE_STRING_POINTER (param_string_cst)) == 0)
37336 if (i == NUM_ARCH_NAMES)
37338 error ("Parameter to builtin not valid: %s",
37339 TREE_STRING_POINTER (param_string_cst));
37340 return integer_zero_node;
37343 field = TYPE_FIELDS (__processor_model_type);
37344 field_val = arch_names_table[i].model;
37346 /* CPU types are stored in the next field. */
/* Values between the START markers select the second/third struct
   field and are rebased relative to the marker.  */
37347 if (field_val > M_CPU_TYPE_START
37348 && field_val < M_CPU_SUBTYPE_START)
37350 field = DECL_CHAIN (field);
37351 field_val -= M_CPU_TYPE_START;
37354 /* CPU subtypes are stored in the next field. */
37355 if (field_val > M_CPU_SUBTYPE_START)
37357 field = DECL_CHAIN ( DECL_CHAIN (field));
37358 field_val -= M_CPU_SUBTYPE_START;
37361 /* Get the appropriate field in __cpu_model. */
37362 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
37365 /* Check the value. */
/* Fold to (int) (__cpu_model.<field> == field_val).  */
37366 final = build2 (EQ_EXPR, unsigned_type_node, ref,
37367 build_int_cstu (unsigned_type_node, field_val));
37368 return build1 (CONVERT_EXPR, integer_type_node, final);
37370 else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS)
37377 unsigned int field_val = 0;
37378 unsigned int NUM_ISA_NAMES
37379 = sizeof (isa_names_table) / sizeof (struct _isa_names_table);
37381 for (i = 0; i < NUM_ISA_NAMES; i++)
37382 if (strcmp (isa_names_table[i].name,
37383 TREE_STRING_POINTER (param_string_cst)) == 0)
37386 if (i == NUM_ISA_NAMES)
37388 error ("Parameter to builtin not valid: %s",
37389 TREE_STRING_POINTER (param_string_cst));
37390 return integer_zero_node;
37393 field = TYPE_FIELDS (__processor_model_type);
37394 /* Get the last field, which is __cpu_features. */
37395 while (DECL_CHAIN (field))
37396 field = DECL_CHAIN (field);
37398 /* Get the appropriate field: __cpu_model.__cpu_features */
37399 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
37402 /* Access the 0th element of __cpu_features array. */
37403 array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
37404 integer_zero_node, NULL_TREE, NULL_TREE);
37406 field_val = (1 << isa_names_table[i].feature);
37407 /* Return __cpu_model.__cpu_features[0] & field_val */
37408 final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
37409 build_int_cstu (unsigned_type_node, field_val));
37410 return build1 (CONVERT_EXPR, integer_type_node, final);
/* Only CPU_IS and CPU_SUPPORTS reach this function.  */
37412 gcc_unreachable ();
/* Implement TARGET_FOLD_BUILTIN: fold the machine-specific builtins
   that can be resolved at compile time; currently only the
   __builtin_cpu_is / __builtin_cpu_supports pair, plus whatever the
   subtarget hook handles.  Returns NULL_TREE when no folding applies.  */
37416 ix86_fold_builtin (tree fndecl, int n_args,
37417 tree *args, bool ignore ATTRIBUTE_UNUSED)
37419 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
37421 enum ix86_builtins fn_code = (enum ix86_builtins)
37422 DECL_FUNCTION_CODE (fndecl);
37423 if (fn_code == IX86_BUILTIN_CPU_IS
37424 || fn_code == IX86_BUILTIN_CPU_SUPPORTS)
37426 gcc_assert (n_args == 1);
37427 return fold_builtin_cpu (fndecl, args);
37431 #ifdef SUBTARGET_FOLD_BUILTIN
37432 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
37438 /* Make builtins to detect cpu type and features supported. NAME is
37439 the builtin name, CODE is the builtin code, and FTYPE is the function
37440 type of the builtin. */
37443 make_cpu_type_builtin (const char* name, int code,
37444 enum ix86_builtin_func_type ftype, bool is_const)
37449 type = ix86_get_builtin_func_type (ftype);
37450 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
37452 gcc_assert (decl != NULL_TREE);
/* Record the decl so ix86_builtin_decl can return it later.  */
37453 ix86_builtins[(int) code] = decl;
/* IS_CONST marks builtins with no side effects (cpu_is/cpu_supports).  */
37454 TREE_READONLY (decl) = is_const;
37457 /* Make builtins to get CPU type and features supported. The created
37460 __builtin_cpu_init (), to detect cpu type and features,
37461 __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
37462 __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>
37466 ix86_init_platform_type_builtins (void)
/* cpu_init has a side effect (fills __cpu_model), hence not const;
   the two query builtins are pure reads.  */
37468 make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT,
37469 INT_FTYPE_VOID, false);
37470 make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS,
37471 INT_FTYPE_PCCHAR, true);
37472 make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS,
37473 INT_FTYPE_PCCHAR, true);
37476 /* Internal method for ix86_init_builtins. */
/* Register the ABI-specific varargs builtins (__builtin_ms_va_* and
   __builtin_sysv_va_*) with function types carrying the matching
   ms_abi / sysv_abi attribute, so each set uses the right calling
   convention regardless of the default ABI.  */
37479 ix86_init_builtins_va_builtins_abi (void)
37481 tree ms_va_ref, sysv_va_ref;
37482 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
37483 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
37484 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
37485 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
37489 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
37490 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
37491 ms_va_ref = build_reference_type (ms_va_list_type_node);
37493 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
/* Build the function types: va_end/va_copy are fixed-arg, va_start is
   varargs.  */
37496 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
37497 fnvoid_va_start_ms =
37498 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
37499 fnvoid_va_end_sysv =
37500 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
37501 fnvoid_va_start_sysv =
37502 build_varargs_function_type_list (void_type_node, sysv_va_ref,
37504 fnvoid_va_copy_ms =
37505 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
37507 fnvoid_va_copy_sysv =
37508 build_function_type_list (void_type_node, sysv_va_ref,
37509 sysv_va_ref, NULL_TREE);
37511 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
37512 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
37513 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
37514 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
37515 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
37516 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
37517 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
37518 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
37519 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
37520 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
37521 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
37522 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
/* Register the ix86-specific builtin types: __float80 (x87 extended
   precision, XFmode) and __float128 (IEEE quad), plus the primitive
   types generated by i386-builtin-types.awk.  */
37526 ix86_init_builtin_types (void)
37528 tree float128_type_node, float80_type_node;
37530 /* The __float80 type. */
/* When long double is already XFmode, reuse it; otherwise build a
   fresh 80-bit REAL_TYPE.  */
37531 float80_type_node = long_double_type_node;
37532 if (TYPE_MODE (float80_type_node) != XFmode)
37534 /* The __float80 type. */
37535 float80_type_node = make_node (REAL_TYPE);
37537 TYPE_PRECISION (float80_type_node) = 80;
37538 layout_type (float80_type_node);
37540 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
37542 /* The __float128 type. */
37543 float128_type_node = make_node (REAL_TYPE);
37544 TYPE_PRECISION (float128_type_node) = 128;
37545 layout_type (float128_type_node);
37546 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
37548 /* This macro is built by i386-builtin-types.awk. */
37549 DEFINE_BUILTIN_PRIMITIVE_TYPES;
/* Implement TARGET_INIT_BUILTINS: register all ix86 builtin types and
   functions — CPU-detection, TFmode math, TM, MMX/SSE, MPX, the
   ABI-specific va_* builtins, and any subtarget additions.  */
37553 ix86_init_builtins (void)
37557 ix86_init_builtin_types ();
37559 /* Builtins to get CPU type and features. */
37560 ix86_init_platform_type_builtins ();
37562 /* TFmode support builtins. */
37563 def_builtin_const (0, "__builtin_infq",
37564 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
37565 def_builtin_const (0, "__builtin_huge_valq",
37566 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
37568 /* We will expand them to normal call if SSE isn't available since
37569 they are used by libgcc. */
/* The libgcc fallback names (__fabstf2/__copysigntf3) are passed as
   the builtins' library names.  */
37570 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
37571 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
37572 BUILT_IN_MD, "__fabstf2", NULL_TREE);
37573 TREE_READONLY (t) = 1;
37574 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
37576 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
37577 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
37578 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
37579 TREE_READONLY (t) = 1;
37580 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
37582 ix86_init_tm_builtins ();
37583 ix86_init_mmx_sse_builtins ();
37584 ix86_init_mpx_builtins ();
37587 ix86_init_builtins_va_builtins_abi ();
37589 #ifdef SUBTARGET_INIT_BUILTINS
37590 SUBTARGET_INIT_BUILTINS;
37594 /* Return the ix86 builtin for CODE. */
/* Implement TARGET_BUILTIN_DECL: look CODE up in the ix86_builtins
   table; out-of-range codes yield error_mark_node.  */
37597 ix86_builtin_decl (unsigned code, bool)
37599 if (code >= IX86_BUILTIN_MAX)
37600 return error_mark_node;
37602 return ix86_builtins[code];
37605 /* Errors in the source file can cause expand_expr to return const0_rtx
37606 where we expect a vector. To avoid crashing, use one of the vector
37607 clear instructions. */
37609 safe_vector_operand (rtx x, machine_mode mode)
/* Substitute the all-zeros vector constant of MODE for a scalar 0.  */
37611 if (x == const0_rtx)
37612 x = CONST0_RTX (mode);
37616 /* Fixup modeless constants to fit required mode. */
37618 fixup_modeless_constant (rtx x, machine_mode mode)
/* VOIDmode marks a bare constant (e.g. CONST_INT); convert it to MODE
   with unsigned extension.  */
37620 if (GET_MODE (x) == VOIDmode)
37621 x = convert_to_mode (mode, x, 1);
37625 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
37628 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
37631 tree arg0 = CALL_EXPR_ARG (exp, 0);
37632 tree arg1 = CALL_EXPR_ARG (exp, 1);
37633 rtx op0 = expand_normal (arg0);
37634 rtx op1 = expand_normal (arg1);
37635 machine_mode tmode = insn_data[icode].operand[0].mode;
37636 machine_mode mode0 = insn_data[icode].operand[1].mode;
37637 machine_mode mode1 = insn_data[icode].operand[2].mode;
/* Guard against const0_rtx standing in for a vector after a source
   error (see safe_vector_operand).  */
37639 if (VECTOR_MODE_P (mode0))
37640 op0 = safe_vector_operand (op0, mode0);
37641 if (VECTOR_MODE_P (mode1))
37642 op1 = safe_vector_operand (op1, mode1);
/* Reuse TARGET only when it already has the right mode and satisfies
   the insn's output predicate.  */
37644 if (optimize || !target
37645 || GET_MODE (target) != tmode
37646 || !insn_data[icode].operand[0].predicate (target, tmode)
37647 target = gen_reg_rtx (tmode);
/* Widen an SImode operand to the TImode the insn expects by loading it
   into the low element of a V4SI vector.  */
37649 if (GET_MODE (op1) == SImode && mode1 == TImode)
37651 rtx x = gen_reg_rtx (V4SImode);
37652 emit_insn (gen_sse2_loadd (x, op1));
37653 op1 = gen_lowpart (TImode, x);
37656 if (!insn_data[icode].operand[1].predicate (op0, mode0))
37657 op0 = copy_to_mode_reg (mode0, op0);
37658 if (!insn_data[icode].operand[2].predicate (op1, mode1))
37659 op1 = copy_to_mode_reg (mode1, op1);
37661 pat = GEN_FCN (icode) (target, op0, op1);
37670 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
37673 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
37674 enum ix86_builtin_func_type m_type,
37675 enum rtx_code sub_code)
/* COMPARISON_P: the insn embeds a comparison rtx (SUB_CODE) as an extra
   operand.  LAST_ARG_CONSTANT: the final argument must be an immediate.  */
37680 bool comparison_p = false;
37682 bool last_arg_constant = false;
37683 int num_memory = 0;
37689 machine_mode tmode = insn_data[icode].operand[0].mode;
/* Classify the builtin's function type: how many arguments it takes and
   whether the last one is a constant / whether it is a comparison.
   (The nargs assignments between the case labels are not visible here.)  */
37693 case MULTI_ARG_4_DF2_DI_I:
37694 case MULTI_ARG_4_DF2_DI_I1:
37695 case MULTI_ARG_4_SF2_SI_I:
37696 case MULTI_ARG_4_SF2_SI_I1:
37698 last_arg_constant = true;
37701 case MULTI_ARG_3_SF:
37702 case MULTI_ARG_3_DF:
37703 case MULTI_ARG_3_SF2:
37704 case MULTI_ARG_3_DF2:
37705 case MULTI_ARG_3_DI:
37706 case MULTI_ARG_3_SI:
37707 case MULTI_ARG_3_SI_DI:
37708 case MULTI_ARG_3_HI:
37709 case MULTI_ARG_3_HI_SI:
37710 case MULTI_ARG_3_QI:
37711 case MULTI_ARG_3_DI2:
37712 case MULTI_ARG_3_SI2:
37713 case MULTI_ARG_3_HI2:
37714 case MULTI_ARG_3_QI2:
37718 case MULTI_ARG_2_SF:
37719 case MULTI_ARG_2_DF:
37720 case MULTI_ARG_2_DI:
37721 case MULTI_ARG_2_SI:
37722 case MULTI_ARG_2_HI:
37723 case MULTI_ARG_2_QI:
/* Two-argument forms whose second argument is an immediate.  */
37727 case MULTI_ARG_2_DI_IMM:
37728 case MULTI_ARG_2_SI_IMM:
37729 case MULTI_ARG_2_HI_IMM:
37730 case MULTI_ARG_2_QI_IMM:
37732 last_arg_constant = true;
37735 case MULTI_ARG_1_SF:
37736 case MULTI_ARG_1_DF:
37737 case MULTI_ARG_1_SF2:
37738 case MULTI_ARG_1_DF2:
37739 case MULTI_ARG_1_DI:
37740 case MULTI_ARG_1_SI:
37741 case MULTI_ARG_1_HI:
37742 case MULTI_ARG_1_QI:
37743 case MULTI_ARG_1_SI_DI:
37744 case MULTI_ARG_1_HI_DI:
37745 case MULTI_ARG_1_HI_SI:
37746 case MULTI_ARG_1_QI_DI:
37747 case MULTI_ARG_1_QI_SI:
37748 case MULTI_ARG_1_QI_HI:
/* Comparison forms: SUB_CODE supplies the rtx comparison code.  */
37752 case MULTI_ARG_2_DI_CMP:
37753 case MULTI_ARG_2_SI_CMP:
37754 case MULTI_ARG_2_HI_CMP:
37755 case MULTI_ARG_2_QI_CMP:
37757 comparison_p = true;
37760 case MULTI_ARG_2_SF_TF:
37761 case MULTI_ARG_2_DF_TF:
37762 case MULTI_ARG_2_DI_TF:
37763 case MULTI_ARG_2_SI_TF:
37764 case MULTI_ARG_2_HI_TF:
37765 case MULTI_ARG_2_QI_TF:
/* Any other function type is a caller bug.  */
37771 gcc_unreachable ();
/* Use a fresh pseudo as destination unless TARGET already fits.  */
37774 if (optimize || !target
37775 || GET_MODE (target) != tmode
37776 || !insn_data[icode].operand[0].predicate (target, tmode))
37777 target = gen_reg_rtx (tmode);
37779 gcc_assert (nargs <= 4);
/* Expand and legitimize each argument; ADJUST skips the embedded
   comparison operand when computing the insn operand index.  */
37781 for (i = 0; i < nargs; i++)
37783 tree arg = CALL_EXPR_ARG (exp, i);
37784 rtx op = expand_normal (arg);
37785 int adjust = (comparison_p) ? 1 : 0;
37786 machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
/* The last argument of these builtins must be an immediate; if the
   predicate rejects it, either report an error or fall back to a
   generic pattern that accepts a variable count.  */
37788 if (last_arg_constant && i == nargs - 1)
37790 if (!insn_data[icode].operand[i + 1].predicate (op, mode))
37792 enum insn_code new_icode = icode;
37795 case CODE_FOR_xop_vpermil2v2df3:
37796 case CODE_FOR_xop_vpermil2v4sf3:
37797 case CODE_FOR_xop_vpermil2v4df3:
37798 case CODE_FOR_xop_vpermil2v8sf3:
37799 error ("the last argument must be a 2-bit immediate");
37800 return gen_reg_rtx (tmode);
/* XOP rotates with an out-of-range immediate: retarget to the
   generic rotate patterns, which take a variable count.  */
37801 case CODE_FOR_xop_rotlv2di3:
37802 new_icode = CODE_FOR_rotlv2di3;
37804 case CODE_FOR_xop_rotlv4si3:
37805 new_icode = CODE_FOR_rotlv4si3;
37807 case CODE_FOR_xop_rotlv8hi3:
37808 new_icode = CODE_FOR_rotlv8hi3;
37810 case CODE_FOR_xop_rotlv16qi3:
37811 new_icode = CODE_FOR_rotlv16qi3;
/* A constant rotate count is reduced modulo the element width,
   after which the original predicate must accept it.  */
37813 if (CONST_INT_P (op))
37815 int mask = GET_MODE_UNIT_BITSIZE (tmode) - 1;
37816 op = GEN_INT (INTVAL (op) & mask);
37817 gcc_checking_assert
37818 (insn_data[icode].operand[i + 1].predicate (op, mode));
/* Sanity-check that the replacement pattern is operand-compatible
   with the original one.  */
37822 gcc_checking_assert
37824 && insn_data[new_icode].operand[0].mode == tmode
37825 && insn_data[new_icode].operand[1].mode == tmode
37826 && insn_data[new_icode].operand[2].mode == mode
37827 && insn_data[new_icode].operand[0].predicate
37828 == insn_data[icode].operand[0].predicate
37829 && insn_data[new_icode].operand[1].predicate
37830 == insn_data[icode].operand[1].predicate);
37836 gcc_unreachable ();
37843 if (VECTOR_MODE_P (mode))
37844 op = safe_vector_operand (op, mode);
37846 /* If we aren't optimizing, only allow one memory operand to be
37848 if (memory_operand (op, mode))
37851 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
37854 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
37856 op = force_reg (mode, op);
37860 args[i].mode = mode;
/* Emit the pattern appropriate for the argument count; for comparison
   forms the comparison rtx is passed as an extra operand.  */
37866 pat = GEN_FCN (icode) (target, args[0].op);
37871 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37872 GEN_INT ((int)sub_code));
37873 else if (! comparison_p)
37874 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
37877 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
37881 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
37886 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
37890 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
37894 gcc_unreachable ();
37904 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
37905 insns with vec_merge. */
37908 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
/* Single source argument; OP1 is filled in later (assignment not visible
   in this view) and used as the merge operand.  */
37912 tree arg0 = CALL_EXPR_ARG (exp, 0);
37913 rtx op1, op0 = expand_normal (arg0);
37914 machine_mode tmode = insn_data[icode].operand[0].mode;
37915 machine_mode mode0 = insn_data[icode].operand[1].mode;
/* Use a fresh pseudo as destination unless TARGET already fits.  */
37917 if (optimize || !target
37918 || GET_MODE (target) != tmode
37919 || !insn_data[icode].operand[0].predicate (target, tmode))
37920 target = gen_reg_rtx (tmode);
37922 if (VECTOR_MODE_P (mode0))
37923 op0 = safe_vector_operand (op0, mode0);
/* Force the source into a register when optimizing or when the insn's
   predicate rejects it as-is.  */
37925 if ((optimize && !register_operand (op0, mode0))
37926 || !insn_data[icode].operand[1].predicate (op0, mode0))
37927 op0 = copy_to_mode_reg (mode0, op0);
37930 if (!insn_data[icode].operand[2].predicate (op1, mode0))
37931 op1 = copy_to_mode_reg (mode0, op1);
37933 pat = GEN_FCN (icode) (target, op0, op1);
37940 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
37943 ix86_expand_sse_compare (const struct builtin_description *d,
37944 tree exp, rtx target, bool swap)
37947 tree arg0 = CALL_EXPR_ARG (exp, 0);
37948 tree arg1 = CALL_EXPR_ARG (exp, 1);
37949 rtx op0 = expand_normal (arg0);
37950 rtx op1 = expand_normal (arg1);
/* D describes the builtin: its insn code and the rtx comparison code.  */
37952 machine_mode tmode = insn_data[d->icode].operand[0].mode;
37953 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
37954 machine_mode mode1 = insn_data[d->icode].operand[2].mode;
37955 enum rtx_code comparison = d->comparison;
37957 if (VECTOR_MODE_P (mode0))
37958 op0 = safe_vector_operand (op0, mode0);
37959 if (VECTOR_MODE_P (mode1))
37960 op1 = safe_vector_operand (op1, mode1);
37962 /* Swap operands if we have a comparison that isn't available in
37965 std::swap (op0, op1);
/* Use a fresh pseudo as destination unless TARGET already fits.  */
37967 if (optimize || !target
37968 || GET_MODE (target) != tmode
37969 || !insn_data[d->icode].operand[0].predicate (target, tmode))
37970 target = gen_reg_rtx (tmode);
37972 if ((optimize && !register_operand (op0, mode0))
37973 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
37974 op0 = copy_to_mode_reg (mode0, op0);
37975 if ((optimize && !register_operand (op1, mode1))
37976 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
37977 op1 = copy_to_mode_reg (mode1, op1);
/* Build the comparison rtx and pass it as the insn's fourth operand.  */
37979 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
37980 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
37987 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
37990 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
37994 tree arg0 = CALL_EXPR_ARG (exp, 0);
37995 tree arg1 = CALL_EXPR_ARG (exp, 1);
37996 rtx op0 = expand_normal (arg0);
37997 rtx op1 = expand_normal (arg1);
/* comi insns have no destination operand; operands 0/1 are the sources.  */
37998 machine_mode mode0 = insn_data[d->icode].operand[0].mode;
37999 machine_mode mode1 = insn_data[d->icode].operand[1].mode;
38000 enum rtx_code comparison = d->comparison;
38002 if (VECTOR_MODE_P (mode0))
38003 op0 = safe_vector_operand (op0, mode0);
38004 if (VECTOR_MODE_P (mode1))
38005 op1 = safe_vector_operand (op1, mode1);
38007 /* Swap operands if we have a comparison that isn't available in
38009 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
38010 std::swap (op0, op1);
/* Result is a 0/1 int: zero an SImode pseudo, then set only its low
   QImode part from the flags comparison below.  */
38012 target = gen_reg_rtx (SImode);
38013 emit_move_insn (target, const0_rtx);
38014 target = gen_rtx_SUBREG (QImode, target, 0);
38016 if ((optimize && !register_operand (op0, mode0))
38017 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
38018 op0 = copy_to_mode_reg (mode0, op0);
38019 if ((optimize && !register_operand (op1, mode1))
38020 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
38021 op1 = copy_to_mode_reg (mode1, op1);
38023 pat = GEN_FCN (d->icode) (op0, op1);
/* STRICT_LOW_PART write of the comparison result into the low byte,
   leaving the zeroed upper bits intact; return the full SImode reg.  */
38027 emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
38028 gen_rtx_fmt_ee (comparison, QImode,
38032 return SUBREG_REG (target);
38035 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
38038 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
38042 tree arg0 = CALL_EXPR_ARG (exp, 0);
38043 rtx op1, op0 = expand_normal (arg0);
38044 machine_mode tmode = insn_data[d->icode].operand[0].mode;
38045 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
/* Use a fresh pseudo as destination unless TARGET already fits.  */
38047 if (optimize || target == 0
38048 || GET_MODE (target) != tmode
38049 || !insn_data[d->icode].operand[0].predicate (target, tmode))
38050 target = gen_reg_rtx (tmode);
38052 if (VECTOR_MODE_P (mode0))
38053 op0 = safe_vector_operand (op0, mode0);
38055 if ((optimize && !register_operand (op0, mode0))
38056 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
38057 op0 = copy_to_mode_reg (mode0, op0);
/* D->comparison is reused here as the rounding-mode immediate.  */
38059 op1 = GEN_INT (d->comparison);
38061 pat = GEN_FCN (d->icode) (target, op0, op1);
/* Like ix86_expand_sse_round, but for the two-source pack variants:
   round both inputs and pack to a signed-int vector.  */
38069 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
38070 tree exp, rtx target)
38073 tree arg0 = CALL_EXPR_ARG (exp, 0);
38074 tree arg1 = CALL_EXPR_ARG (exp, 1);
38075 rtx op0 = expand_normal (arg0);
38076 rtx op1 = expand_normal (arg1);
38078 machine_mode tmode = insn_data[d->icode].operand[0].mode;
38079 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
38080 machine_mode mode1 = insn_data[d->icode].operand[2].mode;
/* Use a fresh pseudo as destination unless TARGET already fits.  */
38082 if (optimize || target == 0
38083 || GET_MODE (target) != tmode
38084 || !insn_data[d->icode].operand[0].predicate (target, tmode))
38085 target = gen_reg_rtx (tmode);
/* Both operands are vectors here, so no VECTOR_MODE_P guard is used.  */
38087 op0 = safe_vector_operand (op0, mode0);
38088 op1 = safe_vector_operand (op1, mode1);
38090 if ((optimize && !register_operand (op0, mode0))
38091 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
38092 op0 = copy_to_mode_reg (mode0, op0);
38093 if ((optimize && !register_operand (op1, mode1))
38094 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
38095 op1 = copy_to_mode_reg (mode1, op1);
/* D->comparison carries the rounding-mode immediate.  */
38097 op2 = GEN_INT (d->comparison);
38099 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
38106 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
38109 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
38113 tree arg0 = CALL_EXPR_ARG (exp, 0);
38114 tree arg1 = CALL_EXPR_ARG (exp, 1);
38115 rtx op0 = expand_normal (arg0);
38116 rtx op1 = expand_normal (arg1);
/* ptest insns have no destination operand; operands 0/1 are the sources.  */
38117 machine_mode mode0 = insn_data[d->icode].operand[0].mode;
38118 machine_mode mode1 = insn_data[d->icode].operand[1].mode;
38119 enum rtx_code comparison = d->comparison;
38121 if (VECTOR_MODE_P (mode0))
38122 op0 = safe_vector_operand (op0, mode0);
38123 if (VECTOR_MODE_P (mode1))
38124 op1 = safe_vector_operand (op1, mode1);
/* 0/1 int result: zero an SImode pseudo and write only its low byte
   from the flags comparison, same scheme as ix86_expand_sse_comi.  */
38126 target = gen_reg_rtx (SImode);
38127 emit_move_insn (target, const0_rtx);
38128 target = gen_rtx_SUBREG (QImode, target, 0);
38130 if ((optimize && !register_operand (op0, mode0))
38131 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
38132 op0 = copy_to_mode_reg (mode0, op0);
38133 if ((optimize && !register_operand (op1, mode1))
38134 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
38135 op1 = copy_to_mode_reg (mode1, op1);
38137 pat = GEN_FCN (d->icode) (op0, op1);
38141 emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
38142 gen_rtx_fmt_ee (comparison, QImode,
38146 return SUBREG_REG (target);
38149 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
38152 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
38153 tree exp, rtx target)
/* pcmpestr takes five arguments: two vectors, their explicit lengths,
   and an 8-bit control immediate.  */
38156 tree arg0 = CALL_EXPR_ARG (exp, 0);
38157 tree arg1 = CALL_EXPR_ARG (exp, 1);
38158 tree arg2 = CALL_EXPR_ARG (exp, 2);
38159 tree arg3 = CALL_EXPR_ARG (exp, 3);
38160 tree arg4 = CALL_EXPR_ARG (exp, 4);
38161 rtx scratch0, scratch1;
38162 rtx op0 = expand_normal (arg0);
38163 rtx op1 = expand_normal (arg1);
38164 rtx op2 = expand_normal (arg2);
38165 rtx op3 = expand_normal (arg3);
38166 rtx op4 = expand_normal (arg4);
38167 machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
/* The pattern has two outputs (index in operand 0, mask in operand 1),
   alternating vector/integer inputs, and the immediate last.  */
38169 tmode0 = insn_data[d->icode].operand[0].mode;
38170 tmode1 = insn_data[d->icode].operand[1].mode;
38171 modev2 = insn_data[d->icode].operand[2].mode;
38172 modei3 = insn_data[d->icode].operand[3].mode;
38173 modev4 = insn_data[d->icode].operand[4].mode;
38174 modei5 = insn_data[d->icode].operand[5].mode;
38175 modeimm = insn_data[d->icode].operand[6].mode;
38177 if (VECTOR_MODE_P (modev2))
38178 op0 = safe_vector_operand (op0, modev2);
38179 if (VECTOR_MODE_P (modev4))
38180 op2 = safe_vector_operand (op2, modev4);
/* Legitimize each input against its operand predicate.  */
38182 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
38183 op0 = copy_to_mode_reg (modev2, op0);
38184 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
38185 op1 = copy_to_mode_reg (modei3, op1);
38186 if ((optimize && !register_operand (op2, modev4))
38187 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
38188 op2 = copy_to_mode_reg (modev4, op2);
38189 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
38190 op3 = copy_to_mode_reg (modei5, op3);
38192 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
38194 error ("the fifth argument must be an 8-bit immediate");
/* PCMPESTRI: return the index output; the mask goes to a scratch.  */
38198 if (d->code == IX86_BUILTIN_PCMPESTRI128)
38200 if (optimize || !target
38201 || GET_MODE (target) != tmode0
38202 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
38203 target = gen_reg_rtx (tmode0);
38205 scratch1 = gen_reg_rtx (tmode1);
38207 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
/* PCMPESTRM: return the mask output; the index goes to a scratch.  */
38209 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
38211 if (optimize || !target
38212 || GET_MODE (target) != tmode1
38213 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
38214 target = gen_reg_rtx (tmode1);
38216 scratch0 = gen_reg_rtx (tmode0);
38218 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
/* Flag-testing variants: both outputs are scratches and the result is a
   0/1 int read from the EFLAGS bit recorded in D->flag.  */
38222 gcc_assert (d->flag);
38224 scratch0 = gen_reg_rtx (tmode0);
38225 scratch1 = gen_reg_rtx (tmode1);
38227 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
38237 target = gen_reg_rtx (SImode);
38238 emit_move_insn (target, const0_rtx);
38239 target = gen_rtx_SUBREG (QImode, target, 0);
38242 (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
38243 gen_rtx_fmt_ee (EQ, QImode,
38244 gen_rtx_REG ((machine_mode) d->flag,
38247 return SUBREG_REG (target);
38254 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
38257 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
38258 tree exp, rtx target)
/* The implicit-length variant of pcmpestr: two vectors plus an 8-bit
   control immediate, no explicit length operands.  */
38261 tree arg0 = CALL_EXPR_ARG (exp, 0);
38262 tree arg1 = CALL_EXPR_ARG (exp, 1);
38263 tree arg2 = CALL_EXPR_ARG (exp, 2);
38264 rtx scratch0, scratch1;
38265 rtx op0 = expand_normal (arg0);
38266 rtx op1 = expand_normal (arg1);
38267 rtx op2 = expand_normal (arg2);
38268 machine_mode tmode0, tmode1, modev2, modev3, modeimm;
/* Two outputs (index, mask), two vector inputs, immediate last.  */
38270 tmode0 = insn_data[d->icode].operand[0].mode;
38271 tmode1 = insn_data[d->icode].operand[1].mode;
38272 modev2 = insn_data[d->icode].operand[2].mode;
38273 modev3 = insn_data[d->icode].operand[3].mode;
38274 modeimm = insn_data[d->icode].operand[4].mode;
38276 if (VECTOR_MODE_P (modev2))
38277 op0 = safe_vector_operand (op0, modev2);
38278 if (VECTOR_MODE_P (modev3))
38279 op1 = safe_vector_operand (op1, modev3);
38281 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
38282 op0 = copy_to_mode_reg (modev2, op0);
38283 if ((optimize && !register_operand (op1, modev3))
38284 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
38285 op1 = copy_to_mode_reg (modev3, op1);
38287 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
38289 error ("the third argument must be an 8-bit immediate");
/* PCMPISTRI: return the index output; the mask goes to a scratch.  */
38293 if (d->code == IX86_BUILTIN_PCMPISTRI128)
38295 if (optimize || !target
38296 || GET_MODE (target) != tmode0
38297 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
38298 target = gen_reg_rtx (tmode0);
38300 scratch1 = gen_reg_rtx (tmode1);
38302 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
/* PCMPISTRM: return the mask output; the index goes to a scratch.  */
38304 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
38306 if (optimize || !target
38307 || GET_MODE (target) != tmode1
38308 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
38309 target = gen_reg_rtx (tmode1);
38311 scratch0 = gen_reg_rtx (tmode0);
38313 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
/* Flag-testing variants: both outputs are scratches; the result is a
   0/1 int read from the EFLAGS bit recorded in D->flag.  */
38317 gcc_assert (d->flag);
38319 scratch0 = gen_reg_rtx (tmode0);
38320 scratch1 = gen_reg_rtx (tmode1);
38322 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
38332 target = gen_reg_rtx (SImode);
38333 emit_move_insn (target, const0_rtx);
38334 target = gen_rtx_SUBREG (QImode, target, 0);
38337 (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
38338 gen_rtx_fmt_ee (EQ, QImode,
38339 gen_rtx_REG ((machine_mode) d->flag,
38342 return SUBREG_REG (target);
38348 /* Subroutine of ix86_expand_builtin to take care of insns with
38349 variable number of operands. */
38352 ix86_expand_args_builtin (const struct builtin_description *d,
38353 tree exp, rtx target)
38355 rtx pat, real_target;
38356 unsigned int i, nargs;
38357 unsigned int nargs_constant = 0;
38358 unsigned int mask_pos = 0;
38359 int num_memory = 0;
38365 bool last_arg_count = false;
38366 enum insn_code icode = d->icode;
38367 const struct insn_data_d *insn_p = &insn_data[icode];
38368 machine_mode tmode = insn_p->operand[0].mode;
38369 machine_mode rmode = VOIDmode;
38371 enum rtx_code comparison = d->comparison;
38373 switch ((enum ix86_builtin_func_type) d->flag)
38375 case V2DF_FTYPE_V2DF_ROUND:
38376 case V4DF_FTYPE_V4DF_ROUND:
38377 case V4SF_FTYPE_V4SF_ROUND:
38378 case V8SF_FTYPE_V8SF_ROUND:
38379 case V4SI_FTYPE_V4SF_ROUND:
38380 case V8SI_FTYPE_V8SF_ROUND:
38381 return ix86_expand_sse_round (d, exp, target);
38382 case V4SI_FTYPE_V2DF_V2DF_ROUND:
38383 case V8SI_FTYPE_V4DF_V4DF_ROUND:
38384 case V16SI_FTYPE_V8DF_V8DF_ROUND:
38385 return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
38386 case INT_FTYPE_V8SF_V8SF_PTEST:
38387 case INT_FTYPE_V4DI_V4DI_PTEST:
38388 case INT_FTYPE_V4DF_V4DF_PTEST:
38389 case INT_FTYPE_V4SF_V4SF_PTEST:
38390 case INT_FTYPE_V2DI_V2DI_PTEST:
38391 case INT_FTYPE_V2DF_V2DF_PTEST:
38392 return ix86_expand_sse_ptest (d, exp, target);
38393 case FLOAT128_FTYPE_FLOAT128:
38394 case FLOAT_FTYPE_FLOAT:
38395 case INT_FTYPE_INT:
38396 case UINT64_FTYPE_INT:
38397 case UINT16_FTYPE_UINT16:
38398 case INT64_FTYPE_INT64:
38399 case INT64_FTYPE_V4SF:
38400 case INT64_FTYPE_V2DF:
38401 case INT_FTYPE_V16QI:
38402 case INT_FTYPE_V8QI:
38403 case INT_FTYPE_V8SF:
38404 case INT_FTYPE_V4DF:
38405 case INT_FTYPE_V4SF:
38406 case INT_FTYPE_V2DF:
38407 case INT_FTYPE_V32QI:
38408 case V16QI_FTYPE_V16QI:
38409 case V8SI_FTYPE_V8SF:
38410 case V8SI_FTYPE_V4SI:
38411 case V8HI_FTYPE_V8HI:
38412 case V8HI_FTYPE_V16QI:
38413 case V8QI_FTYPE_V8QI:
38414 case V8SF_FTYPE_V8SF:
38415 case V8SF_FTYPE_V8SI:
38416 case V8SF_FTYPE_V4SF:
38417 case V8SF_FTYPE_V8HI:
38418 case V4SI_FTYPE_V4SI:
38419 case V4SI_FTYPE_V16QI:
38420 case V4SI_FTYPE_V4SF:
38421 case V4SI_FTYPE_V8SI:
38422 case V4SI_FTYPE_V8HI:
38423 case V4SI_FTYPE_V4DF:
38424 case V4SI_FTYPE_V2DF:
38425 case V4HI_FTYPE_V4HI:
38426 case V4DF_FTYPE_V4DF:
38427 case V4DF_FTYPE_V4SI:
38428 case V4DF_FTYPE_V4SF:
38429 case V4DF_FTYPE_V2DF:
38430 case V4SF_FTYPE_V4SF:
38431 case V4SF_FTYPE_V4SI:
38432 case V4SF_FTYPE_V8SF:
38433 case V4SF_FTYPE_V4DF:
38434 case V4SF_FTYPE_V8HI:
38435 case V4SF_FTYPE_V2DF:
38436 case V2DI_FTYPE_V2DI:
38437 case V2DI_FTYPE_V16QI:
38438 case V2DI_FTYPE_V8HI:
38439 case V2DI_FTYPE_V4SI:
38440 case V2DF_FTYPE_V2DF:
38441 case V2DF_FTYPE_V4SI:
38442 case V2DF_FTYPE_V4DF:
38443 case V2DF_FTYPE_V4SF:
38444 case V2DF_FTYPE_V2SI:
38445 case V2SI_FTYPE_V2SI:
38446 case V2SI_FTYPE_V4SF:
38447 case V2SI_FTYPE_V2SF:
38448 case V2SI_FTYPE_V2DF:
38449 case V2SF_FTYPE_V2SF:
38450 case V2SF_FTYPE_V2SI:
38451 case V32QI_FTYPE_V32QI:
38452 case V32QI_FTYPE_V16QI:
38453 case V16HI_FTYPE_V16HI:
38454 case V16HI_FTYPE_V8HI:
38455 case V8SI_FTYPE_V8SI:
38456 case V16HI_FTYPE_V16QI:
38457 case V8SI_FTYPE_V16QI:
38458 case V4DI_FTYPE_V16QI:
38459 case V8SI_FTYPE_V8HI:
38460 case V4DI_FTYPE_V8HI:
38461 case V4DI_FTYPE_V4SI:
38462 case V4DI_FTYPE_V2DI:
38463 case UHI_FTYPE_UHI:
38464 case UHI_FTYPE_V16QI:
38465 case USI_FTYPE_V32QI:
38466 case UDI_FTYPE_V64QI:
38467 case V16QI_FTYPE_UHI:
38468 case V32QI_FTYPE_USI:
38469 case V64QI_FTYPE_UDI:
38470 case V8HI_FTYPE_UQI:
38471 case V16HI_FTYPE_UHI:
38472 case V32HI_FTYPE_USI:
38473 case V4SI_FTYPE_UQI:
38474 case V8SI_FTYPE_UQI:
38475 case V4SI_FTYPE_UHI:
38476 case V8SI_FTYPE_UHI:
38477 case UQI_FTYPE_V8HI:
38478 case UHI_FTYPE_V16HI:
38479 case USI_FTYPE_V32HI:
38480 case UQI_FTYPE_V4SI:
38481 case UQI_FTYPE_V8SI:
38482 case UHI_FTYPE_V16SI:
38483 case UQI_FTYPE_V2DI:
38484 case UQI_FTYPE_V4DI:
38485 case UQI_FTYPE_V8DI:
38486 case V16SI_FTYPE_UHI:
38487 case V2DI_FTYPE_UQI:
38488 case V4DI_FTYPE_UQI:
38489 case V16SI_FTYPE_INT:
38490 case V16SF_FTYPE_V8SF:
38491 case V16SI_FTYPE_V8SI:
38492 case V16SF_FTYPE_V4SF:
38493 case V16SI_FTYPE_V4SI:
38494 case V16SF_FTYPE_V16SF:
38495 case V8DI_FTYPE_UQI:
38496 case V8DF_FTYPE_V4DF:
38497 case V8DF_FTYPE_V2DF:
38498 case V8DF_FTYPE_V8DF:
38501 case V4SF_FTYPE_V4SF_VEC_MERGE:
38502 case V2DF_FTYPE_V2DF_VEC_MERGE:
38503 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
38504 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
38505 case V16QI_FTYPE_V16QI_V16QI:
38506 case V16QI_FTYPE_V8HI_V8HI:
38507 case V16SF_FTYPE_V16SF_V16SF:
38508 case V8QI_FTYPE_V8QI_V8QI:
38509 case V8QI_FTYPE_V4HI_V4HI:
38510 case V8HI_FTYPE_V8HI_V8HI:
38511 case V8HI_FTYPE_V16QI_V16QI:
38512 case V8HI_FTYPE_V4SI_V4SI:
38513 case V8SF_FTYPE_V8SF_V8SF:
38514 case V8SF_FTYPE_V8SF_V8SI:
38515 case V8DF_FTYPE_V8DF_V8DF:
38516 case V4SI_FTYPE_V4SI_V4SI:
38517 case V4SI_FTYPE_V8HI_V8HI:
38518 case V4SI_FTYPE_V2DF_V2DF:
38519 case V4HI_FTYPE_V4HI_V4HI:
38520 case V4HI_FTYPE_V8QI_V8QI:
38521 case V4HI_FTYPE_V2SI_V2SI:
38522 case V4DF_FTYPE_V4DF_V4DF:
38523 case V4DF_FTYPE_V4DF_V4DI:
38524 case V4SF_FTYPE_V4SF_V4SF:
38525 case V4SF_FTYPE_V4SF_V4SI:
38526 case V4SF_FTYPE_V4SF_V2SI:
38527 case V4SF_FTYPE_V4SF_V2DF:
38528 case V4SF_FTYPE_V4SF_UINT:
38529 case V4SF_FTYPE_V4SF_DI:
38530 case V4SF_FTYPE_V4SF_SI:
38531 case V2DI_FTYPE_V2DI_V2DI:
38532 case V2DI_FTYPE_V16QI_V16QI:
38533 case V2DI_FTYPE_V4SI_V4SI:
38534 case V2DI_FTYPE_V2DI_V16QI:
38535 case V2SI_FTYPE_V2SI_V2SI:
38536 case V2SI_FTYPE_V4HI_V4HI:
38537 case V2SI_FTYPE_V2SF_V2SF:
38538 case V2DF_FTYPE_V2DF_V2DF:
38539 case V2DF_FTYPE_V2DF_V4SF:
38540 case V2DF_FTYPE_V2DF_V2DI:
38541 case V2DF_FTYPE_V2DF_DI:
38542 case V2DF_FTYPE_V2DF_SI:
38543 case V2DF_FTYPE_V2DF_UINT:
38544 case V2SF_FTYPE_V2SF_V2SF:
38545 case V1DI_FTYPE_V1DI_V1DI:
38546 case V1DI_FTYPE_V8QI_V8QI:
38547 case V1DI_FTYPE_V2SI_V2SI:
38548 case V32QI_FTYPE_V16HI_V16HI:
38549 case V16HI_FTYPE_V8SI_V8SI:
38550 case V32QI_FTYPE_V32QI_V32QI:
38551 case V16HI_FTYPE_V32QI_V32QI:
38552 case V16HI_FTYPE_V16HI_V16HI:
38553 case V8SI_FTYPE_V4DF_V4DF:
38554 case V8SI_FTYPE_V8SI_V8SI:
38555 case V8SI_FTYPE_V16HI_V16HI:
38556 case V4DI_FTYPE_V4DI_V4DI:
38557 case V4DI_FTYPE_V8SI_V8SI:
38558 case V8DI_FTYPE_V64QI_V64QI:
38559 if (comparison == UNKNOWN)
38560 return ix86_expand_binop_builtin (icode, exp, target);
38563 case V4SF_FTYPE_V4SF_V4SF_SWAP:
38564 case V2DF_FTYPE_V2DF_V2DF_SWAP:
38565 gcc_assert (comparison != UNKNOWN);
38569 case V16HI_FTYPE_V16HI_V8HI_COUNT:
38570 case V16HI_FTYPE_V16HI_SI_COUNT:
38571 case V8SI_FTYPE_V8SI_V4SI_COUNT:
38572 case V8SI_FTYPE_V8SI_SI_COUNT:
38573 case V4DI_FTYPE_V4DI_V2DI_COUNT:
38574 case V4DI_FTYPE_V4DI_INT_COUNT:
38575 case V8HI_FTYPE_V8HI_V8HI_COUNT:
38576 case V8HI_FTYPE_V8HI_SI_COUNT:
38577 case V4SI_FTYPE_V4SI_V4SI_COUNT:
38578 case V4SI_FTYPE_V4SI_SI_COUNT:
38579 case V4HI_FTYPE_V4HI_V4HI_COUNT:
38580 case V4HI_FTYPE_V4HI_SI_COUNT:
38581 case V2DI_FTYPE_V2DI_V2DI_COUNT:
38582 case V2DI_FTYPE_V2DI_SI_COUNT:
38583 case V2SI_FTYPE_V2SI_V2SI_COUNT:
38584 case V2SI_FTYPE_V2SI_SI_COUNT:
38585 case V1DI_FTYPE_V1DI_V1DI_COUNT:
38586 case V1DI_FTYPE_V1DI_SI_COUNT:
38588 last_arg_count = true;
38590 case UINT64_FTYPE_UINT64_UINT64:
38591 case UINT_FTYPE_UINT_UINT:
38592 case UINT_FTYPE_UINT_USHORT:
38593 case UINT_FTYPE_UINT_UCHAR:
38594 case UINT16_FTYPE_UINT16_INT:
38595 case UINT8_FTYPE_UINT8_INT:
38596 case UHI_FTYPE_UHI_UHI:
38597 case USI_FTYPE_USI_USI:
38598 case UDI_FTYPE_UDI_UDI:
38599 case V16SI_FTYPE_V8DF_V8DF:
38602 case V2DI_FTYPE_V2DI_INT_CONVERT:
38605 nargs_constant = 1;
38607 case V4DI_FTYPE_V4DI_INT_CONVERT:
38610 nargs_constant = 1;
38612 case V8DI_FTYPE_V8DI_INT_CONVERT:
38615 nargs_constant = 1;
38617 case V8HI_FTYPE_V8HI_INT:
38618 case V8HI_FTYPE_V8SF_INT:
38619 case V16HI_FTYPE_V16SF_INT:
38620 case V8HI_FTYPE_V4SF_INT:
38621 case V8SF_FTYPE_V8SF_INT:
38622 case V4SF_FTYPE_V16SF_INT:
38623 case V16SF_FTYPE_V16SF_INT:
38624 case V4SI_FTYPE_V4SI_INT:
38625 case V4SI_FTYPE_V8SI_INT:
38626 case V4HI_FTYPE_V4HI_INT:
38627 case V4DF_FTYPE_V4DF_INT:
38628 case V4DF_FTYPE_V8DF_INT:
38629 case V4SF_FTYPE_V4SF_INT:
38630 case V4SF_FTYPE_V8SF_INT:
38631 case V2DI_FTYPE_V2DI_INT:
38632 case V2DF_FTYPE_V2DF_INT:
38633 case V2DF_FTYPE_V4DF_INT:
38634 case V16HI_FTYPE_V16HI_INT:
38635 case V8SI_FTYPE_V8SI_INT:
38636 case V16SI_FTYPE_V16SI_INT:
38637 case V4SI_FTYPE_V16SI_INT:
38638 case V4DI_FTYPE_V4DI_INT:
38639 case V2DI_FTYPE_V4DI_INT:
38640 case V4DI_FTYPE_V8DI_INT:
38641 case QI_FTYPE_V4SF_INT:
38642 case QI_FTYPE_V2DF_INT:
38644 nargs_constant = 1;
38646 case V16QI_FTYPE_V16QI_V16QI_V16QI:
38647 case V8SF_FTYPE_V8SF_V8SF_V8SF:
38648 case V4DF_FTYPE_V4DF_V4DF_V4DF:
38649 case V4SF_FTYPE_V4SF_V4SF_V4SF:
38650 case V2DF_FTYPE_V2DF_V2DF_V2DF:
38651 case V32QI_FTYPE_V32QI_V32QI_V32QI:
38652 case UHI_FTYPE_V16SI_V16SI_UHI:
38653 case UQI_FTYPE_V8DI_V8DI_UQI:
38654 case V16HI_FTYPE_V16SI_V16HI_UHI:
38655 case V16QI_FTYPE_V16SI_V16QI_UHI:
38656 case V16QI_FTYPE_V8DI_V16QI_UQI:
38657 case V16SF_FTYPE_V16SF_V16SF_UHI:
38658 case V16SF_FTYPE_V4SF_V16SF_UHI:
38659 case V16SI_FTYPE_SI_V16SI_UHI:
38660 case V16SI_FTYPE_V16HI_V16SI_UHI:
38661 case V16SI_FTYPE_V16QI_V16SI_UHI:
38662 case V8SF_FTYPE_V4SF_V8SF_UQI:
38663 case V4DF_FTYPE_V2DF_V4DF_UQI:
38664 case V8SI_FTYPE_V4SI_V8SI_UQI:
38665 case V8SI_FTYPE_SI_V8SI_UQI:
38666 case V4SI_FTYPE_V4SI_V4SI_UQI:
38667 case V4SI_FTYPE_SI_V4SI_UQI:
38668 case V4DI_FTYPE_V2DI_V4DI_UQI:
38669 case V4DI_FTYPE_DI_V4DI_UQI:
38670 case V2DI_FTYPE_V2DI_V2DI_UQI:
38671 case V2DI_FTYPE_DI_V2DI_UQI:
38672 case V64QI_FTYPE_V64QI_V64QI_UDI:
38673 case V64QI_FTYPE_V16QI_V64QI_UDI:
38674 case V64QI_FTYPE_QI_V64QI_UDI:
38675 case V32QI_FTYPE_V32QI_V32QI_USI:
38676 case V32QI_FTYPE_V16QI_V32QI_USI:
38677 case V32QI_FTYPE_QI_V32QI_USI:
38678 case V16QI_FTYPE_V16QI_V16QI_UHI:
38679 case V16QI_FTYPE_QI_V16QI_UHI:
38680 case V32HI_FTYPE_V8HI_V32HI_USI:
38681 case V32HI_FTYPE_HI_V32HI_USI:
38682 case V16HI_FTYPE_V8HI_V16HI_UHI:
38683 case V16HI_FTYPE_HI_V16HI_UHI:
38684 case V8HI_FTYPE_V8HI_V8HI_UQI:
38685 case V8HI_FTYPE_HI_V8HI_UQI:
38686 case V8SF_FTYPE_V8HI_V8SF_UQI:
38687 case V4SF_FTYPE_V8HI_V4SF_UQI:
38688 case V8SI_FTYPE_V8SF_V8SI_UQI:
38689 case V4SI_FTYPE_V4SF_V4SI_UQI:
38690 case V4DI_FTYPE_V4SF_V4DI_UQI:
38691 case V2DI_FTYPE_V4SF_V2DI_UQI:
38692 case V4SF_FTYPE_V4DI_V4SF_UQI:
38693 case V4SF_FTYPE_V2DI_V4SF_UQI:
38694 case V4DF_FTYPE_V4DI_V4DF_UQI:
38695 case V2DF_FTYPE_V2DI_V2DF_UQI:
38696 case V16QI_FTYPE_V8HI_V16QI_UQI:
38697 case V16QI_FTYPE_V16HI_V16QI_UHI:
38698 case V16QI_FTYPE_V4SI_V16QI_UQI:
38699 case V16QI_FTYPE_V8SI_V16QI_UQI:
38700 case V8HI_FTYPE_V4SI_V8HI_UQI:
38701 case V8HI_FTYPE_V8SI_V8HI_UQI:
38702 case V16QI_FTYPE_V2DI_V16QI_UQI:
38703 case V16QI_FTYPE_V4DI_V16QI_UQI:
38704 case V8HI_FTYPE_V2DI_V8HI_UQI:
38705 case V8HI_FTYPE_V4DI_V8HI_UQI:
38706 case V4SI_FTYPE_V2DI_V4SI_UQI:
38707 case V4SI_FTYPE_V4DI_V4SI_UQI:
38708 case V32QI_FTYPE_V32HI_V32QI_USI:
38709 case UHI_FTYPE_V16QI_V16QI_UHI:
38710 case USI_FTYPE_V32QI_V32QI_USI:
38711 case UDI_FTYPE_V64QI_V64QI_UDI:
38712 case UQI_FTYPE_V8HI_V8HI_UQI:
38713 case UHI_FTYPE_V16HI_V16HI_UHI:
38714 case USI_FTYPE_V32HI_V32HI_USI:
38715 case UQI_FTYPE_V4SI_V4SI_UQI:
38716 case UQI_FTYPE_V8SI_V8SI_UQI:
38717 case UQI_FTYPE_V2DI_V2DI_UQI:
38718 case UQI_FTYPE_V4DI_V4DI_UQI:
38719 case V4SF_FTYPE_V2DF_V4SF_UQI:
38720 case V4SF_FTYPE_V4DF_V4SF_UQI:
38721 case V16SI_FTYPE_V16SI_V16SI_UHI:
38722 case V16SI_FTYPE_V4SI_V16SI_UHI:
38723 case V2DI_FTYPE_V4SI_V2DI_UQI:
38724 case V2DI_FTYPE_V8HI_V2DI_UQI:
38725 case V2DI_FTYPE_V16QI_V2DI_UQI:
38726 case V4DI_FTYPE_V4DI_V4DI_UQI:
38727 case V4DI_FTYPE_V4SI_V4DI_UQI:
38728 case V4DI_FTYPE_V8HI_V4DI_UQI:
38729 case V4DI_FTYPE_V16QI_V4DI_UQI:
38730 case V4DI_FTYPE_V4DF_V4DI_UQI:
38731 case V2DI_FTYPE_V2DF_V2DI_UQI:
38732 case V4SI_FTYPE_V4DF_V4SI_UQI:
38733 case V4SI_FTYPE_V2DF_V4SI_UQI:
38734 case V4SI_FTYPE_V8HI_V4SI_UQI:
38735 case V4SI_FTYPE_V16QI_V4SI_UQI:
38736 case V4DI_FTYPE_V4DI_V4DI_V4DI:
38737 case V8DF_FTYPE_V2DF_V8DF_UQI:
38738 case V8DF_FTYPE_V4DF_V8DF_UQI:
38739 case V8DF_FTYPE_V8DF_V8DF_UQI:
38740 case V8SF_FTYPE_V8SF_V8SF_UQI:
38741 case V8SF_FTYPE_V8SI_V8SF_UQI:
38742 case V4DF_FTYPE_V4DF_V4DF_UQI:
38743 case V4SF_FTYPE_V4SF_V4SF_UQI:
38744 case V2DF_FTYPE_V2DF_V2DF_UQI:
38745 case V2DF_FTYPE_V4SF_V2DF_UQI:
38746 case V2DF_FTYPE_V4SI_V2DF_UQI:
38747 case V4SF_FTYPE_V4SI_V4SF_UQI:
38748 case V4DF_FTYPE_V4SF_V4DF_UQI:
38749 case V4DF_FTYPE_V4SI_V4DF_UQI:
38750 case V8SI_FTYPE_V8SI_V8SI_UQI:
38751 case V8SI_FTYPE_V8HI_V8SI_UQI:
38752 case V8SI_FTYPE_V16QI_V8SI_UQI:
38753 case V8DF_FTYPE_V8SI_V8DF_UQI:
38754 case V8DI_FTYPE_DI_V8DI_UQI:
38755 case V16SF_FTYPE_V8SF_V16SF_UHI:
38756 case V16SI_FTYPE_V8SI_V16SI_UHI:
38757 case V16HI_FTYPE_V16HI_V16HI_UHI:
38758 case V8HI_FTYPE_V16QI_V8HI_UQI:
38759 case V16HI_FTYPE_V16QI_V16HI_UHI:
38760 case V32HI_FTYPE_V32HI_V32HI_USI:
38761 case V32HI_FTYPE_V32QI_V32HI_USI:
38762 case V8DI_FTYPE_V16QI_V8DI_UQI:
38763 case V8DI_FTYPE_V2DI_V8DI_UQI:
38764 case V8DI_FTYPE_V4DI_V8DI_UQI:
38765 case V8DI_FTYPE_V8DI_V8DI_UQI:
38766 case V8DI_FTYPE_V8HI_V8DI_UQI:
38767 case V8DI_FTYPE_V8SI_V8DI_UQI:
38768 case V8HI_FTYPE_V8DI_V8HI_UQI:
38769 case V8SI_FTYPE_V8DI_V8SI_UQI:
38770 case V4SI_FTYPE_V4SI_V4SI_V4SI:
38773 case V32QI_FTYPE_V32QI_V32QI_INT:
38774 case V16HI_FTYPE_V16HI_V16HI_INT:
38775 case V16QI_FTYPE_V16QI_V16QI_INT:
38776 case V4DI_FTYPE_V4DI_V4DI_INT:
38777 case V8HI_FTYPE_V8HI_V8HI_INT:
38778 case V8SI_FTYPE_V8SI_V8SI_INT:
38779 case V8SI_FTYPE_V8SI_V4SI_INT:
38780 case V8SF_FTYPE_V8SF_V8SF_INT:
38781 case V8SF_FTYPE_V8SF_V4SF_INT:
38782 case V4SI_FTYPE_V4SI_V4SI_INT:
38783 case V4DF_FTYPE_V4DF_V4DF_INT:
38784 case V16SF_FTYPE_V16SF_V16SF_INT:
38785 case V16SF_FTYPE_V16SF_V4SF_INT:
38786 case V16SI_FTYPE_V16SI_V4SI_INT:
38787 case V4DF_FTYPE_V4DF_V2DF_INT:
38788 case V4SF_FTYPE_V4SF_V4SF_INT:
38789 case V2DI_FTYPE_V2DI_V2DI_INT:
38790 case V4DI_FTYPE_V4DI_V2DI_INT:
38791 case V2DF_FTYPE_V2DF_V2DF_INT:
38792 case UQI_FTYPE_V8DI_V8UDI_INT:
38793 case UQI_FTYPE_V8DF_V8DF_INT:
38794 case UQI_FTYPE_V2DF_V2DF_INT:
38795 case UQI_FTYPE_V4SF_V4SF_INT:
38796 case UHI_FTYPE_V16SI_V16SI_INT:
38797 case UHI_FTYPE_V16SF_V16SF_INT:
38799 nargs_constant = 1;
38801 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
38804 nargs_constant = 1;
38806 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
38809 nargs_constant = 1;
38811 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
38814 nargs_constant = 1;
38816 case V2DI_FTYPE_V2DI_UINT_UINT:
38818 nargs_constant = 2;
38820 case V8DI_FTYPE_V8DI_V8DI_INT_CONVERT:
38823 nargs_constant = 1;
38825 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UDI_CONVERT:
38829 nargs_constant = 1;
38831 case QI_FTYPE_V8DF_INT_UQI:
38832 case QI_FTYPE_V4DF_INT_UQI:
38833 case QI_FTYPE_V2DF_INT_UQI:
38834 case HI_FTYPE_V16SF_INT_UHI:
38835 case QI_FTYPE_V8SF_INT_UQI:
38836 case QI_FTYPE_V4SF_INT_UQI:
38839 nargs_constant = 1;
38841 case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_USI_CONVERT:
38845 nargs_constant = 1;
38847 case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_UHI_CONVERT:
38851 nargs_constant = 1;
38853 case V32QI_FTYPE_V32QI_V32QI_V32QI_USI:
38854 case V32HI_FTYPE_V32HI_V32HI_V32HI_USI:
38855 case V32HI_FTYPE_V64QI_V64QI_V32HI_USI:
38856 case V16SI_FTYPE_V32HI_V32HI_V16SI_UHI:
38857 case V64QI_FTYPE_V64QI_V64QI_V64QI_UDI:
38858 case V32HI_FTYPE_V32HI_V8HI_V32HI_USI:
38859 case V16HI_FTYPE_V16HI_V8HI_V16HI_UHI:
38860 case V8SI_FTYPE_V8SI_V4SI_V8SI_UQI:
38861 case V4DI_FTYPE_V4DI_V2DI_V4DI_UQI:
38862 case V64QI_FTYPE_V32HI_V32HI_V64QI_UDI:
38863 case V32QI_FTYPE_V16HI_V16HI_V32QI_USI:
38864 case V16QI_FTYPE_V8HI_V8HI_V16QI_UHI:
38865 case V32HI_FTYPE_V16SI_V16SI_V32HI_USI:
38866 case V16HI_FTYPE_V8SI_V8SI_V16HI_UHI:
38867 case V8HI_FTYPE_V4SI_V4SI_V8HI_UQI:
38868 case V4DF_FTYPE_V4DF_V4DI_V4DF_UQI:
38869 case V8SF_FTYPE_V8SF_V8SI_V8SF_UQI:
38870 case V4SF_FTYPE_V4SF_V4SI_V4SF_UQI:
38871 case V2DF_FTYPE_V2DF_V2DI_V2DF_UQI:
38872 case V2DI_FTYPE_V4SI_V4SI_V2DI_UQI:
38873 case V4DI_FTYPE_V8SI_V8SI_V4DI_UQI:
38874 case V4DF_FTYPE_V4DI_V4DF_V4DF_UQI:
38875 case V8SF_FTYPE_V8SI_V8SF_V8SF_UQI:
38876 case V2DF_FTYPE_V2DI_V2DF_V2DF_UQI:
38877 case V4SF_FTYPE_V4SI_V4SF_V4SF_UQI:
38878 case V16SF_FTYPE_V16SF_V16SF_V16SF_UHI:
38879 case V16SF_FTYPE_V16SF_V16SI_V16SF_UHI:
38880 case V16SF_FTYPE_V16SI_V16SF_V16SF_UHI:
38881 case V16SI_FTYPE_V16SI_V16SI_V16SI_UHI:
38882 case V16SI_FTYPE_V16SI_V4SI_V16SI_UHI:
38883 case V8HI_FTYPE_V8HI_V8HI_V8HI_UQI:
38884 case V8SI_FTYPE_V8SI_V8SI_V8SI_UQI:
38885 case V4SI_FTYPE_V4SI_V4SI_V4SI_UQI:
38886 case V8SF_FTYPE_V8SF_V8SF_V8SF_UQI:
38887 case V16QI_FTYPE_V16QI_V16QI_V16QI_UHI:
38888 case V16HI_FTYPE_V16HI_V16HI_V16HI_UHI:
38889 case V2DI_FTYPE_V2DI_V2DI_V2DI_UQI:
38890 case V2DF_FTYPE_V2DF_V2DF_V2DF_UQI:
38891 case V4DI_FTYPE_V4DI_V4DI_V4DI_UQI:
38892 case V4DF_FTYPE_V4DF_V4DF_V4DF_UQI:
38893 case V4SF_FTYPE_V4SF_V4SF_V4SF_UQI:
38894 case V8DF_FTYPE_V8DF_V8DF_V8DF_UQI:
38895 case V8DF_FTYPE_V8DF_V8DI_V8DF_UQI:
38896 case V8DF_FTYPE_V8DI_V8DF_V8DF_UQI:
38897 case V8DI_FTYPE_V16SI_V16SI_V8DI_UQI:
38898 case V8DI_FTYPE_V8DI_V2DI_V8DI_UQI:
38899 case V8DI_FTYPE_V8DI_V8DI_V8DI_UQI:
38900 case V8HI_FTYPE_V16QI_V16QI_V8HI_UQI:
38901 case V16HI_FTYPE_V32QI_V32QI_V16HI_UHI:
38902 case V8SI_FTYPE_V16HI_V16HI_V8SI_UQI:
38903 case V4SI_FTYPE_V8HI_V8HI_V4SI_UQI:
38906 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
38907 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
38908 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
38909 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
38910 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT:
38912 nargs_constant = 1;
38914 case UQI_FTYPE_V4DI_V4DI_INT_UQI:
38915 case UQI_FTYPE_V8SI_V8SI_INT_UQI:
38916 case QI_FTYPE_V4DF_V4DF_INT_UQI:
38917 case QI_FTYPE_V8SF_V8SF_INT_UQI:
38918 case UQI_FTYPE_V2DI_V2DI_INT_UQI:
38919 case UQI_FTYPE_V4SI_V4SI_INT_UQI:
38920 case UQI_FTYPE_V2DF_V2DF_INT_UQI:
38921 case UQI_FTYPE_V4SF_V4SF_INT_UQI:
38922 case UDI_FTYPE_V64QI_V64QI_INT_UDI:
38923 case USI_FTYPE_V32QI_V32QI_INT_USI:
38924 case UHI_FTYPE_V16QI_V16QI_INT_UHI:
38925 case USI_FTYPE_V32HI_V32HI_INT_USI:
38926 case UHI_FTYPE_V16HI_V16HI_INT_UHI:
38927 case UQI_FTYPE_V8HI_V8HI_INT_UQI:
38930 nargs_constant = 1;
38932 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
38934 nargs_constant = 2;
38936 case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED:
38937 case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG:
38940 case UQI_FTYPE_V8DI_V8DI_INT_UQI:
38941 case UHI_FTYPE_V16SI_V16SI_INT_UHI:
38944 nargs_constant = 1;
38946 case V8SF_FTYPE_V8SF_INT_V8SF_UQI:
38947 case V4SF_FTYPE_V4SF_INT_V4SF_UQI:
38948 case V2DF_FTYPE_V4DF_INT_V2DF_UQI:
38949 case V2DI_FTYPE_V4DI_INT_V2DI_UQI:
38950 case V8SF_FTYPE_V16SF_INT_V8SF_UQI:
38951 case V8SI_FTYPE_V16SI_INT_V8SI_UQI:
38952 case V2DF_FTYPE_V8DF_INT_V2DF_UQI:
38953 case V2DI_FTYPE_V8DI_INT_V2DI_UQI:
38954 case V4SF_FTYPE_V8SF_INT_V4SF_UQI:
38955 case V4SI_FTYPE_V8SI_INT_V4SI_UQI:
38956 case V8HI_FTYPE_V8SF_INT_V8HI_UQI:
38957 case V8HI_FTYPE_V4SF_INT_V8HI_UQI:
38958 case V32HI_FTYPE_V32HI_INT_V32HI_USI:
38959 case V16HI_FTYPE_V16HI_INT_V16HI_UHI:
38960 case V8HI_FTYPE_V8HI_INT_V8HI_UQI:
38961 case V4DI_FTYPE_V4DI_INT_V4DI_UQI:
38962 case V2DI_FTYPE_V2DI_INT_V2DI_UQI:
38963 case V8SI_FTYPE_V8SI_INT_V8SI_UQI:
38964 case V4SI_FTYPE_V4SI_INT_V4SI_UQI:
38965 case V4DF_FTYPE_V4DF_INT_V4DF_UQI:
38966 case V2DF_FTYPE_V2DF_INT_V2DF_UQI:
38967 case V8DF_FTYPE_V8DF_INT_V8DF_UQI:
38968 case V16SF_FTYPE_V16SF_INT_V16SF_UHI:
38969 case V16HI_FTYPE_V16SF_INT_V16HI_UHI:
38970 case V16SI_FTYPE_V16SI_INT_V16SI_UHI:
38971 case V4SI_FTYPE_V16SI_INT_V4SI_UQI:
38972 case V4DI_FTYPE_V8DI_INT_V4DI_UQI:
38973 case V4DF_FTYPE_V8DF_INT_V4DF_UQI:
38974 case V4SF_FTYPE_V16SF_INT_V4SF_UQI:
38975 case V8DI_FTYPE_V8DI_INT_V8DI_UQI:
38978 nargs_constant = 1;
38980 case V16SF_FTYPE_V16SF_V4SF_INT_V16SF_UHI:
38981 case V16SI_FTYPE_V16SI_V4SI_INT_V16SI_UHI:
38982 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_UQI:
38983 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UQI:
38984 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_UHI:
38985 case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_UHI:
38986 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI:
38987 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI:
38988 case V8DF_FTYPE_V8DF_V4DF_INT_V8DF_UQI:
38989 case V8DI_FTYPE_V8DI_V4DI_INT_V8DI_UQI:
38990 case V4DF_FTYPE_V4DF_V4DF_INT_V4DF_UQI:
38991 case V8SF_FTYPE_V8SF_V8SF_INT_V8SF_UQI:
38992 case V8DF_FTYPE_V8DF_V2DF_INT_V8DF_UQI:
38993 case V8DI_FTYPE_V8DI_V2DI_INT_V8DI_UQI:
38994 case V8SI_FTYPE_V8SI_V8SI_INT_V8SI_UQI:
38995 case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_UQI:
38996 case V4SI_FTYPE_V4SI_V4SI_INT_V4SI_UQI:
38997 case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_UQI:
38998 case V32HI_FTYPE_V64QI_V64QI_INT_V32HI_USI:
38999 case V16HI_FTYPE_V32QI_V32QI_INT_V16HI_UHI:
39000 case V8HI_FTYPE_V16QI_V16QI_INT_V8HI_UQI:
39001 case V16SF_FTYPE_V16SF_V8SF_INT_V16SF_UHI:
39002 case V16SI_FTYPE_V16SI_V8SI_INT_V16SI_UHI:
39003 case V8SF_FTYPE_V8SF_V4SF_INT_V8SF_UQI:
39004 case V8SI_FTYPE_V8SI_V4SI_INT_V8SI_UQI:
39005 case V4DI_FTYPE_V4DI_V2DI_INT_V4DI_UQI:
39006 case V4DF_FTYPE_V4DF_V2DF_INT_V4DF_UQI:
39009 nargs_constant = 1;
39011 case V8DI_FTYPE_V8DI_V8DI_V8DI_INT_UQI:
39012 case V16SI_FTYPE_V16SI_V16SI_V16SI_INT_UHI:
39013 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_UQI:
39014 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_UQI:
39015 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT_UQI:
39016 case V8SI_FTYPE_V8SI_V8SI_V8SI_INT_UQI:
39017 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT_UQI:
39018 case V4DI_FTYPE_V4DI_V4DI_V4DI_INT_UQI:
39019 case V4SI_FTYPE_V4SI_V4SI_V4SI_INT_UQI:
39020 case V2DI_FTYPE_V2DI_V2DI_V2DI_INT_UQI:
39024 nargs_constant = 1;
39028 gcc_unreachable ();
39031 gcc_assert (nargs <= ARRAY_SIZE (args));
39033 if (comparison != UNKNOWN)
39035 gcc_assert (nargs == 2);
39036 return ix86_expand_sse_compare (d, exp, target, swap);
39039 if (rmode == VOIDmode || rmode == tmode)
39043 || GET_MODE (target) != tmode
39044 || !insn_p->operand[0].predicate (target, tmode))
39045 target = gen_reg_rtx (tmode);
39046 real_target = target;
39050 real_target = gen_reg_rtx (tmode);
39051 target = simplify_gen_subreg (rmode, real_target, tmode, 0);
39054 for (i = 0; i < nargs; i++)
39056 tree arg = CALL_EXPR_ARG (exp, i);
39057 rtx op = expand_normal (arg);
39058 machine_mode mode = insn_p->operand[i + 1].mode;
39059 bool match = insn_p->operand[i + 1].predicate (op, mode);
39061 if (last_arg_count && (i + 1) == nargs)
39063 /* SIMD shift insns take either an 8-bit immediate or
39064 register as count. But builtin functions take int as
39065 count. If count doesn't match, we put it in register. */
39068 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
39069 if (!insn_p->operand[i + 1].predicate (op, mode))
39070 op = copy_to_reg (op);
39073 else if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
39074 (!mask_pos && (nargs - i) <= nargs_constant))
39079 case CODE_FOR_avx_vinsertf128v4di:
39080 case CODE_FOR_avx_vextractf128v4di:
39081 error ("the last argument must be an 1-bit immediate");
39084 case CODE_FOR_avx512f_cmpv8di3_mask:
39085 case CODE_FOR_avx512f_cmpv16si3_mask:
39086 case CODE_FOR_avx512f_ucmpv8di3_mask:
39087 case CODE_FOR_avx512f_ucmpv16si3_mask:
39088 case CODE_FOR_avx512vl_cmpv4di3_mask:
39089 case CODE_FOR_avx512vl_cmpv8si3_mask:
39090 case CODE_FOR_avx512vl_ucmpv4di3_mask:
39091 case CODE_FOR_avx512vl_ucmpv8si3_mask:
39092 case CODE_FOR_avx512vl_cmpv2di3_mask:
39093 case CODE_FOR_avx512vl_cmpv4si3_mask:
39094 case CODE_FOR_avx512vl_ucmpv2di3_mask:
39095 case CODE_FOR_avx512vl_ucmpv4si3_mask:
39096 error ("the last argument must be a 3-bit immediate");
39099 case CODE_FOR_sse4_1_roundsd:
39100 case CODE_FOR_sse4_1_roundss:
39102 case CODE_FOR_sse4_1_roundpd:
39103 case CODE_FOR_sse4_1_roundps:
39104 case CODE_FOR_avx_roundpd256:
39105 case CODE_FOR_avx_roundps256:
39107 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
39108 case CODE_FOR_sse4_1_roundps_sfix:
39109 case CODE_FOR_avx_roundpd_vec_pack_sfix256:
39110 case CODE_FOR_avx_roundps_sfix256:
39112 case CODE_FOR_sse4_1_blendps:
39113 case CODE_FOR_avx_blendpd256:
39114 case CODE_FOR_avx_vpermilv4df:
39115 case CODE_FOR_avx_vpermilv4df_mask:
39116 case CODE_FOR_avx512f_getmantv8df_mask:
39117 case CODE_FOR_avx512f_getmantv16sf_mask:
39118 case CODE_FOR_avx512vl_getmantv8sf_mask:
39119 case CODE_FOR_avx512vl_getmantv4df_mask:
39120 case CODE_FOR_avx512vl_getmantv4sf_mask:
39121 case CODE_FOR_avx512vl_getmantv2df_mask:
39122 case CODE_FOR_avx512dq_rangepv8df_mask_round:
39123 case CODE_FOR_avx512dq_rangepv16sf_mask_round:
39124 case CODE_FOR_avx512dq_rangepv4df_mask:
39125 case CODE_FOR_avx512dq_rangepv8sf_mask:
39126 case CODE_FOR_avx512dq_rangepv2df_mask:
39127 case CODE_FOR_avx512dq_rangepv4sf_mask:
39128 case CODE_FOR_avx_shufpd256_mask:
39129 error ("the last argument must be a 4-bit immediate");
39132 case CODE_FOR_sha1rnds4:
39133 case CODE_FOR_sse4_1_blendpd:
39134 case CODE_FOR_avx_vpermilv2df:
39135 case CODE_FOR_avx_vpermilv2df_mask:
39136 case CODE_FOR_xop_vpermil2v2df3:
39137 case CODE_FOR_xop_vpermil2v4sf3:
39138 case CODE_FOR_xop_vpermil2v4df3:
39139 case CODE_FOR_xop_vpermil2v8sf3:
39140 case CODE_FOR_avx512f_vinsertf32x4_mask:
39141 case CODE_FOR_avx512f_vinserti32x4_mask:
39142 case CODE_FOR_avx512f_vextractf32x4_mask:
39143 case CODE_FOR_avx512f_vextracti32x4_mask:
39144 case CODE_FOR_sse2_shufpd:
39145 case CODE_FOR_sse2_shufpd_mask:
39146 case CODE_FOR_avx512dq_shuf_f64x2_mask:
39147 case CODE_FOR_avx512dq_shuf_i64x2_mask:
39148 case CODE_FOR_avx512vl_shuf_i32x4_mask:
39149 case CODE_FOR_avx512vl_shuf_f32x4_mask:
39150 error ("the last argument must be a 2-bit immediate");
39153 case CODE_FOR_avx_vextractf128v4df:
39154 case CODE_FOR_avx_vextractf128v8sf:
39155 case CODE_FOR_avx_vextractf128v8si:
39156 case CODE_FOR_avx_vinsertf128v4df:
39157 case CODE_FOR_avx_vinsertf128v8sf:
39158 case CODE_FOR_avx_vinsertf128v8si:
39159 case CODE_FOR_avx512f_vinsertf64x4_mask:
39160 case CODE_FOR_avx512f_vinserti64x4_mask:
39161 case CODE_FOR_avx512f_vextractf64x4_mask:
39162 case CODE_FOR_avx512f_vextracti64x4_mask:
39163 case CODE_FOR_avx512dq_vinsertf32x8_mask:
39164 case CODE_FOR_avx512dq_vinserti32x8_mask:
39165 case CODE_FOR_avx512vl_vinsertv4df:
39166 case CODE_FOR_avx512vl_vinsertv4di:
39167 case CODE_FOR_avx512vl_vinsertv8sf:
39168 case CODE_FOR_avx512vl_vinsertv8si:
39169 error ("the last argument must be a 1-bit immediate");
39172 case CODE_FOR_avx_vmcmpv2df3:
39173 case CODE_FOR_avx_vmcmpv4sf3:
39174 case CODE_FOR_avx_cmpv2df3:
39175 case CODE_FOR_avx_cmpv4sf3:
39176 case CODE_FOR_avx_cmpv4df3:
39177 case CODE_FOR_avx_cmpv8sf3:
39178 case CODE_FOR_avx512f_cmpv8df3_mask:
39179 case CODE_FOR_avx512f_cmpv16sf3_mask:
39180 case CODE_FOR_avx512f_vmcmpv2df3_mask:
39181 case CODE_FOR_avx512f_vmcmpv4sf3_mask:
39182 error ("the last argument must be a 5-bit immediate");
39186 switch (nargs_constant)
39189 if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
39190 (!mask_pos && (nargs - i) == nargs_constant))
39192 error ("the next to last argument must be an 8-bit immediate");
39196 error ("the last argument must be an 8-bit immediate");
39199 gcc_unreachable ();
39206 if (VECTOR_MODE_P (mode))
39207 op = safe_vector_operand (op, mode);
39209 /* If we aren't optimizing, only allow one memory operand to
39211 if (memory_operand (op, mode))
39214 op = fixup_modeless_constant (op, mode);
39216 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
39218 if (optimize || !match || num_memory > 1)
39219 op = copy_to_mode_reg (mode, op);
39223 op = copy_to_reg (op);
39224 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
39229 args[i].mode = mode;
39235 pat = GEN_FCN (icode) (real_target, args[0].op);
39238 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
39241 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
39245 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
39246 args[2].op, args[3].op);
39249 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
39250 args[2].op, args[3].op, args[4].op);
39252 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
39253 args[2].op, args[3].op, args[4].op,
39257 gcc_unreachable ();
39267 /* Transform pattern of following layout:
39270 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)])
39278 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)
39282 (parallel [ A B ... ]) */
39285 ix86_erase_embedded_rounding (rtx pat)
39287 if (GET_CODE (pat) == INSN)
39288 pat = PATTERN (pat);
39290 gcc_assert (GET_CODE (pat) == PARALLEL);
39292 if (XVECLEN (pat, 0) == 2)
39294 rtx p0 = XVECEXP (pat, 0, 0);
39295 rtx p1 = XVECEXP (pat, 0, 1);
39297 gcc_assert (GET_CODE (p0) == SET
39298 && GET_CODE (p1) == UNSPEC
39299 && XINT (p1, 1) == UNSPEC_EMBEDDED_ROUNDING);
39305 rtx *res = XALLOCAVEC (rtx, XVECLEN (pat, 0));
39309 for (; i < XVECLEN (pat, 0); ++i)
39311 rtx elem = XVECEXP (pat, 0, i);
39312 if (GET_CODE (elem) != UNSPEC
39313 || XINT (elem, 1) != UNSPEC_EMBEDDED_ROUNDING)
39317 /* No more than 1 occurence was removed. */
39318 gcc_assert (j >= XVECLEN (pat, 0) - 1);
39320 return gen_rtx_PARALLEL (GET_MODE (pat), gen_rtvec_v (j, res));
39324 /* Subroutine of ix86_expand_round_builtin to take care of comi insns
39327 ix86_expand_sse_comi_round (const struct builtin_description *d,
39328 tree exp, rtx target)
39331 tree arg0 = CALL_EXPR_ARG (exp, 0);
39332 tree arg1 = CALL_EXPR_ARG (exp, 1);
39333 tree arg2 = CALL_EXPR_ARG (exp, 2);
39334 tree arg3 = CALL_EXPR_ARG (exp, 3);
39335 rtx op0 = expand_normal (arg0);
39336 rtx op1 = expand_normal (arg1);
39337 rtx op2 = expand_normal (arg2);
39338 rtx op3 = expand_normal (arg3);
39339 enum insn_code icode = d->icode;
39340 const struct insn_data_d *insn_p = &insn_data[icode];
39341 machine_mode mode0 = insn_p->operand[0].mode;
39342 machine_mode mode1 = insn_p->operand[1].mode;
39343 enum rtx_code comparison = UNEQ;
39344 bool need_ucomi = false;
39346 /* See avxintrin.h for values. */
39347 enum rtx_code comi_comparisons[32] =
39349 UNEQ, GT, GE, UNORDERED, LTGT, UNLE, UNLT, ORDERED, UNEQ, UNLT,
39350 UNLE, LT, LTGT, GE, GT, LT, UNEQ, GT, GE, UNORDERED, LTGT, UNLE,
39351 UNLT, ORDERED, UNEQ, UNLT, UNLE, LT, LTGT, GE, GT, LT
39353 bool need_ucomi_values[32] =
39355 true, false, false, true, true, false, false, true,
39356 true, false, false, true, true, false, false, true,
39357 false, true, true, false, false, true, true, false,
39358 false, true, true, false, false, true, true, false
39361 if (!CONST_INT_P (op2))
39363 error ("the third argument must be comparison constant");
39366 if (INTVAL (op2) < 0 || INTVAL (op2) >= 32)
39368 error ("incorrect comparison mode");
39372 if (!insn_p->operand[2].predicate (op3, SImode))
39374 error ("incorrect rounding operand");
39378 comparison = comi_comparisons[INTVAL (op2)];
39379 need_ucomi = need_ucomi_values[INTVAL (op2)];
39381 if (VECTOR_MODE_P (mode0))
39382 op0 = safe_vector_operand (op0, mode0);
39383 if (VECTOR_MODE_P (mode1))
39384 op1 = safe_vector_operand (op1, mode1);
39386 target = gen_reg_rtx (SImode);
39387 emit_move_insn (target, const0_rtx);
39388 target = gen_rtx_SUBREG (QImode, target, 0);
39390 if ((optimize && !register_operand (op0, mode0))
39391 || !insn_p->operand[0].predicate (op0, mode0))
39392 op0 = copy_to_mode_reg (mode0, op0);
39393 if ((optimize && !register_operand (op1, mode1))
39394 || !insn_p->operand[1].predicate (op1, mode1))
39395 op1 = copy_to_mode_reg (mode1, op1);
39398 icode = icode == CODE_FOR_sse_comi_round
39399 ? CODE_FOR_sse_ucomi_round
39400 : CODE_FOR_sse2_ucomi_round;
39402 pat = GEN_FCN (icode) (op0, op1, op3);
39406 /* Rounding operand can be either NO_ROUND or ROUND_SAE at this point. */
39407 if (INTVAL (op3) == NO_ROUND)
39409 pat = ix86_erase_embedded_rounding (pat);
39413 set_dst = SET_DEST (pat);
39417 gcc_assert (GET_CODE (XVECEXP (pat, 0, 0)) == SET);
39418 set_dst = SET_DEST (XVECEXP (pat, 0, 0));
39422 emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
39423 gen_rtx_fmt_ee (comparison, QImode,
39427 return SUBREG_REG (target);
39431 ix86_expand_round_builtin (const struct builtin_description *d,
39432 tree exp, rtx target)
39435 unsigned int i, nargs;
39441 enum insn_code icode = d->icode;
39442 const struct insn_data_d *insn_p = &insn_data[icode];
39443 machine_mode tmode = insn_p->operand[0].mode;
39444 unsigned int nargs_constant = 0;
39445 unsigned int redundant_embed_rnd = 0;
39447 switch ((enum ix86_builtin_func_type) d->flag)
39449 case UINT64_FTYPE_V2DF_INT:
39450 case UINT64_FTYPE_V4SF_INT:
39451 case UINT_FTYPE_V2DF_INT:
39452 case UINT_FTYPE_V4SF_INT:
39453 case INT64_FTYPE_V2DF_INT:
39454 case INT64_FTYPE_V4SF_INT:
39455 case INT_FTYPE_V2DF_INT:
39456 case INT_FTYPE_V4SF_INT:
39459 case V4SF_FTYPE_V4SF_UINT_INT:
39460 case V4SF_FTYPE_V4SF_UINT64_INT:
39461 case V2DF_FTYPE_V2DF_UINT64_INT:
39462 case V4SF_FTYPE_V4SF_INT_INT:
39463 case V4SF_FTYPE_V4SF_INT64_INT:
39464 case V2DF_FTYPE_V2DF_INT64_INT:
39465 case V4SF_FTYPE_V4SF_V4SF_INT:
39466 case V2DF_FTYPE_V2DF_V2DF_INT:
39467 case V4SF_FTYPE_V4SF_V2DF_INT:
39468 case V2DF_FTYPE_V2DF_V4SF_INT:
39471 case V8SF_FTYPE_V8DF_V8SF_QI_INT:
39472 case V8DF_FTYPE_V8DF_V8DF_QI_INT:
39473 case V8SI_FTYPE_V8DF_V8SI_QI_INT:
39474 case V8DI_FTYPE_V8DF_V8DI_QI_INT:
39475 case V8SF_FTYPE_V8DI_V8SF_QI_INT:
39476 case V8DF_FTYPE_V8DI_V8DF_QI_INT:
39477 case V16SF_FTYPE_V16SF_V16SF_HI_INT:
39478 case V8DI_FTYPE_V8SF_V8DI_QI_INT:
39479 case V16SF_FTYPE_V16SI_V16SF_HI_INT:
39480 case V16SI_FTYPE_V16SF_V16SI_HI_INT:
39481 case V8DF_FTYPE_V8SF_V8DF_QI_INT:
39482 case V16SF_FTYPE_V16HI_V16SF_HI_INT:
39483 case V2DF_FTYPE_V2DF_V2DF_V2DF_INT:
39484 case V4SF_FTYPE_V4SF_V4SF_V4SF_INT:
39487 case V4SF_FTYPE_V4SF_V4SF_INT_INT:
39488 case V2DF_FTYPE_V2DF_V2DF_INT_INT:
39489 nargs_constant = 2;
39492 case INT_FTYPE_V4SF_V4SF_INT_INT:
39493 case INT_FTYPE_V2DF_V2DF_INT_INT:
39494 return ix86_expand_sse_comi_round (d, exp, target);
39495 case V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT:
39496 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT:
39497 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT:
39498 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT:
39499 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT:
39500 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT:
39503 case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT:
39504 case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT:
39505 nargs_constant = 4;
39508 case UQI_FTYPE_V8DF_V8DF_INT_UQI_INT:
39509 case UQI_FTYPE_V2DF_V2DF_INT_UQI_INT:
39510 case UHI_FTYPE_V16SF_V16SF_INT_UHI_INT:
39511 case UQI_FTYPE_V4SF_V4SF_INT_UQI_INT:
39512 nargs_constant = 3;
39515 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT:
39516 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT:
39517 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT:
39518 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT:
39520 nargs_constant = 4;
39522 case V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT:
39523 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT:
39524 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT:
39525 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT:
39527 nargs_constant = 3;
39530 gcc_unreachable ();
39532 gcc_assert (nargs <= ARRAY_SIZE (args));
39536 || GET_MODE (target) != tmode
39537 || !insn_p->operand[0].predicate (target, tmode))
39538 target = gen_reg_rtx (tmode);
39540 for (i = 0; i < nargs; i++)
39542 tree arg = CALL_EXPR_ARG (exp, i);
39543 rtx op = expand_normal (arg);
39544 machine_mode mode = insn_p->operand[i + 1].mode;
39545 bool match = insn_p->operand[i + 1].predicate (op, mode);
39547 if (i == nargs - nargs_constant)
39553 case CODE_FOR_avx512f_getmantv8df_mask_round:
39554 case CODE_FOR_avx512f_getmantv16sf_mask_round:
39555 case CODE_FOR_avx512f_vgetmantv2df_round:
39556 case CODE_FOR_avx512f_vgetmantv4sf_round:
39557 error ("the immediate argument must be a 4-bit immediate");
39559 case CODE_FOR_avx512f_cmpv8df3_mask_round:
39560 case CODE_FOR_avx512f_cmpv16sf3_mask_round:
39561 case CODE_FOR_avx512f_vmcmpv2df3_mask_round:
39562 case CODE_FOR_avx512f_vmcmpv4sf3_mask_round:
39563 error ("the immediate argument must be a 5-bit immediate");
39566 error ("the immediate argument must be an 8-bit immediate");
39571 else if (i == nargs-1)
39573 if (!insn_p->operand[nargs].predicate (op, SImode))
39575 error ("incorrect rounding operand");
39579 /* If there is no rounding use normal version of the pattern. */
39580 if (INTVAL (op) == NO_ROUND)
39581 redundant_embed_rnd = 1;
39585 if (VECTOR_MODE_P (mode))
39586 op = safe_vector_operand (op, mode);
39588 op = fixup_modeless_constant (op, mode);
39590 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
39592 if (optimize || !match)
39593 op = copy_to_mode_reg (mode, op);
39597 op = copy_to_reg (op);
39598 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
39603 args[i].mode = mode;
39609 pat = GEN_FCN (icode) (target, args[0].op);
39612 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
39615 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
39619 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
39620 args[2].op, args[3].op);
39623 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
39624 args[2].op, args[3].op, args[4].op);
39626 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
39627 args[2].op, args[3].op, args[4].op,
39631 gcc_unreachable ();
39637 if (redundant_embed_rnd)
39638 pat = ix86_erase_embedded_rounding (pat);
39644 /* Subroutine of ix86_expand_builtin to take care of special insns
39645 with variable number of operands. */
39648 ix86_expand_special_args_builtin (const struct builtin_description *d,
39649 tree exp, rtx target)
39653 unsigned int i, nargs, arg_adjust, memory;
39654 bool aligned_mem = false;
39660 enum insn_code icode = d->icode;
39661 bool last_arg_constant = false;
39662 const struct insn_data_d *insn_p = &insn_data[icode];
39663 machine_mode tmode = insn_p->operand[0].mode;
39664 enum { load, store } klass;
39666 switch ((enum ix86_builtin_func_type) d->flag)
39668 case VOID_FTYPE_VOID:
39669 emit_insn (GEN_FCN (icode) (target));
39671 case VOID_FTYPE_UINT64:
39672 case VOID_FTYPE_UNSIGNED:
39678 case INT_FTYPE_VOID:
39679 case USHORT_FTYPE_VOID:
39680 case UINT64_FTYPE_VOID:
39681 case UNSIGNED_FTYPE_VOID:
39686 case UINT64_FTYPE_PUNSIGNED:
39687 case V2DI_FTYPE_PV2DI:
39688 case V4DI_FTYPE_PV4DI:
39689 case V32QI_FTYPE_PCCHAR:
39690 case V16QI_FTYPE_PCCHAR:
39691 case V8SF_FTYPE_PCV4SF:
39692 case V8SF_FTYPE_PCFLOAT:
39693 case V4SF_FTYPE_PCFLOAT:
39694 case V4DF_FTYPE_PCV2DF:
39695 case V4DF_FTYPE_PCDOUBLE:
39696 case V2DF_FTYPE_PCDOUBLE:
39697 case VOID_FTYPE_PVOID:
39698 case V8DI_FTYPE_PV8DI:
39704 case CODE_FOR_sse4_1_movntdqa:
39705 case CODE_FOR_avx2_movntdqa:
39706 case CODE_FOR_avx512f_movntdqa:
39707 aligned_mem = true;
39713 case VOID_FTYPE_PV2SF_V4SF:
39714 case VOID_FTYPE_PV8DI_V8DI:
39715 case VOID_FTYPE_PV4DI_V4DI:
39716 case VOID_FTYPE_PV2DI_V2DI:
39717 case VOID_FTYPE_PCHAR_V32QI:
39718 case VOID_FTYPE_PCHAR_V16QI:
39719 case VOID_FTYPE_PFLOAT_V16SF:
39720 case VOID_FTYPE_PFLOAT_V8SF:
39721 case VOID_FTYPE_PFLOAT_V4SF:
39722 case VOID_FTYPE_PDOUBLE_V8DF:
39723 case VOID_FTYPE_PDOUBLE_V4DF:
39724 case VOID_FTYPE_PDOUBLE_V2DF:
39725 case VOID_FTYPE_PLONGLONG_LONGLONG:
39726 case VOID_FTYPE_PULONGLONG_ULONGLONG:
39727 case VOID_FTYPE_PINT_INT:
39730 /* Reserve memory operand for target. */
39731 memory = ARRAY_SIZE (args);
39734 /* These builtins and instructions require the memory
39735 to be properly aligned. */
39736 case CODE_FOR_avx_movntv4di:
39737 case CODE_FOR_sse2_movntv2di:
39738 case CODE_FOR_avx_movntv8sf:
39739 case CODE_FOR_sse_movntv4sf:
39740 case CODE_FOR_sse4a_vmmovntv4sf:
39741 case CODE_FOR_avx_movntv4df:
39742 case CODE_FOR_sse2_movntv2df:
39743 case CODE_FOR_sse4a_vmmovntv2df:
39744 case CODE_FOR_sse2_movntidi:
39745 case CODE_FOR_sse_movntq:
39746 case CODE_FOR_sse2_movntisi:
39747 case CODE_FOR_avx512f_movntv16sf:
39748 case CODE_FOR_avx512f_movntv8df:
39749 case CODE_FOR_avx512f_movntv8di:
39750 aligned_mem = true;
39756 case V4SF_FTYPE_V4SF_PCV2SF:
39757 case V2DF_FTYPE_V2DF_PCDOUBLE:
39762 case V8SF_FTYPE_PCV8SF_V8SI:
39763 case V4DF_FTYPE_PCV4DF_V4DI:
39764 case V4SF_FTYPE_PCV4SF_V4SI:
39765 case V2DF_FTYPE_PCV2DF_V2DI:
39766 case V8SI_FTYPE_PCV8SI_V8SI:
39767 case V4DI_FTYPE_PCV4DI_V4DI:
39768 case V4SI_FTYPE_PCV4SI_V4SI:
39769 case V2DI_FTYPE_PCV2DI_V2DI:
39774 case VOID_FTYPE_PV8DF_V8DF_UQI:
39775 case VOID_FTYPE_PV16SF_V16SF_UHI:
39776 case VOID_FTYPE_PV8DI_V8DI_UQI:
39777 case VOID_FTYPE_PV4DI_V4DI_UQI:
39778 case VOID_FTYPE_PV2DI_V2DI_UQI:
39779 case VOID_FTYPE_PV16SI_V16SI_UHI:
39780 case VOID_FTYPE_PV8SI_V8SI_UQI:
39781 case VOID_FTYPE_PV4SI_V4SI_UQI:
39784 /* These builtins and instructions require the memory
39785 to be properly aligned. */
39786 case CODE_FOR_avx512f_storev16sf_mask:
39787 case CODE_FOR_avx512f_storev16si_mask:
39788 case CODE_FOR_avx512f_storev8df_mask:
39789 case CODE_FOR_avx512f_storev8di_mask:
39790 case CODE_FOR_avx512vl_storev8sf_mask:
39791 case CODE_FOR_avx512vl_storev8si_mask:
39792 case CODE_FOR_avx512vl_storev4df_mask:
39793 case CODE_FOR_avx512vl_storev4di_mask:
39794 case CODE_FOR_avx512vl_storev4sf_mask:
39795 case CODE_FOR_avx512vl_storev4si_mask:
39796 case CODE_FOR_avx512vl_storev2df_mask:
39797 case CODE_FOR_avx512vl_storev2di_mask:
39798 aligned_mem = true;
39804 case VOID_FTYPE_PV8SF_V8SI_V8SF:
39805 case VOID_FTYPE_PV4DF_V4DI_V4DF:
39806 case VOID_FTYPE_PV4SF_V4SI_V4SF:
39807 case VOID_FTYPE_PV2DF_V2DI_V2DF:
39808 case VOID_FTYPE_PV8SI_V8SI_V8SI:
39809 case VOID_FTYPE_PV4DI_V4DI_V4DI:
39810 case VOID_FTYPE_PV4SI_V4SI_V4SI:
39811 case VOID_FTYPE_PV2DI_V2DI_V2DI:
39812 case VOID_FTYPE_PV8SI_V8DI_UQI:
39813 case VOID_FTYPE_PV8HI_V8DI_UQI:
39814 case VOID_FTYPE_PV16HI_V16SI_UHI:
39815 case VOID_FTYPE_PV16QI_V8DI_UQI:
39816 case VOID_FTYPE_PV16QI_V16SI_UHI:
39817 case VOID_FTYPE_PV4SI_V4DI_UQI:
39818 case VOID_FTYPE_PV4SI_V2DI_UQI:
39819 case VOID_FTYPE_PV8HI_V4DI_UQI:
39820 case VOID_FTYPE_PV8HI_V2DI_UQI:
39821 case VOID_FTYPE_PV8HI_V8SI_UQI:
39822 case VOID_FTYPE_PV8HI_V4SI_UQI:
39823 case VOID_FTYPE_PV16QI_V4DI_UQI:
39824 case VOID_FTYPE_PV16QI_V2DI_UQI:
39825 case VOID_FTYPE_PV16QI_V8SI_UQI:
39826 case VOID_FTYPE_PV16QI_V4SI_UQI:
39827 case VOID_FTYPE_PV8HI_V8HI_UQI:
39828 case VOID_FTYPE_PV16HI_V16HI_UHI:
39829 case VOID_FTYPE_PV32HI_V32HI_USI:
39830 case VOID_FTYPE_PV16QI_V16QI_UHI:
39831 case VOID_FTYPE_PV32QI_V32QI_USI:
39832 case VOID_FTYPE_PV64QI_V64QI_UDI:
39833 case VOID_FTYPE_PV4DF_V4DF_UQI:
39834 case VOID_FTYPE_PV2DF_V2DF_UQI:
39835 case VOID_FTYPE_PV8SF_V8SF_UQI:
39836 case VOID_FTYPE_PV4SF_V4SF_UQI:
39839 /* Reserve memory operand for target. */
39840 memory = ARRAY_SIZE (args);
39842 case V4SF_FTYPE_PCV4SF_V4SF_UQI:
39843 case V8SF_FTYPE_PCV8SF_V8SF_UQI:
39844 case V16SF_FTYPE_PCV16SF_V16SF_UHI:
39845 case V4SI_FTYPE_PCV4SI_V4SI_UQI:
39846 case V8SI_FTYPE_PCV8SI_V8SI_UQI:
39847 case V16SI_FTYPE_PCV16SI_V16SI_UHI:
39848 case V2DF_FTYPE_PCV2DF_V2DF_UQI:
39849 case V4DF_FTYPE_PCV4DF_V4DF_UQI:
39850 case V8DF_FTYPE_PCV8DF_V8DF_UQI:
39851 case V2DI_FTYPE_PCV2DI_V2DI_UQI:
39852 case V4DI_FTYPE_PCV4DI_V4DI_UQI:
39853 case V8DI_FTYPE_PCV8DI_V8DI_UQI:
39854 case V8HI_FTYPE_PCV8HI_V8HI_UQI:
39855 case V16HI_FTYPE_PCV16HI_V16HI_UHI:
39856 case V32HI_FTYPE_PCV32HI_V32HI_USI:
39857 case V16QI_FTYPE_PCV16QI_V16QI_UHI:
39858 case V32QI_FTYPE_PCV32QI_V32QI_USI:
39859 case V64QI_FTYPE_PCV64QI_V64QI_UDI:
39865 /* These builtins and instructions require the memory
39866 to be properly aligned. */
39867 case CODE_FOR_avx512f_loadv16sf_mask:
39868 case CODE_FOR_avx512f_loadv16si_mask:
39869 case CODE_FOR_avx512f_loadv8df_mask:
39870 case CODE_FOR_avx512f_loadv8di_mask:
39871 case CODE_FOR_avx512vl_loadv8sf_mask:
39872 case CODE_FOR_avx512vl_loadv8si_mask:
39873 case CODE_FOR_avx512vl_loadv4df_mask:
39874 case CODE_FOR_avx512vl_loadv4di_mask:
39875 case CODE_FOR_avx512vl_loadv4sf_mask:
39876 case CODE_FOR_avx512vl_loadv4si_mask:
39877 case CODE_FOR_avx512vl_loadv2df_mask:
39878 case CODE_FOR_avx512vl_loadv2di_mask:
39879 case CODE_FOR_avx512bw_loadv64qi_mask:
39880 case CODE_FOR_avx512vl_loadv32qi_mask:
39881 case CODE_FOR_avx512vl_loadv16qi_mask:
39882 case CODE_FOR_avx512bw_loadv32hi_mask:
39883 case CODE_FOR_avx512vl_loadv16hi_mask:
39884 case CODE_FOR_avx512vl_loadv8hi_mask:
39885 aligned_mem = true;
39891 case VOID_FTYPE_UINT_UINT_UINT:
39892 case VOID_FTYPE_UINT64_UINT_UINT:
39893 case UCHAR_FTYPE_UINT_UINT_UINT:
39894 case UCHAR_FTYPE_UINT64_UINT_UINT:
39897 memory = ARRAY_SIZE (args);
39898 last_arg_constant = true;
39901 gcc_unreachable ();
39904 gcc_assert (nargs <= ARRAY_SIZE (args));
39906 if (klass == store)
39908 arg = CALL_EXPR_ARG (exp, 0);
39909 op = expand_normal (arg);
39910 gcc_assert (target == 0);
39913 op = ix86_zero_extend_to_Pmode (op);
39914 target = gen_rtx_MEM (tmode, op);
39915 /* target at this point has just BITS_PER_UNIT MEM_ALIGN
39916 on it. Try to improve it using get_pointer_alignment,
39917 and if the special builtin is one that requires strict
39918 mode alignment, also from it's GET_MODE_ALIGNMENT.
39919 Failure to do so could lead to ix86_legitimate_combined_insn
39920 rejecting all changes to such insns. */
39921 unsigned int align = get_pointer_alignment (arg);
39922 if (aligned_mem && align < GET_MODE_ALIGNMENT (tmode))
39923 align = GET_MODE_ALIGNMENT (tmode);
39924 if (MEM_ALIGN (target) < align)
39925 set_mem_align (target, align);
39928 target = force_reg (tmode, op);
39936 || !register_operand (target, tmode)
39937 || GET_MODE (target) != tmode)
39938 target = gen_reg_rtx (tmode);
39941 for (i = 0; i < nargs; i++)
39943 machine_mode mode = insn_p->operand[i + 1].mode;
39946 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
39947 op = expand_normal (arg);
39948 match = insn_p->operand[i + 1].predicate (op, mode);
39950 if (last_arg_constant && (i + 1) == nargs)
39954 if (icode == CODE_FOR_lwp_lwpvalsi3
39955 || icode == CODE_FOR_lwp_lwpinssi3
39956 || icode == CODE_FOR_lwp_lwpvaldi3
39957 || icode == CODE_FOR_lwp_lwpinsdi3)
39958 error ("the last argument must be a 32-bit immediate");
39960 error ("the last argument must be an 8-bit immediate");
39968 /* This must be the memory operand. */
39969 op = ix86_zero_extend_to_Pmode (op);
39970 op = gen_rtx_MEM (mode, op);
39971 /* op at this point has just BITS_PER_UNIT MEM_ALIGN
39972 on it. Try to improve it using get_pointer_alignment,
39973 and if the special builtin is one that requires strict
39974 mode alignment, also from it's GET_MODE_ALIGNMENT.
39975 Failure to do so could lead to ix86_legitimate_combined_insn
39976 rejecting all changes to such insns. */
39977 unsigned int align = get_pointer_alignment (arg);
39978 if (aligned_mem && align < GET_MODE_ALIGNMENT (mode))
39979 align = GET_MODE_ALIGNMENT (mode);
39980 if (MEM_ALIGN (op) < align)
39981 set_mem_align (op, align);
39985 /* This must be register. */
39986 if (VECTOR_MODE_P (mode))
39987 op = safe_vector_operand (op, mode);
39989 op = fixup_modeless_constant (op, mode);
39991 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
39992 op = copy_to_mode_reg (mode, op);
39995 op = copy_to_reg (op);
39996 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
40002 args[i].mode = mode;
40008 pat = GEN_FCN (icode) (target);
40011 pat = GEN_FCN (icode) (target, args[0].op);
40014 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
40017 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
40020 gcc_unreachable ();
40026 return klass == store ? 0 : target;
40029 /* Return the integer constant in ARG. Constrain it to be in the range
40030 of the subparts of VEC_TYPE; issue an error if not. */
40033 get_element_number (tree vec_type, tree arg)
/* MAX is the largest valid lane index: one less than the number of
   vector subparts of VEC_TYPE.  */
40035 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
/* Reject a selector that is not a compile-time unsigned constant or
   that exceeds MAX.  Note the comma expression: ELT is assigned as a
   side effect of evaluating the range test.  */
40037 if (!tree_fits_uhwi_p (arg)
40038 || (elt = tree_to_uhwi (arg), elt > max))
40040 error ("selector must be an integer constant in the range 0..%wi", max);
40047 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
40048 ix86_expand_vector_init. We DO have language-level syntax for this, in
40049 the form of (type){ init-list }. Except that since we can't place emms
40050 instructions from inside the compiler, we can't allow the use of MMX
40051 registers unless the user explicitly asks for it. So we do *not* define
40052 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
40053 we have builtins invoked by mmintrin.h that gives us license to emit
40054 these sorts of instructions. */
40057 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
/* TMODE is the full vector mode of TYPE; INNER_MODE the mode of one
   element; N_ELT the number of elements the call must supply.  */
40059 machine_mode tmode = TYPE_MODE (type);
40060 machine_mode inner_mode = GET_MODE_INNER (tmode);
40061 int i, n_elt = GET_MODE_NUNITS (tmode);
40062 rtvec v = rtvec_alloc (n_elt);
40064 gcc_assert (VECTOR_MODE_P (tmode));
40065 gcc_assert (call_expr_nargs (exp) == n_elt);
/* Expand each call argument and narrow it to the element mode, filling
   the rtvec that will form the PARALLEL initializer.  */
40067 for (i = 0; i < n_elt; ++i)
40069 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
40070 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
/* The result must live in a register of the vector mode; allocate a
   fresh pseudo if the caller's TARGET is unsuitable.  */
40073 if (!target || !register_operand (target, tmode))
40074 target = gen_reg_rtx (tmode);
40076 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
40080 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
40081 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
40082 had a language-level syntax for referencing vector elements. */
40085 ix86_expand_vec_ext_builtin (tree exp, rtx target)
40087 machine_mode tmode, mode0;
/* Arg 0 is the source vector, arg 1 the constant lane selector.  */
40092 arg0 = CALL_EXPR_ARG (exp, 0);
40093 arg1 = CALL_EXPR_ARG (exp, 1);
40095 op0 = expand_normal (arg0);
/* Validate the selector against the vector's subpart count; errors on
   out-of-range or non-constant selectors.  */
40096 elt = get_element_number (TREE_TYPE (arg0), arg1);
/* TMODE is the element (result) mode, MODE0 the full vector mode.  */
40098 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
40099 mode0 = TYPE_MODE (TREE_TYPE (arg0));
40100 gcc_assert (VECTOR_MODE_P (mode0));
40102 op0 = force_reg (mode0, op0);
/* When optimizing, always use a fresh pseudo so the extract result is
   a candidate for further optimization.  */
40104 if (optimize || !target || !register_operand (target, tmode))
40105 target = gen_reg_rtx (tmode);
40107 ix86_expand_vector_extract (true, target, op0, elt);
40112 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
40113 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
40114 a language-level syntax for referencing vector elements. */
40117 ix86_expand_vec_set_builtin (tree exp)
40119 machine_mode tmode, mode1;
40120 tree arg0, arg1, arg2;
40122 rtx op0, op1, target;
/* Arg 0 is the destination vector, arg 1 the scalar to insert, and
   arg 2 the constant lane selector.  */
40124 arg0 = CALL_EXPR_ARG (exp, 0);
40125 arg1 = CALL_EXPR_ARG (exp, 1);
40126 arg2 = CALL_EXPR_ARG (exp, 2);
/* TMODE is the full vector mode, MODE1 the element mode.  */
40128 tmode = TYPE_MODE (TREE_TYPE (arg0));
40129 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
40130 gcc_assert (VECTOR_MODE_P (tmode));
40132 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
40133 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
/* Validate the selector; errors on out-of-range or non-constant.  */
40134 elt = get_element_number (TREE_TYPE (arg0), arg2);
/* The scalar may have been expanded in a wider mode; truncate it
   (unsigned) to the element mode if so.  */
40136 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
40137 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
40139 op0 = force_reg (tmode, op0);
40140 op1 = force_reg (mode1, op1);
40142 /* OP0 is the source of these builtin functions and shouldn't be
40143 modified. Create a copy, use it and return it as target. */
40144 target = gen_reg_rtx (tmode);
40145 emit_move_insn (target, op0);
40146 ix86_expand_vector_set (true, target, op1, elt);
40151 /* Emit conditional move of SRC to DST with condition
/* NOTE(review): the guard selecting between the two emission strategies
   below is elided in this view — presumably a TARGET_CMOVE test; confirm
   against the full source.  */
40154 ix86_emit_cmove (rtx dst, rtx src, enum rtx_code code, rtx op1, rtx op2)
/* Strategy 1: emit a real conditional-move insn,
   DST = if (OP1 CODE OP2) then SRC else DST.  */
40160 t = ix86_expand_compare (code, op1, op2);
40161 emit_insn (gen_rtx_SET (dst, gen_rtx_IF_THEN_ELSE (GET_MODE (dst), t,
/* Strategy 2: no cmove available — branch over an unconditional move
   when the reversed condition holds.  */
40166 rtx_code_label *nomove = gen_label_rtx ();
40167 emit_cmp_and_jump_insns (op1, op2, reverse_condition (code),
40168 const0_rtx, GET_MODE (op1), 1, nomove);
40169 emit_move_insn (dst, src);
40170 emit_label (nomove);
40174 /* Choose max of DST and SRC and put it to DST. */
40176 ix86_emit_move_max (rtx dst, rtx src)
/* Unsigned max: if DST < SRC (LTU), conditionally move SRC into DST;
   otherwise DST already holds the maximum.  */
40178 ix86_emit_cmove (dst, src, LTU, dst, src);
40181 /* Expand an expression EXP that calls a built-in function,
40182 with result going to TARGET if that's convenient
40183 (and in mode MODE if that's convenient).
40184 SUBTARGET may be used as the target for computing one of EXP's operands.
40185 IGNORE is nonzero if the value is to be ignored. */
40188 ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
40189 machine_mode mode, int ignore)
40191 const struct builtin_description *d;
40193 enum insn_code icode;
40194 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
40195 tree arg0, arg1, arg2, arg3, arg4;
40196 rtx op0, op1, op2, op3, op4, pat, insn;
40197 machine_mode mode0, mode1, mode2, mode3, mode4;
40198 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
40200 /* For CPU builtins that can be folded, fold first and expand the fold. */
40203 case IX86_BUILTIN_CPU_INIT:
40205 /* Make it call __cpu_indicator_init in libgcc. */
40206 tree call_expr, fndecl, type;
40207 type = build_function_type_list (integer_type_node, NULL_TREE);
40208 fndecl = build_fn_decl ("__cpu_indicator_init", type);
40209 call_expr = build_call_expr (fndecl, 0);
40210 return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
40212 case IX86_BUILTIN_CPU_IS:
40213 case IX86_BUILTIN_CPU_SUPPORTS:
40215 tree arg0 = CALL_EXPR_ARG (exp, 0);
40216 tree fold_expr = fold_builtin_cpu (fndecl, &arg0);
40217 gcc_assert (fold_expr != NULL_TREE);
40218 return expand_expr (fold_expr, target, mode, EXPAND_NORMAL);
40222 /* Determine whether the builtin function is available under the current ISA.
40223 Originally the builtin was not created if it wasn't applicable to the
40224 current ISA based on the command line switches. With function specific
40225 options, we need to check in the context of the function making the call
40226 whether it is supported. */
40227 if (ix86_builtins_isa[fcode].isa
40228 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
40230 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
40231 NULL, (enum fpmath_unit) 0, false);
40234 error ("%qE needs unknown isa option", fndecl);
40237 gcc_assert (opts != NULL);
40238 error ("%qE needs isa option %s", fndecl, opts);
40246 case IX86_BUILTIN_BNDMK:
40248 || GET_MODE (target) != BNDmode
40249 || !register_operand (target, BNDmode))
40250 target = gen_reg_rtx (BNDmode);
40252 arg0 = CALL_EXPR_ARG (exp, 0);
40253 arg1 = CALL_EXPR_ARG (exp, 1);
40255 op0 = expand_normal (arg0);
40256 op1 = expand_normal (arg1);
40258 if (!register_operand (op0, Pmode))
40259 op0 = ix86_zero_extend_to_Pmode (op0);
40260 if (!register_operand (op1, Pmode))
40261 op1 = ix86_zero_extend_to_Pmode (op1);
40263 /* Builtin arg1 is size of block but instruction op1 should
40265 op1 = expand_simple_binop (Pmode, PLUS, op1, constm1_rtx,
40266 NULL_RTX, 1, OPTAB_DIRECT);
40268 emit_insn (BNDmode == BND64mode
40269 ? gen_bnd64_mk (target, op0, op1)
40270 : gen_bnd32_mk (target, op0, op1));
40273 case IX86_BUILTIN_BNDSTX:
40274 arg0 = CALL_EXPR_ARG (exp, 0);
40275 arg1 = CALL_EXPR_ARG (exp, 1);
40276 arg2 = CALL_EXPR_ARG (exp, 2);
40278 op0 = expand_normal (arg0);
40279 op1 = expand_normal (arg1);
40280 op2 = expand_normal (arg2);
40282 if (!register_operand (op0, Pmode))
40283 op0 = ix86_zero_extend_to_Pmode (op0);
40284 if (!register_operand (op1, BNDmode))
40285 op1 = copy_to_mode_reg (BNDmode, op1);
40286 if (!register_operand (op2, Pmode))
40287 op2 = ix86_zero_extend_to_Pmode (op2);
40289 emit_insn (BNDmode == BND64mode
40290 ? gen_bnd64_stx (op2, op0, op1)
40291 : gen_bnd32_stx (op2, op0, op1));
40294 case IX86_BUILTIN_BNDLDX:
40296 || GET_MODE (target) != BNDmode
40297 || !register_operand (target, BNDmode))
40298 target = gen_reg_rtx (BNDmode);
40300 arg0 = CALL_EXPR_ARG (exp, 0);
40301 arg1 = CALL_EXPR_ARG (exp, 1);
40303 op0 = expand_normal (arg0);
40304 op1 = expand_normal (arg1);
40306 if (!register_operand (op0, Pmode))
40307 op0 = ix86_zero_extend_to_Pmode (op0);
40308 if (!register_operand (op1, Pmode))
40309 op1 = ix86_zero_extend_to_Pmode (op1);
40311 emit_insn (BNDmode == BND64mode
40312 ? gen_bnd64_ldx (target, op0, op1)
40313 : gen_bnd32_ldx (target, op0, op1));
40316 case IX86_BUILTIN_BNDCL:
40317 arg0 = CALL_EXPR_ARG (exp, 0);
40318 arg1 = CALL_EXPR_ARG (exp, 1);
40320 op0 = expand_normal (arg0);
40321 op1 = expand_normal (arg1);
40323 if (!register_operand (op0, Pmode))
40324 op0 = ix86_zero_extend_to_Pmode (op0);
40325 if (!register_operand (op1, BNDmode))
40326 op1 = copy_to_mode_reg (BNDmode, op1);
40328 emit_insn (BNDmode == BND64mode
40329 ? gen_bnd64_cl (op1, op0)
40330 : gen_bnd32_cl (op1, op0));
40333 case IX86_BUILTIN_BNDCU:
40334 arg0 = CALL_EXPR_ARG (exp, 0);
40335 arg1 = CALL_EXPR_ARG (exp, 1);
40337 op0 = expand_normal (arg0);
40338 op1 = expand_normal (arg1);
40340 if (!register_operand (op0, Pmode))
40341 op0 = ix86_zero_extend_to_Pmode (op0);
40342 if (!register_operand (op1, BNDmode))
40343 op1 = copy_to_mode_reg (BNDmode, op1);
40345 emit_insn (BNDmode == BND64mode
40346 ? gen_bnd64_cu (op1, op0)
40347 : gen_bnd32_cu (op1, op0));
40350 case IX86_BUILTIN_BNDRET:
40351 arg0 = CALL_EXPR_ARG (exp, 0);
40352 gcc_assert (TREE_CODE (arg0) == SSA_NAME);
40353 target = chkp_get_rtl_bounds (arg0);
40355 /* If no bounds were specified for returned value,
40356 then use INIT bounds. It usually happens when
40357 some built-in function is expanded. */
40360 rtx t1 = gen_reg_rtx (Pmode);
40361 rtx t2 = gen_reg_rtx (Pmode);
40362 target = gen_reg_rtx (BNDmode);
40363 emit_move_insn (t1, const0_rtx);
40364 emit_move_insn (t2, constm1_rtx);
40365 emit_insn (BNDmode == BND64mode
40366 ? gen_bnd64_mk (target, t1, t2)
40367 : gen_bnd32_mk (target, t1, t2));
40370 gcc_assert (target && REG_P (target));
40373 case IX86_BUILTIN_BNDNARROW:
40375 rtx m1, m1h1, m1h2, lb, ub, t1;
40377 /* Return value and lb. */
40378 arg0 = CALL_EXPR_ARG (exp, 0);
40380 arg1 = CALL_EXPR_ARG (exp, 1);
40382 arg2 = CALL_EXPR_ARG (exp, 2);
40384 lb = expand_normal (arg0);
40385 op1 = expand_normal (arg1);
40386 op2 = expand_normal (arg2);
40388 /* Size was passed but we need to use (size - 1) as for bndmk. */
40389 op2 = expand_simple_binop (Pmode, PLUS, op2, constm1_rtx,
40390 NULL_RTX, 1, OPTAB_DIRECT);
40392 /* Add LB to size and inverse to get UB. */
40393 op2 = expand_simple_binop (Pmode, PLUS, op2, lb,
40394 op2, 1, OPTAB_DIRECT);
40395 ub = expand_simple_unop (Pmode, NOT, op2, op2, 1);
40397 if (!register_operand (lb, Pmode))
40398 lb = ix86_zero_extend_to_Pmode (lb);
40399 if (!register_operand (ub, Pmode))
40400 ub = ix86_zero_extend_to_Pmode (ub);
40402 /* We need to move bounds to memory before any computations. */
40407 m1 = assign_386_stack_local (BNDmode, SLOT_TEMP);
40408 emit_move_insn (m1, op1);
40411 /* Generate mem expression to be used for access to LB and UB. */
40412 m1h1 = adjust_address (m1, Pmode, 0);
40413 m1h2 = adjust_address (m1, Pmode, GET_MODE_SIZE (Pmode));
40415 t1 = gen_reg_rtx (Pmode);
40418 emit_move_insn (t1, m1h1);
40419 ix86_emit_move_max (t1, lb);
40420 emit_move_insn (m1h1, t1);
40422 /* Compute UB. UB is stored in 1's complement form. Therefore
40423 we also use max here. */
40424 emit_move_insn (t1, m1h2);
40425 ix86_emit_move_max (t1, ub);
40426 emit_move_insn (m1h2, t1);
40428 op2 = gen_reg_rtx (BNDmode);
40429 emit_move_insn (op2, m1);
40431 return chkp_join_splitted_slot (lb, op2);
40434 case IX86_BUILTIN_BNDINT:
40436 rtx res, rh1, rh2, lb1, lb2, ub1, ub2;
40439 || GET_MODE (target) != BNDmode
40440 || !register_operand (target, BNDmode))
40441 target = gen_reg_rtx (BNDmode);
40443 arg0 = CALL_EXPR_ARG (exp, 0);
40444 arg1 = CALL_EXPR_ARG (exp, 1);
40446 op0 = expand_normal (arg0);
40447 op1 = expand_normal (arg1);
40449 res = assign_386_stack_local (BNDmode, SLOT_TEMP);
40450 rh1 = adjust_address (res, Pmode, 0);
40451 rh2 = adjust_address (res, Pmode, GET_MODE_SIZE (Pmode));
40453 /* Put first bounds to temporaries. */
40454 lb1 = gen_reg_rtx (Pmode);
40455 ub1 = gen_reg_rtx (Pmode);
40458 emit_move_insn (lb1, adjust_address (op0, Pmode, 0));
40459 emit_move_insn (ub1, adjust_address (op0, Pmode,
40460 GET_MODE_SIZE (Pmode)));
40464 emit_move_insn (res, op0);
40465 emit_move_insn (lb1, rh1);
40466 emit_move_insn (ub1, rh2);
40469 /* Put second bounds to temporaries. */
40470 lb2 = gen_reg_rtx (Pmode);
40471 ub2 = gen_reg_rtx (Pmode);
40474 emit_move_insn (lb2, adjust_address (op1, Pmode, 0));
40475 emit_move_insn (ub2, adjust_address (op1, Pmode,
40476 GET_MODE_SIZE (Pmode)));
40480 emit_move_insn (res, op1);
40481 emit_move_insn (lb2, rh1);
40482 emit_move_insn (ub2, rh2);
40486 ix86_emit_move_max (lb1, lb2);
40487 emit_move_insn (rh1, lb1);
40489 /* Compute UB. UB is stored in 1's complement form. Therefore
40490 we also use max here. */
40491 ix86_emit_move_max (ub1, ub2);
40492 emit_move_insn (rh2, ub1);
40494 emit_move_insn (target, res);
40499 case IX86_BUILTIN_SIZEOF:
40505 || GET_MODE (target) != Pmode
40506 || !register_operand (target, Pmode))
40507 target = gen_reg_rtx (Pmode);
40509 arg0 = CALL_EXPR_ARG (exp, 0);
40510 gcc_assert (TREE_CODE (arg0) == VAR_DECL);
40512 name = DECL_ASSEMBLER_NAME (arg0);
40513 symbol = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (name));
40515 emit_insn (Pmode == SImode
40516 ? gen_move_size_reloc_si (target, symbol)
40517 : gen_move_size_reloc_di (target, symbol));
40522 case IX86_BUILTIN_BNDLOWER:
40527 || GET_MODE (target) != Pmode
40528 || !register_operand (target, Pmode))
40529 target = gen_reg_rtx (Pmode);
40531 arg0 = CALL_EXPR_ARG (exp, 0);
40532 op0 = expand_normal (arg0);
40534 /* We need to move bounds to memory first. */
40539 mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
40540 emit_move_insn (mem, op0);
40543 /* Generate mem expression to access LB and load it. */
40544 hmem = adjust_address (mem, Pmode, 0);
40545 emit_move_insn (target, hmem);
40550 case IX86_BUILTIN_BNDUPPER:
40552 rtx mem, hmem, res;
40555 || GET_MODE (target) != Pmode
40556 || !register_operand (target, Pmode))
40557 target = gen_reg_rtx (Pmode);
40559 arg0 = CALL_EXPR_ARG (exp, 0);
40560 op0 = expand_normal (arg0);
40562 /* We need to move bounds to memory first. */
40567 mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
40568 emit_move_insn (mem, op0);
40571 /* Generate mem expression to access UB. */
40572 hmem = adjust_address (mem, Pmode, GET_MODE_SIZE (Pmode));
40574 /* We need to inverse all bits of UB. */
40575 res = expand_simple_unop (Pmode, NOT, hmem, target, 1);
40578 emit_move_insn (target, res);
40583 case IX86_BUILTIN_MASKMOVQ:
40584 case IX86_BUILTIN_MASKMOVDQU:
40585 icode = (fcode == IX86_BUILTIN_MASKMOVQ
40586 ? CODE_FOR_mmx_maskmovq
40587 : CODE_FOR_sse2_maskmovdqu);
40588 /* Note the arg order is different from the operand order. */
40589 arg1 = CALL_EXPR_ARG (exp, 0);
40590 arg2 = CALL_EXPR_ARG (exp, 1);
40591 arg0 = CALL_EXPR_ARG (exp, 2);
40592 op0 = expand_normal (arg0);
40593 op1 = expand_normal (arg1);
40594 op2 = expand_normal (arg2);
40595 mode0 = insn_data[icode].operand[0].mode;
40596 mode1 = insn_data[icode].operand[1].mode;
40597 mode2 = insn_data[icode].operand[2].mode;
40599 op0 = ix86_zero_extend_to_Pmode (op0);
40600 op0 = gen_rtx_MEM (mode1, op0);
40602 if (!insn_data[icode].operand[0].predicate (op0, mode0))
40603 op0 = copy_to_mode_reg (mode0, op0);
40604 if (!insn_data[icode].operand[1].predicate (op1, mode1))
40605 op1 = copy_to_mode_reg (mode1, op1);
40606 if (!insn_data[icode].operand[2].predicate (op2, mode2))
40607 op2 = copy_to_mode_reg (mode2, op2);
40608 pat = GEN_FCN (icode) (op0, op1, op2);
40614 case IX86_BUILTIN_LDMXCSR:
40615 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
40616 target = assign_386_stack_local (SImode, SLOT_TEMP);
40617 emit_move_insn (target, op0);
40618 emit_insn (gen_sse_ldmxcsr (target));
40621 case IX86_BUILTIN_STMXCSR:
40622 target = assign_386_stack_local (SImode, SLOT_TEMP);
40623 emit_insn (gen_sse_stmxcsr (target));
40624 return copy_to_mode_reg (SImode, target);
40626 case IX86_BUILTIN_CLFLUSH:
40627 arg0 = CALL_EXPR_ARG (exp, 0);
40628 op0 = expand_normal (arg0);
40629 icode = CODE_FOR_sse2_clflush;
40630 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
40631 op0 = ix86_zero_extend_to_Pmode (op0);
40633 emit_insn (gen_sse2_clflush (op0));
40636 case IX86_BUILTIN_CLWB:
40637 arg0 = CALL_EXPR_ARG (exp, 0);
40638 op0 = expand_normal (arg0);
40639 icode = CODE_FOR_clwb;
40640 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
40641 op0 = ix86_zero_extend_to_Pmode (op0);
40643 emit_insn (gen_clwb (op0));
40646 case IX86_BUILTIN_CLFLUSHOPT:
40647 arg0 = CALL_EXPR_ARG (exp, 0);
40648 op0 = expand_normal (arg0);
40649 icode = CODE_FOR_clflushopt;
40650 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
40651 op0 = ix86_zero_extend_to_Pmode (op0);
40653 emit_insn (gen_clflushopt (op0));
40656 case IX86_BUILTIN_MONITOR:
40657 case IX86_BUILTIN_MONITORX:
40658 arg0 = CALL_EXPR_ARG (exp, 0);
40659 arg1 = CALL_EXPR_ARG (exp, 1);
40660 arg2 = CALL_EXPR_ARG (exp, 2);
40661 op0 = expand_normal (arg0);
40662 op1 = expand_normal (arg1);
40663 op2 = expand_normal (arg2);
40665 op0 = ix86_zero_extend_to_Pmode (op0);
40667 op1 = copy_to_mode_reg (SImode, op1);
40669 op2 = copy_to_mode_reg (SImode, op2);
40671 emit_insn (fcode == IX86_BUILTIN_MONITOR
40672 ? ix86_gen_monitor (op0, op1, op2)
40673 : ix86_gen_monitorx (op0, op1, op2));
40676 case IX86_BUILTIN_MWAIT:
40677 arg0 = CALL_EXPR_ARG (exp, 0);
40678 arg1 = CALL_EXPR_ARG (exp, 1);
40679 op0 = expand_normal (arg0);
40680 op1 = expand_normal (arg1);
40682 op0 = copy_to_mode_reg (SImode, op0);
40684 op1 = copy_to_mode_reg (SImode, op1);
40685 emit_insn (gen_sse3_mwait (op0, op1));
40688 case IX86_BUILTIN_MWAITX:
40689 arg0 = CALL_EXPR_ARG (exp, 0);
40690 arg1 = CALL_EXPR_ARG (exp, 1);
40691 arg2 = CALL_EXPR_ARG (exp, 2);
40692 op0 = expand_normal (arg0);
40693 op1 = expand_normal (arg1);
40694 op2 = expand_normal (arg2);
40696 op0 = copy_to_mode_reg (SImode, op0);
40698 op1 = copy_to_mode_reg (SImode, op1);
40700 op2 = copy_to_mode_reg (SImode, op2);
40701 emit_insn (gen_mwaitx (op0, op1, op2));
40704 case IX86_BUILTIN_CLZERO:
40705 arg0 = CALL_EXPR_ARG (exp, 0);
40706 op0 = expand_normal (arg0);
40708 op0 = ix86_zero_extend_to_Pmode (op0);
40709 emit_insn (ix86_gen_clzero (op0));
40712 case IX86_BUILTIN_VEC_INIT_V2SI:
40713 case IX86_BUILTIN_VEC_INIT_V4HI:
40714 case IX86_BUILTIN_VEC_INIT_V8QI:
40715 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
40717 case IX86_BUILTIN_VEC_EXT_V2DF:
40718 case IX86_BUILTIN_VEC_EXT_V2DI:
40719 case IX86_BUILTIN_VEC_EXT_V4SF:
40720 case IX86_BUILTIN_VEC_EXT_V4SI:
40721 case IX86_BUILTIN_VEC_EXT_V8HI:
40722 case IX86_BUILTIN_VEC_EXT_V2SI:
40723 case IX86_BUILTIN_VEC_EXT_V4HI:
40724 case IX86_BUILTIN_VEC_EXT_V16QI:
40725 return ix86_expand_vec_ext_builtin (exp, target);
40727 case IX86_BUILTIN_VEC_SET_V2DI:
40728 case IX86_BUILTIN_VEC_SET_V4SF:
40729 case IX86_BUILTIN_VEC_SET_V4SI:
40730 case IX86_BUILTIN_VEC_SET_V8HI:
40731 case IX86_BUILTIN_VEC_SET_V4HI:
40732 case IX86_BUILTIN_VEC_SET_V16QI:
40733 return ix86_expand_vec_set_builtin (exp);
40735 case IX86_BUILTIN_INFQ:
40736 case IX86_BUILTIN_HUGE_VALQ:
40738 REAL_VALUE_TYPE inf;
40742 tmp = const_double_from_real_value (inf, mode);
40744 tmp = validize_mem (force_const_mem (mode, tmp));
40747 target = gen_reg_rtx (mode);
40749 emit_move_insn (target, tmp);
40753 case IX86_BUILTIN_RDPMC:
40754 case IX86_BUILTIN_RDTSC:
40755 case IX86_BUILTIN_RDTSCP:
40757 op0 = gen_reg_rtx (DImode);
40758 op1 = gen_reg_rtx (DImode);
40760 if (fcode == IX86_BUILTIN_RDPMC)
40762 arg0 = CALL_EXPR_ARG (exp, 0);
40763 op2 = expand_normal (arg0);
40764 if (!register_operand (op2, SImode))
40765 op2 = copy_to_mode_reg (SImode, op2);
40767 insn = (TARGET_64BIT
40768 ? gen_rdpmc_rex64 (op0, op1, op2)
40769 : gen_rdpmc (op0, op2));
40772 else if (fcode == IX86_BUILTIN_RDTSC)
40774 insn = (TARGET_64BIT
40775 ? gen_rdtsc_rex64 (op0, op1)
40776 : gen_rdtsc (op0));
40781 op2 = gen_reg_rtx (SImode);
40783 insn = (TARGET_64BIT
40784 ? gen_rdtscp_rex64 (op0, op1, op2)
40785 : gen_rdtscp (op0, op2));
40788 arg0 = CALL_EXPR_ARG (exp, 0);
40789 op4 = expand_normal (arg0);
40790 if (!address_operand (op4, VOIDmode))
40792 op4 = convert_memory_address (Pmode, op4);
40793 op4 = copy_addr_to_reg (op4);
40795 emit_move_insn (gen_rtx_MEM (SImode, op4), op2);
40800 /* mode is VOIDmode if __builtin_rd* has been called
40802 if (mode == VOIDmode)
40804 target = gen_reg_rtx (mode);
40809 op1 = expand_simple_binop (DImode, ASHIFT, op1, GEN_INT (32),
40810 op1, 1, OPTAB_DIRECT);
40811 op0 = expand_simple_binop (DImode, IOR, op0, op1,
40812 op0, 1, OPTAB_DIRECT);
40815 emit_move_insn (target, op0);
40818 case IX86_BUILTIN_FXSAVE:
40819 case IX86_BUILTIN_FXRSTOR:
40820 case IX86_BUILTIN_FXSAVE64:
40821 case IX86_BUILTIN_FXRSTOR64:
40822 case IX86_BUILTIN_FNSTENV:
40823 case IX86_BUILTIN_FLDENV:
40827 case IX86_BUILTIN_FXSAVE:
40828 icode = CODE_FOR_fxsave;
40830 case IX86_BUILTIN_FXRSTOR:
40831 icode = CODE_FOR_fxrstor;
40833 case IX86_BUILTIN_FXSAVE64:
40834 icode = CODE_FOR_fxsave64;
40836 case IX86_BUILTIN_FXRSTOR64:
40837 icode = CODE_FOR_fxrstor64;
40839 case IX86_BUILTIN_FNSTENV:
40840 icode = CODE_FOR_fnstenv;
40842 case IX86_BUILTIN_FLDENV:
40843 icode = CODE_FOR_fldenv;
40846 gcc_unreachable ();
40849 arg0 = CALL_EXPR_ARG (exp, 0);
40850 op0 = expand_normal (arg0);
40852 if (!address_operand (op0, VOIDmode))
40854 op0 = convert_memory_address (Pmode, op0);
40855 op0 = copy_addr_to_reg (op0);
40857 op0 = gen_rtx_MEM (mode0, op0);
40859 pat = GEN_FCN (icode) (op0);
40864 case IX86_BUILTIN_XSAVE:
40865 case IX86_BUILTIN_XRSTOR:
40866 case IX86_BUILTIN_XSAVE64:
40867 case IX86_BUILTIN_XRSTOR64:
40868 case IX86_BUILTIN_XSAVEOPT:
40869 case IX86_BUILTIN_XSAVEOPT64:
40870 case IX86_BUILTIN_XSAVES:
40871 case IX86_BUILTIN_XRSTORS:
40872 case IX86_BUILTIN_XSAVES64:
40873 case IX86_BUILTIN_XRSTORS64:
40874 case IX86_BUILTIN_XSAVEC:
40875 case IX86_BUILTIN_XSAVEC64:
40876 arg0 = CALL_EXPR_ARG (exp, 0);
40877 arg1 = CALL_EXPR_ARG (exp, 1);
40878 op0 = expand_normal (arg0);
40879 op1 = expand_normal (arg1);
40881 if (!address_operand (op0, VOIDmode))
40883 op0 = convert_memory_address (Pmode, op0);
40884 op0 = copy_addr_to_reg (op0);
40886 op0 = gen_rtx_MEM (BLKmode, op0);
40888 op1 = force_reg (DImode, op1);
40892 op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
40893 NULL, 1, OPTAB_DIRECT);
40896 case IX86_BUILTIN_XSAVE:
40897 icode = CODE_FOR_xsave_rex64;
40899 case IX86_BUILTIN_XRSTOR:
40900 icode = CODE_FOR_xrstor_rex64;
40902 case IX86_BUILTIN_XSAVE64:
40903 icode = CODE_FOR_xsave64;
40905 case IX86_BUILTIN_XRSTOR64:
40906 icode = CODE_FOR_xrstor64;
40908 case IX86_BUILTIN_XSAVEOPT:
40909 icode = CODE_FOR_xsaveopt_rex64;
40911 case IX86_BUILTIN_XSAVEOPT64:
40912 icode = CODE_FOR_xsaveopt64;
40914 case IX86_BUILTIN_XSAVES:
40915 icode = CODE_FOR_xsaves_rex64;
40917 case IX86_BUILTIN_XRSTORS:
40918 icode = CODE_FOR_xrstors_rex64;
40920 case IX86_BUILTIN_XSAVES64:
40921 icode = CODE_FOR_xsaves64;
40923 case IX86_BUILTIN_XRSTORS64:
40924 icode = CODE_FOR_xrstors64;
40926 case IX86_BUILTIN_XSAVEC:
40927 icode = CODE_FOR_xsavec_rex64;
40929 case IX86_BUILTIN_XSAVEC64:
40930 icode = CODE_FOR_xsavec64;
40933 gcc_unreachable ();
40936 op2 = gen_lowpart (SImode, op2);
40937 op1 = gen_lowpart (SImode, op1);
40938 pat = GEN_FCN (icode) (op0, op1, op2);
40944 case IX86_BUILTIN_XSAVE:
40945 icode = CODE_FOR_xsave;
40947 case IX86_BUILTIN_XRSTOR:
40948 icode = CODE_FOR_xrstor;
40950 case IX86_BUILTIN_XSAVEOPT:
40951 icode = CODE_FOR_xsaveopt;
40953 case IX86_BUILTIN_XSAVES:
40954 icode = CODE_FOR_xsaves;
40956 case IX86_BUILTIN_XRSTORS:
40957 icode = CODE_FOR_xrstors;
40959 case IX86_BUILTIN_XSAVEC:
40960 icode = CODE_FOR_xsavec;
40963 gcc_unreachable ();
40965 pat = GEN_FCN (icode) (op0, op1);
40972 case IX86_BUILTIN_LLWPCB:
40973 arg0 = CALL_EXPR_ARG (exp, 0);
40974 op0 = expand_normal (arg0);
40975 icode = CODE_FOR_lwp_llwpcb;
40976 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
40977 op0 = ix86_zero_extend_to_Pmode (op0);
40978 emit_insn (gen_lwp_llwpcb (op0));
40981 case IX86_BUILTIN_SLWPCB:
40982 icode = CODE_FOR_lwp_slwpcb;
40984 || !insn_data[icode].operand[0].predicate (target, Pmode))
40985 target = gen_reg_rtx (Pmode);
40986 emit_insn (gen_lwp_slwpcb (target));
40989 case IX86_BUILTIN_BEXTRI32:
40990 case IX86_BUILTIN_BEXTRI64:
40991 arg0 = CALL_EXPR_ARG (exp, 0);
40992 arg1 = CALL_EXPR_ARG (exp, 1);
40993 op0 = expand_normal (arg0);
40994 op1 = expand_normal (arg1);
40995 icode = (fcode == IX86_BUILTIN_BEXTRI32
40996 ? CODE_FOR_tbm_bextri_si
40997 : CODE_FOR_tbm_bextri_di);
40998 if (!CONST_INT_P (op1))
41000 error ("last argument must be an immediate");
41005 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
41006 unsigned char lsb_index = INTVAL (op1) & 0xFF;
41007 op1 = GEN_INT (length);
41008 op2 = GEN_INT (lsb_index);
41009 pat = GEN_FCN (icode) (target, op0, op1, op2);
41015 case IX86_BUILTIN_RDRAND16_STEP:
41016 icode = CODE_FOR_rdrandhi_1;
41020 case IX86_BUILTIN_RDRAND32_STEP:
41021 icode = CODE_FOR_rdrandsi_1;
41025 case IX86_BUILTIN_RDRAND64_STEP:
41026 icode = CODE_FOR_rdranddi_1;
41030 op0 = gen_reg_rtx (mode0);
41031 emit_insn (GEN_FCN (icode) (op0));
41033 arg0 = CALL_EXPR_ARG (exp, 0);
41034 op1 = expand_normal (arg0);
41035 if (!address_operand (op1, VOIDmode))
41037 op1 = convert_memory_address (Pmode, op1);
41038 op1 = copy_addr_to_reg (op1);
41040 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
41042 op1 = gen_reg_rtx (SImode);
41043 emit_move_insn (op1, CONST1_RTX (SImode));
41045 /* Emit SImode conditional move. */
41046 if (mode0 == HImode)
41048 op2 = gen_reg_rtx (SImode);
41049 emit_insn (gen_zero_extendhisi2 (op2, op0));
41051 else if (mode0 == SImode)
41054 op2 = gen_rtx_SUBREG (SImode, op0, 0);
41057 || !register_operand (target, SImode))
41058 target = gen_reg_rtx (SImode);
41060 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
41062 emit_insn (gen_rtx_SET (target,
41063 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
41066 case IX86_BUILTIN_RDSEED16_STEP:
41067 icode = CODE_FOR_rdseedhi_1;
41071 case IX86_BUILTIN_RDSEED32_STEP:
41072 icode = CODE_FOR_rdseedsi_1;
41076 case IX86_BUILTIN_RDSEED64_STEP:
41077 icode = CODE_FOR_rdseeddi_1;
41081 op0 = gen_reg_rtx (mode0);
41082 emit_insn (GEN_FCN (icode) (op0));
41084 arg0 = CALL_EXPR_ARG (exp, 0);
41085 op1 = expand_normal (arg0);
41086 if (!address_operand (op1, VOIDmode))
41088 op1 = convert_memory_address (Pmode, op1);
41089 op1 = copy_addr_to_reg (op1);
41091 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
41093 op2 = gen_reg_rtx (QImode);
41095 pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG),
41097 emit_insn (gen_rtx_SET (op2, pat));
41100 || !register_operand (target, SImode))
41101 target = gen_reg_rtx (SImode);
41103 emit_insn (gen_zero_extendqisi2 (target, op2));
41106 case IX86_BUILTIN_SBB32:
41107 icode = CODE_FOR_subborrowsi;
41111 case IX86_BUILTIN_SBB64:
41112 icode = CODE_FOR_subborrowdi;
41116 case IX86_BUILTIN_ADDCARRYX32:
41117 icode = CODE_FOR_addcarrysi;
41121 case IX86_BUILTIN_ADDCARRYX64:
41122 icode = CODE_FOR_addcarrydi;
41126 arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in. */
41127 arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1. */
41128 arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2. */
41129 arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out. */
41131 op1 = expand_normal (arg0);
41132 op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1));
41134 op2 = expand_normal (arg1);
41135 if (!register_operand (op2, mode0))
41136 op2 = copy_to_mode_reg (mode0, op2);
41138 op3 = expand_normal (arg2);
41139 if (!register_operand (op3, mode0))
41140 op3 = copy_to_mode_reg (mode0, op3);
41142 op4 = expand_normal (arg3);
41143 if (!address_operand (op4, VOIDmode))
41145 op4 = convert_memory_address (Pmode, op4);
41146 op4 = copy_addr_to_reg (op4);
41149 /* Generate CF from input operand. */
41150 emit_insn (gen_addqi3_cconly_overflow (op1, constm1_rtx));
41152 /* Generate instruction that consumes CF. */
41153 op0 = gen_reg_rtx (mode0);
41155 op1 = gen_rtx_REG (CCCmode, FLAGS_REG);
41156 pat = gen_rtx_LTU (mode0, op1, const0_rtx);
41157 emit_insn (GEN_FCN (icode) (op0, op2, op3, op1, pat));
41159 /* Return current CF value. */
41161 target = gen_reg_rtx (QImode);
41163 PUT_MODE (pat, QImode);
41164 emit_insn (gen_rtx_SET (target, pat));
41166 /* Store the result. */
41167 emit_move_insn (gen_rtx_MEM (mode0, op4), op0);
41171 case IX86_BUILTIN_READ_FLAGS:
41172 emit_insn (gen_push (gen_rtx_REG (word_mode, FLAGS_REG)));
41175 || target == NULL_RTX
41176 || !nonimmediate_operand (target, word_mode)
41177 || GET_MODE (target) != word_mode)
41178 target = gen_reg_rtx (word_mode);
41180 emit_insn (gen_pop (target));
41183 case IX86_BUILTIN_WRITE_FLAGS:
41185 arg0 = CALL_EXPR_ARG (exp, 0);
41186 op0 = expand_normal (arg0);
41187 if (!general_no_elim_operand (op0, word_mode))
41188 op0 = copy_to_mode_reg (word_mode, op0);
41190 emit_insn (gen_push (op0));
41191 emit_insn (gen_pop (gen_rtx_REG (word_mode, FLAGS_REG)));
41194 case IX86_BUILTIN_KORTESTC16:
41195 icode = CODE_FOR_kortestchi;
41200 case IX86_BUILTIN_KORTESTZ16:
41201 icode = CODE_FOR_kortestzhi;
41206 arg0 = CALL_EXPR_ARG (exp, 0); /* Mask reg src1. */
41207 arg1 = CALL_EXPR_ARG (exp, 1); /* Mask reg src2. */
41208 op0 = expand_normal (arg0);
41209 op1 = expand_normal (arg1);
41211 op0 = copy_to_reg (op0);
41212 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
41213 op1 = copy_to_reg (op1);
41214 op1 = simplify_gen_subreg (mode0, op1, GET_MODE (op1), 0);
41216 target = gen_reg_rtx (QImode);
41217 emit_insn (gen_rtx_SET (target, const0_rtx));
41219 /* Emit kortest. */
41220 emit_insn (GEN_FCN (icode) (op0, op1));
41221 /* And use setcc to return result from flags. */
41222 ix86_expand_setcc (target, EQ,
41223 gen_rtx_REG (mode1, FLAGS_REG), const0_rtx);
41226 case IX86_BUILTIN_GATHERSIV2DF:
41227 icode = CODE_FOR_avx2_gathersiv2df;
41229 case IX86_BUILTIN_GATHERSIV4DF:
41230 icode = CODE_FOR_avx2_gathersiv4df;
41232 case IX86_BUILTIN_GATHERDIV2DF:
41233 icode = CODE_FOR_avx2_gatherdiv2df;
41235 case IX86_BUILTIN_GATHERDIV4DF:
41236 icode = CODE_FOR_avx2_gatherdiv4df;
41238 case IX86_BUILTIN_GATHERSIV4SF:
41239 icode = CODE_FOR_avx2_gathersiv4sf;
41241 case IX86_BUILTIN_GATHERSIV8SF:
41242 icode = CODE_FOR_avx2_gathersiv8sf;
41244 case IX86_BUILTIN_GATHERDIV4SF:
41245 icode = CODE_FOR_avx2_gatherdiv4sf;
41247 case IX86_BUILTIN_GATHERDIV8SF:
41248 icode = CODE_FOR_avx2_gatherdiv8sf;
41250 case IX86_BUILTIN_GATHERSIV2DI:
41251 icode = CODE_FOR_avx2_gathersiv2di;
41253 case IX86_BUILTIN_GATHERSIV4DI:
41254 icode = CODE_FOR_avx2_gathersiv4di;
41256 case IX86_BUILTIN_GATHERDIV2DI:
41257 icode = CODE_FOR_avx2_gatherdiv2di;
41259 case IX86_BUILTIN_GATHERDIV4DI:
41260 icode = CODE_FOR_avx2_gatherdiv4di;
41262 case IX86_BUILTIN_GATHERSIV4SI:
41263 icode = CODE_FOR_avx2_gathersiv4si;
41265 case IX86_BUILTIN_GATHERSIV8SI:
41266 icode = CODE_FOR_avx2_gathersiv8si;
41268 case IX86_BUILTIN_GATHERDIV4SI:
41269 icode = CODE_FOR_avx2_gatherdiv4si;
41271 case IX86_BUILTIN_GATHERDIV8SI:
41272 icode = CODE_FOR_avx2_gatherdiv8si;
41274 case IX86_BUILTIN_GATHERALTSIV4DF:
41275 icode = CODE_FOR_avx2_gathersiv4df;
41277 case IX86_BUILTIN_GATHERALTDIV8SF:
41278 icode = CODE_FOR_avx2_gatherdiv8sf;
41280 case IX86_BUILTIN_GATHERALTSIV4DI:
41281 icode = CODE_FOR_avx2_gathersiv4di;
41283 case IX86_BUILTIN_GATHERALTDIV8SI:
41284 icode = CODE_FOR_avx2_gatherdiv8si;
41286 case IX86_BUILTIN_GATHER3SIV16SF:
41287 icode = CODE_FOR_avx512f_gathersiv16sf;
41289 case IX86_BUILTIN_GATHER3SIV8DF:
41290 icode = CODE_FOR_avx512f_gathersiv8df;
41292 case IX86_BUILTIN_GATHER3DIV16SF:
41293 icode = CODE_FOR_avx512f_gatherdiv16sf;
41295 case IX86_BUILTIN_GATHER3DIV8DF:
41296 icode = CODE_FOR_avx512f_gatherdiv8df;
41298 case IX86_BUILTIN_GATHER3SIV16SI:
41299 icode = CODE_FOR_avx512f_gathersiv16si;
41301 case IX86_BUILTIN_GATHER3SIV8DI:
41302 icode = CODE_FOR_avx512f_gathersiv8di;
41304 case IX86_BUILTIN_GATHER3DIV16SI:
41305 icode = CODE_FOR_avx512f_gatherdiv16si;
41307 case IX86_BUILTIN_GATHER3DIV8DI:
41308 icode = CODE_FOR_avx512f_gatherdiv8di;
41310 case IX86_BUILTIN_GATHER3ALTSIV8DF:
41311 icode = CODE_FOR_avx512f_gathersiv8df;
41313 case IX86_BUILTIN_GATHER3ALTDIV16SF:
41314 icode = CODE_FOR_avx512f_gatherdiv16sf;
41316 case IX86_BUILTIN_GATHER3ALTSIV8DI:
41317 icode = CODE_FOR_avx512f_gathersiv8di;
41319 case IX86_BUILTIN_GATHER3ALTDIV16SI:
41320 icode = CODE_FOR_avx512f_gatherdiv16si;
41322 case IX86_BUILTIN_GATHER3SIV2DF:
41323 icode = CODE_FOR_avx512vl_gathersiv2df;
41325 case IX86_BUILTIN_GATHER3SIV4DF:
41326 icode = CODE_FOR_avx512vl_gathersiv4df;
41328 case IX86_BUILTIN_GATHER3DIV2DF:
41329 icode = CODE_FOR_avx512vl_gatherdiv2df;
41331 case IX86_BUILTIN_GATHER3DIV4DF:
41332 icode = CODE_FOR_avx512vl_gatherdiv4df;
41334 case IX86_BUILTIN_GATHER3SIV4SF:
41335 icode = CODE_FOR_avx512vl_gathersiv4sf;
41337 case IX86_BUILTIN_GATHER3SIV8SF:
41338 icode = CODE_FOR_avx512vl_gathersiv8sf;
41340 case IX86_BUILTIN_GATHER3DIV4SF:
41341 icode = CODE_FOR_avx512vl_gatherdiv4sf;
41343 case IX86_BUILTIN_GATHER3DIV8SF:
41344 icode = CODE_FOR_avx512vl_gatherdiv8sf;
41346 case IX86_BUILTIN_GATHER3SIV2DI:
41347 icode = CODE_FOR_avx512vl_gathersiv2di;
41349 case IX86_BUILTIN_GATHER3SIV4DI:
41350 icode = CODE_FOR_avx512vl_gathersiv4di;
41352 case IX86_BUILTIN_GATHER3DIV2DI:
41353 icode = CODE_FOR_avx512vl_gatherdiv2di;
41355 case IX86_BUILTIN_GATHER3DIV4DI:
41356 icode = CODE_FOR_avx512vl_gatherdiv4di;
41358 case IX86_BUILTIN_GATHER3SIV4SI:
41359 icode = CODE_FOR_avx512vl_gathersiv4si;
41361 case IX86_BUILTIN_GATHER3SIV8SI:
41362 icode = CODE_FOR_avx512vl_gathersiv8si;
41364 case IX86_BUILTIN_GATHER3DIV4SI:
41365 icode = CODE_FOR_avx512vl_gatherdiv4si;
41367 case IX86_BUILTIN_GATHER3DIV8SI:
41368 icode = CODE_FOR_avx512vl_gatherdiv8si;
41370 case IX86_BUILTIN_GATHER3ALTSIV4DF:
41371 icode = CODE_FOR_avx512vl_gathersiv4df;
41373 case IX86_BUILTIN_GATHER3ALTDIV8SF:
41374 icode = CODE_FOR_avx512vl_gatherdiv8sf;
41376 case IX86_BUILTIN_GATHER3ALTSIV4DI:
41377 icode = CODE_FOR_avx512vl_gathersiv4di;
41379 case IX86_BUILTIN_GATHER3ALTDIV8SI:
41380 icode = CODE_FOR_avx512vl_gatherdiv8si;
41382 case IX86_BUILTIN_SCATTERSIV16SF:
41383 icode = CODE_FOR_avx512f_scattersiv16sf;
41385 case IX86_BUILTIN_SCATTERSIV8DF:
41386 icode = CODE_FOR_avx512f_scattersiv8df;
41388 case IX86_BUILTIN_SCATTERDIV16SF:
41389 icode = CODE_FOR_avx512f_scatterdiv16sf;
41391 case IX86_BUILTIN_SCATTERDIV8DF:
41392 icode = CODE_FOR_avx512f_scatterdiv8df;
41394 case IX86_BUILTIN_SCATTERSIV16SI:
41395 icode = CODE_FOR_avx512f_scattersiv16si;
41397 case IX86_BUILTIN_SCATTERSIV8DI:
41398 icode = CODE_FOR_avx512f_scattersiv8di;
41400 case IX86_BUILTIN_SCATTERDIV16SI:
41401 icode = CODE_FOR_avx512f_scatterdiv16si;
41403 case IX86_BUILTIN_SCATTERDIV8DI:
41404 icode = CODE_FOR_avx512f_scatterdiv8di;
41406 case IX86_BUILTIN_SCATTERSIV8SF:
41407 icode = CODE_FOR_avx512vl_scattersiv8sf;
41409 case IX86_BUILTIN_SCATTERSIV4SF:
41410 icode = CODE_FOR_avx512vl_scattersiv4sf;
41412 case IX86_BUILTIN_SCATTERSIV4DF:
41413 icode = CODE_FOR_avx512vl_scattersiv4df;
41415 case IX86_BUILTIN_SCATTERSIV2DF:
41416 icode = CODE_FOR_avx512vl_scattersiv2df;
41418 case IX86_BUILTIN_SCATTERDIV8SF:
41419 icode = CODE_FOR_avx512vl_scatterdiv8sf;
41421 case IX86_BUILTIN_SCATTERDIV4SF:
41422 icode = CODE_FOR_avx512vl_scatterdiv4sf;
41424 case IX86_BUILTIN_SCATTERDIV4DF:
41425 icode = CODE_FOR_avx512vl_scatterdiv4df;
41427 case IX86_BUILTIN_SCATTERDIV2DF:
41428 icode = CODE_FOR_avx512vl_scatterdiv2df;
41430 case IX86_BUILTIN_SCATTERSIV8SI:
41431 icode = CODE_FOR_avx512vl_scattersiv8si;
41433 case IX86_BUILTIN_SCATTERSIV4SI:
41434 icode = CODE_FOR_avx512vl_scattersiv4si;
41436 case IX86_BUILTIN_SCATTERSIV4DI:
41437 icode = CODE_FOR_avx512vl_scattersiv4di;
41439 case IX86_BUILTIN_SCATTERSIV2DI:
41440 icode = CODE_FOR_avx512vl_scattersiv2di;
41442 case IX86_BUILTIN_SCATTERDIV8SI:
41443 icode = CODE_FOR_avx512vl_scatterdiv8si;
41445 case IX86_BUILTIN_SCATTERDIV4SI:
41446 icode = CODE_FOR_avx512vl_scatterdiv4si;
41448 case IX86_BUILTIN_SCATTERDIV4DI:
41449 icode = CODE_FOR_avx512vl_scatterdiv4di;
41451 case IX86_BUILTIN_SCATTERDIV2DI:
41452 icode = CODE_FOR_avx512vl_scatterdiv2di;
41454 case IX86_BUILTIN_GATHERPFDPD:
41455 icode = CODE_FOR_avx512pf_gatherpfv8sidf;
41456 goto vec_prefetch_gen;
41457 case IX86_BUILTIN_SCATTERALTSIV8DF:
41458 icode = CODE_FOR_avx512f_scattersiv8df;
41460 case IX86_BUILTIN_SCATTERALTDIV16SF:
41461 icode = CODE_FOR_avx512f_scatterdiv16sf;
41463 case IX86_BUILTIN_SCATTERALTSIV8DI:
41464 icode = CODE_FOR_avx512f_scattersiv8di;
41466 case IX86_BUILTIN_SCATTERALTDIV16SI:
41467 icode = CODE_FOR_avx512f_scatterdiv16si;
41469 case IX86_BUILTIN_GATHERPFDPS:
41470 icode = CODE_FOR_avx512pf_gatherpfv16sisf;
41471 goto vec_prefetch_gen;
41472 case IX86_BUILTIN_GATHERPFQPD:
41473 icode = CODE_FOR_avx512pf_gatherpfv8didf;
41474 goto vec_prefetch_gen;
41475 case IX86_BUILTIN_GATHERPFQPS:
41476 icode = CODE_FOR_avx512pf_gatherpfv8disf;
41477 goto vec_prefetch_gen;
41478 case IX86_BUILTIN_SCATTERPFDPD:
41479 icode = CODE_FOR_avx512pf_scatterpfv8sidf;
41480 goto vec_prefetch_gen;
41481 case IX86_BUILTIN_SCATTERPFDPS:
41482 icode = CODE_FOR_avx512pf_scatterpfv16sisf;
41483 goto vec_prefetch_gen;
41484 case IX86_BUILTIN_SCATTERPFQPD:
41485 icode = CODE_FOR_avx512pf_scatterpfv8didf;
41486 goto vec_prefetch_gen;
41487 case IX86_BUILTIN_SCATTERPFQPS:
41488 icode = CODE_FOR_avx512pf_scatterpfv8disf;
41489 goto vec_prefetch_gen;
41493 rtx (*gen) (rtx, rtx);
41495 arg0 = CALL_EXPR_ARG (exp, 0);
41496 arg1 = CALL_EXPR_ARG (exp, 1);
41497 arg2 = CALL_EXPR_ARG (exp, 2);
41498 arg3 = CALL_EXPR_ARG (exp, 3);
41499 arg4 = CALL_EXPR_ARG (exp, 4);
41500 op0 = expand_normal (arg0);
41501 op1 = expand_normal (arg1);
41502 op2 = expand_normal (arg2);
41503 op3 = expand_normal (arg3);
41504 op4 = expand_normal (arg4);
41505 /* Note the arg order is different from the operand order. */
41506 mode0 = insn_data[icode].operand[1].mode;
41507 mode2 = insn_data[icode].operand[3].mode;
41508 mode3 = insn_data[icode].operand[4].mode;
41509 mode4 = insn_data[icode].operand[5].mode;
41511 if (target == NULL_RTX
41512 || GET_MODE (target) != insn_data[icode].operand[0].mode
41513 || !insn_data[icode].operand[0].predicate (target,
41514 GET_MODE (target)))
41515 subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
41517 subtarget = target;
41521 case IX86_BUILTIN_GATHER3ALTSIV8DF:
41522 case IX86_BUILTIN_GATHER3ALTSIV8DI:
41523 half = gen_reg_rtx (V8SImode);
41524 if (!nonimmediate_operand (op2, V16SImode))
41525 op2 = copy_to_mode_reg (V16SImode, op2);
41526 emit_insn (gen_vec_extract_lo_v16si (half, op2));
41529 case IX86_BUILTIN_GATHER3ALTSIV4DF:
41530 case IX86_BUILTIN_GATHER3ALTSIV4DI:
41531 case IX86_BUILTIN_GATHERALTSIV4DF:
41532 case IX86_BUILTIN_GATHERALTSIV4DI:
41533 half = gen_reg_rtx (V4SImode);
41534 if (!nonimmediate_operand (op2, V8SImode))
41535 op2 = copy_to_mode_reg (V8SImode, op2);
41536 emit_insn (gen_vec_extract_lo_v8si (half, op2));
41539 case IX86_BUILTIN_GATHER3ALTDIV16SF:
41540 case IX86_BUILTIN_GATHER3ALTDIV16SI:
41541 half = gen_reg_rtx (mode0);
41542 if (mode0 == V8SFmode)
41543 gen = gen_vec_extract_lo_v16sf;
41545 gen = gen_vec_extract_lo_v16si;
41546 if (!nonimmediate_operand (op0, GET_MODE (op0)))
41547 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
41548 emit_insn (gen (half, op0));
41550 if (GET_MODE (op3) != VOIDmode)
41552 if (!nonimmediate_operand (op3, GET_MODE (op3)))
41553 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
41554 emit_insn (gen (half, op3));
41558 case IX86_BUILTIN_GATHER3ALTDIV8SF:
41559 case IX86_BUILTIN_GATHER3ALTDIV8SI:
41560 case IX86_BUILTIN_GATHERALTDIV8SF:
41561 case IX86_BUILTIN_GATHERALTDIV8SI:
41562 half = gen_reg_rtx (mode0);
41563 if (mode0 == V4SFmode)
41564 gen = gen_vec_extract_lo_v8sf;
41566 gen = gen_vec_extract_lo_v8si;
41567 if (!nonimmediate_operand (op0, GET_MODE (op0)))
41568 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
41569 emit_insn (gen (half, op0));
41571 if (GET_MODE (op3) != VOIDmode)
41573 if (!nonimmediate_operand (op3, GET_MODE (op3)))
41574 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
41575 emit_insn (gen (half, op3));
41583 /* Force memory operand only with base register here. But we
41584 don't want to do it on memory operand for other builtin
41586 op1 = ix86_zero_extend_to_Pmode (op1);
41588 if (!insn_data[icode].operand[1].predicate (op0, mode0))
41589 op0 = copy_to_mode_reg (mode0, op0);
41590 if (!insn_data[icode].operand[2].predicate (op1, Pmode))
41591 op1 = copy_to_mode_reg (Pmode, op1);
41592 if (!insn_data[icode].operand[3].predicate (op2, mode2))
41593 op2 = copy_to_mode_reg (mode2, op2);
41595 op3 = fixup_modeless_constant (op3, mode3);
41597 if (GET_MODE (op3) == mode3 || GET_MODE (op3) == VOIDmode)
41599 if (!insn_data[icode].operand[4].predicate (op3, mode3))
41600 op3 = copy_to_mode_reg (mode3, op3);
41604 op3 = copy_to_reg (op3);
41605 op3 = simplify_gen_subreg (mode3, op3, GET_MODE (op3), 0);
41607 if (!insn_data[icode].operand[5].predicate (op4, mode4))
41609 error ("the last argument must be scale 1, 2, 4, 8");
41613 /* Optimize. If mask is known to have all high bits set,
41614 replace op0 with pc_rtx to signal that the instruction
41615 overwrites the whole destination and doesn't use its
41616 previous contents. */
41619 if (TREE_CODE (arg3) == INTEGER_CST)
41621 if (integer_all_onesp (arg3))
41624 else if (TREE_CODE (arg3) == VECTOR_CST)
41626 unsigned int negative = 0;
41627 for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
41629 tree cst = VECTOR_CST_ELT (arg3, i);
41630 if (TREE_CODE (cst) == INTEGER_CST
41631 && tree_int_cst_sign_bit (cst))
41633 else if (TREE_CODE (cst) == REAL_CST
41634 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
41637 if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
41640 else if (TREE_CODE (arg3) == SSA_NAME
41641 && TREE_CODE (TREE_TYPE (arg3)) == VECTOR_TYPE)
41643 /* Recognize also when mask is like:
41644 __v2df src = _mm_setzero_pd ();
41645 __v2df mask = _mm_cmpeq_pd (src, src);
41647 __v8sf src = _mm256_setzero_ps ();
41648 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
41649 as that is a cheaper way to load all ones into
41650 a register than having to load a constant from
41652 gimple *def_stmt = SSA_NAME_DEF_STMT (arg3);
41653 if (is_gimple_call (def_stmt))
41655 tree fndecl = gimple_call_fndecl (def_stmt);
41657 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
41658 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
41660 case IX86_BUILTIN_CMPPD:
41661 case IX86_BUILTIN_CMPPS:
41662 case IX86_BUILTIN_CMPPD256:
41663 case IX86_BUILTIN_CMPPS256:
41664 if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
41667 case IX86_BUILTIN_CMPEQPD:
41668 case IX86_BUILTIN_CMPEQPS:
41669 if (initializer_zerop (gimple_call_arg (def_stmt, 0))
41670 && initializer_zerop (gimple_call_arg (def_stmt,
41681 pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
41688 case IX86_BUILTIN_GATHER3DIV16SF:
41689 if (target == NULL_RTX)
41690 target = gen_reg_rtx (V8SFmode);
41691 emit_insn (gen_vec_extract_lo_v16sf (target, subtarget));
41693 case IX86_BUILTIN_GATHER3DIV16SI:
41694 if (target == NULL_RTX)
41695 target = gen_reg_rtx (V8SImode);
41696 emit_insn (gen_vec_extract_lo_v16si (target, subtarget));
41698 case IX86_BUILTIN_GATHER3DIV8SF:
41699 case IX86_BUILTIN_GATHERDIV8SF:
41700 if (target == NULL_RTX)
41701 target = gen_reg_rtx (V4SFmode);
41702 emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
41704 case IX86_BUILTIN_GATHER3DIV8SI:
41705 case IX86_BUILTIN_GATHERDIV8SI:
41706 if (target == NULL_RTX)
41707 target = gen_reg_rtx (V4SImode);
41708 emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
41711 target = subtarget;
41717 arg0 = CALL_EXPR_ARG (exp, 0);
41718 arg1 = CALL_EXPR_ARG (exp, 1);
41719 arg2 = CALL_EXPR_ARG (exp, 2);
41720 arg3 = CALL_EXPR_ARG (exp, 3);
41721 arg4 = CALL_EXPR_ARG (exp, 4);
41722 op0 = expand_normal (arg0);
41723 op1 = expand_normal (arg1);
41724 op2 = expand_normal (arg2);
41725 op3 = expand_normal (arg3);
41726 op4 = expand_normal (arg4);
41727 mode1 = insn_data[icode].operand[1].mode;
41728 mode2 = insn_data[icode].operand[2].mode;
41729 mode3 = insn_data[icode].operand[3].mode;
41730 mode4 = insn_data[icode].operand[4].mode;
41732 /* Scatter instruction stores operand op3 to memory with
41733 indices from op2 and scale from op4 under writemask op1.
41734 If index operand op2 has more elements then source operand
41735 op3 one need to use only its low half. And vice versa. */
41738 case IX86_BUILTIN_SCATTERALTSIV8DF:
41739 case IX86_BUILTIN_SCATTERALTSIV8DI:
41740 half = gen_reg_rtx (V8SImode);
41741 if (!nonimmediate_operand (op2, V16SImode))
41742 op2 = copy_to_mode_reg (V16SImode, op2);
41743 emit_insn (gen_vec_extract_lo_v16si (half, op2));
41746 case IX86_BUILTIN_SCATTERALTDIV16SF:
41747 case IX86_BUILTIN_SCATTERALTDIV16SI:
41748 half = gen_reg_rtx (mode3);
41749 if (mode3 == V8SFmode)
41750 gen = gen_vec_extract_lo_v16sf;
41752 gen = gen_vec_extract_lo_v16si;
41753 if (!nonimmediate_operand (op3, GET_MODE (op3)))
41754 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
41755 emit_insn (gen (half, op3));
41762 /* Force memory operand only with base register here. But we
41763 don't want to do it on memory operand for other builtin
41765 op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
41767 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
41768 op0 = copy_to_mode_reg (Pmode, op0);
41770 op1 = fixup_modeless_constant (op1, mode1);
41772 if (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
41774 if (!insn_data[icode].operand[1].predicate (op1, mode1))
41775 op1 = copy_to_mode_reg (mode1, op1);
41779 op1 = copy_to_reg (op1);
41780 op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
41783 if (!insn_data[icode].operand[2].predicate (op2, mode2))
41784 op2 = copy_to_mode_reg (mode2, op2);
41786 if (!insn_data[icode].operand[3].predicate (op3, mode3))
41787 op3 = copy_to_mode_reg (mode3, op3);
41789 if (!insn_data[icode].operand[4].predicate (op4, mode4))
41791 error ("the last argument must be scale 1, 2, 4, 8");
41795 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
41803 arg0 = CALL_EXPR_ARG (exp, 0);
41804 arg1 = CALL_EXPR_ARG (exp, 1);
41805 arg2 = CALL_EXPR_ARG (exp, 2);
41806 arg3 = CALL_EXPR_ARG (exp, 3);
41807 arg4 = CALL_EXPR_ARG (exp, 4);
41808 op0 = expand_normal (arg0);
41809 op1 = expand_normal (arg1);
41810 op2 = expand_normal (arg2);
41811 op3 = expand_normal (arg3);
41812 op4 = expand_normal (arg4);
41813 mode0 = insn_data[icode].operand[0].mode;
41814 mode1 = insn_data[icode].operand[1].mode;
41815 mode3 = insn_data[icode].operand[3].mode;
41816 mode4 = insn_data[icode].operand[4].mode;
41818 op0 = fixup_modeless_constant (op0, mode0);
41820 if (GET_MODE (op0) == mode0
41821 || (GET_MODE (op0) == VOIDmode && op0 != constm1_rtx))
41823 if (!insn_data[icode].operand[0].predicate (op0, mode0))
41824 op0 = copy_to_mode_reg (mode0, op0);
41826 else if (op0 != constm1_rtx)
41828 op0 = copy_to_reg (op0);
41829 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
41832 if (!insn_data[icode].operand[1].predicate (op1, mode1))
41833 op1 = copy_to_mode_reg (mode1, op1);
41835 /* Force memory operand only with base register here. But we
41836 don't want to do it on memory operand for other builtin
41838 op2 = force_reg (Pmode, convert_to_mode (Pmode, op2, 1));
41840 if (!insn_data[icode].operand[2].predicate (op2, Pmode))
41841 op2 = copy_to_mode_reg (Pmode, op2);
41843 if (!insn_data[icode].operand[3].predicate (op3, mode3))
41845 error ("the forth argument must be scale 1, 2, 4, 8");
41849 if (!insn_data[icode].operand[4].predicate (op4, mode4))
41851 error ("incorrect hint operand");
41855 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
41863 case IX86_BUILTIN_XABORT:
41864 icode = CODE_FOR_xabort;
41865 arg0 = CALL_EXPR_ARG (exp, 0);
41866 op0 = expand_normal (arg0);
41867 mode0 = insn_data[icode].operand[0].mode;
41868 if (!insn_data[icode].operand[0].predicate (op0, mode0))
41870 error ("the xabort's argument must be an 8-bit immediate");
41873 emit_insn (gen_xabort (op0));
41880 for (i = 0, d = bdesc_special_args;
41881 i < ARRAY_SIZE (bdesc_special_args);
41883 if (d->code == fcode)
41884 return ix86_expand_special_args_builtin (d, exp, target);
41886 for (i = 0, d = bdesc_args;
41887 i < ARRAY_SIZE (bdesc_args);
41889 if (d->code == fcode)
41892 case IX86_BUILTIN_FABSQ:
41893 case IX86_BUILTIN_COPYSIGNQ:
41895 /* Emit a normal call if SSE isn't available. */
41896 return expand_call (exp, target, ignore);
41898 return ix86_expand_args_builtin (d, exp, target);
41901 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
41902 if (d->code == fcode)
41903 return ix86_expand_sse_comi (d, exp, target);
41905 for (i = 0, d = bdesc_round_args; i < ARRAY_SIZE (bdesc_round_args); i++, d++)
41906 if (d->code == fcode)
41907 return ix86_expand_round_builtin (d, exp, target);
41909 for (i = 0, d = bdesc_pcmpestr;
41910 i < ARRAY_SIZE (bdesc_pcmpestr);
41912 if (d->code == fcode)
41913 return ix86_expand_sse_pcmpestr (d, exp, target);
41915 for (i = 0, d = bdesc_pcmpistr;
41916 i < ARRAY_SIZE (bdesc_pcmpistr);
41918 if (d->code == fcode)
41919 return ix86_expand_sse_pcmpistr (d, exp, target);
41921 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
41922 if (d->code == fcode)
41923 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
41924 (enum ix86_builtin_func_type)
41925 d->flag, d->comparison);
41927 gcc_unreachable ();
41930 /* This returns the target-specific builtin with code CODE if
41931 current_function_decl has visibility on this builtin, which is checked
41932 using isa flags. Returns NULL_TREE otherwise. */
41934 static tree ix86_get_builtin (enum ix86_builtins code)
41936 struct cl_target_option *opts;
41937 tree target_tree = NULL_TREE;
41939 /* Determine the isa flags of current_function_decl. */
41941 if (current_function_decl)
41942 target_tree = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl);
/* No target attribute on the current function: fall back to the
   command-line default target options.  */
41944 if (target_tree == NULL)
41945 target_tree = target_option_default_node;
41947 opts = TREE_TARGET_OPTION (target_tree);
/* The builtin is usable here only if an ISA bit it requires is enabled
   in the options selected above.  */
41949 if (ix86_builtins_isa[(int) code].isa & opts->x_ix86_isa_flags)
41950 return ix86_builtin_decl (code, true);
41955 /* Return function decl for target specific builtin
41956 for given MPX builtin passed in FCODE. */
41958 ix86_builtin_mpx_function (unsigned fcode)
/* Map each generic Pointer Bounds Checker (CHKP) builtin code to the
   corresponding x86 MPX builtin decl stored in ix86_builtins[].  */
41962 case BUILT_IN_CHKP_BNDMK:
41963 return ix86_builtins[IX86_BUILTIN_BNDMK];
41965 case BUILT_IN_CHKP_BNDSTX:
41966 return ix86_builtins[IX86_BUILTIN_BNDSTX];
41968 case BUILT_IN_CHKP_BNDLDX:
41969 return ix86_builtins[IX86_BUILTIN_BNDLDX];
41971 case BUILT_IN_CHKP_BNDCL:
41972 return ix86_builtins[IX86_BUILTIN_BNDCL];
41974 case BUILT_IN_CHKP_BNDCU:
41975 return ix86_builtins[IX86_BUILTIN_BNDCU];
41977 case BUILT_IN_CHKP_BNDRET:
41978 return ix86_builtins[IX86_BUILTIN_BNDRET];
41980 case BUILT_IN_CHKP_INTERSECT:
41981 return ix86_builtins[IX86_BUILTIN_BNDINT];
41983 case BUILT_IN_CHKP_NARROW:
41984 return ix86_builtins[IX86_BUILTIN_BNDNARROW];
41986 case BUILT_IN_CHKP_SIZEOF:
41987 return ix86_builtins[IX86_BUILTIN_SIZEOF];
41989 case BUILT_IN_CHKP_EXTRACT_LOWER:
41990 return ix86_builtins[IX86_BUILTIN_BNDLOWER];
41992 case BUILT_IN_CHKP_EXTRACT_UPPER:
41993 return ix86_builtins[IX86_BUILTIN_BNDUPPER];
/* Any other code is not an MPX-mappable CHKP builtin.  */
41999 gcc_unreachable ();
42002 /* Helper function for ix86_load_bounds and ix86_store_bounds.
42004 Return an address to be used to load/store bounds for pointer
42007 SLOT_NO is an integer constant holding number of a target
42008 dependent special slot to be used in case SLOT is not a memory.
42010 SPECIAL_BASE is a pointer to be used as a base of fake address
42011 to access special slots in Bounds Table. SPECIAL_BASE[-1],
42012 SPECIAL_BASE[-2] etc. will be used as fake pointer locations. */
42015 ix86_get_arg_address_for_bt (rtx slot, rtx slot_no, rtx special_base)
42019 /* NULL slot means we pass bounds for pointer not passed to the
42020 function at all. Register slot means we pass pointer in a
42021 register. In both these cases bounds are passed via Bounds
42022 Table. Since we do not have actual pointer stored in memory,
42023 we have to use fake addresses to access Bounds Table. We
42024 start with (special_base - sizeof (void*)) and decrease this
42025 address by pointer size to get addresses for other slots. */
42026 if (!slot || REG_P (slot))
42028 gcc_assert (CONST_INT_P (slot_no))
/* Fake address: slot N lives at special_base - (N + 1) * ptr_size.  */
42029 addr = plus_constant (Pmode, special_base,
42030 -(INTVAL (slot_no) + 1) * GET_MODE_SIZE (Pmode));
42032 /* If pointer is passed in a memory then its address is used to
42033 access Bounds Table. */
42034 else if (MEM_P (slot))
42036 addr = XEXP (slot, 0);
/* Force the address into a register when it is not one already.  */
42037 if (!register_operand (addr, Pmode))
42038 addr = copy_addr_to_reg (addr);
/* SLOT must be NULL, a REG, or a MEM — nothing else is expected.  */
42041 gcc_unreachable ();
42046 /* Expand pass uses this hook to load bounds for function parameter
42047 PTR passed in SLOT in case its bounds are not passed in a register.
42049 If SLOT is a memory, then bounds are loaded as for regular pointer
42050 loaded from memory. PTR may be NULL in case SLOT is a memory.
42051 In such case value of PTR (if required) may be loaded from SLOT.
42053 If SLOT is NULL or a register then SLOT_NO is an integer constant
42054 holding number of the target dependent special slot which should be
42055 used to obtain bounds.
42057 Return loaded bounds. */
42060 ix86_load_bounds (rtx slot, rtx ptr, rtx slot_no)
/* Destination pseudo in the bound-register mode (BND32/BND64).  */
42062 rtx reg = gen_reg_rtx (BNDmode);
42065 /* Get address to be used to access Bounds Table. Special slots start
42066 at the location of return address of the current function. */
42067 addr = ix86_get_arg_address_for_bt (slot, slot_no, arg_pointer_rtx);
42069 /* Load pointer value from a memory if we don't have it. */
42072 gcc_assert (MEM_P (slot));
42073 ptr = copy_addr_to_reg (slot);
/* The pointer operand must be a Pmode register for the ldx pattern.  */
42076 if (!register_operand (ptr, Pmode))
42077 ptr = ix86_zero_extend_to_Pmode (ptr);
/* Emit the bounds-load (BNDLDX) pattern matching the bound mode.  */
42079 emit_insn (BNDmode == BND64mode
42080 ? gen_bnd64_ldx (reg, addr, ptr)
42081 : gen_bnd32_ldx (reg, addr, ptr));
42086 /* Expand pass uses this hook to store BOUNDS for call argument PTR
42087 passed in SLOT in case BOUNDS are not passed in a register.
42089 If SLOT is a memory, then BOUNDS are stored as for regular pointer
42090 stored in memory. PTR may be NULL in case SLOT is a memory.
42091 In such case value of PTR (if required) may be loaded from SLOT.
42093 If SLOT is NULL or a register then SLOT_NO is an integer constant
42094 holding number of the target dependent special slot which should be
42095 used to store BOUNDS. */
42098 ix86_store_bounds (rtx ptr, rtx slot, rtx bounds, rtx slot_no)
42102 /* Get address to be used to access Bounds Table. Special slots start
42103 at the location of return address of a called function. */
42104 addr = ix86_get_arg_address_for_bt (slot, slot_no, stack_pointer_rtx);
42106 /* Load pointer value from a memory if we don't have it. */
42109 gcc_assert (MEM_P (slot));
42110 ptr = copy_addr_to_reg (slot);
/* The pointer operand must be a Pmode register for the stx pattern.  */
42113 if (!register_operand (ptr, Pmode))
42114 ptr = ix86_zero_extend_to_Pmode (ptr);
/* BOUNDS must already be in a pointer-bounds mode; force it into a
   bound register if needed.  */
42116 gcc_assert (POINTER_BOUNDS_MODE_P (GET_MODE (bounds)));
42117 if (!register_operand (bounds, BNDmode))
42118 bounds = copy_to_mode_reg (BNDmode, bounds);
/* Emit the bounds-store (BNDSTX) pattern matching the bound mode.  */
42120 emit_insn (BNDmode == BND64mode
42121 ? gen_bnd64_stx (addr, ptr, bounds)
42122 : gen_bnd32_stx (addr, ptr, bounds));
42125 /* Load and return bounds returned by function in SLOT. */
42128 ix86_load_returned_bounds (rtx slot)
/* Returned bounds must be in a (bound) register; copy them into a
   fresh BNDmode pseudo for the caller to use.  */
42132 gcc_assert (REG_P (slot));
42133 res = gen_reg_rtx (BNDmode);
42134 emit_move_insn (res, slot);
42139 /* Store BOUNDS returned by function into SLOT. */
42142 ix86_store_returned_bounds (rtx slot, rtx bounds)
/* SLOT is the hard (bound) register that carries the return bounds;
   a plain move suffices.  */
42144 gcc_assert (REG_P (slot));
42145 emit_move_insn (slot, bounds);
42148 /* Returns a function decl for a vectorized version of the combined function
42149 with combined_fn code FN and the result vector type TYPE, or NULL_TREE
42150 if it is not available. */
42153 ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
42156 machine_mode in_mode, out_mode;
/* Both types must be vectors, otherwise no vectorized form exists.  */
42159 if (TREE_CODE (type_out) != VECTOR_TYPE
42160 || TREE_CODE (type_in) != VECTOR_TYPE)
/* Element modes and element counts of result/argument vectors.  */
42163 out_mode = TYPE_MODE (TREE_TYPE (type_out));
42164 out_n = TYPE_VECTOR_SUBPARTS (type_out);
42165 in_mode = TYPE_MODE (TREE_TYPE (type_in));
42166 in_n = TYPE_VECTOR_SUBPARTS (type_in);
/* 16-wide single-precision exp2 (v16sf -> v16sf).  */
42171 if (out_mode == SFmode && in_mode == SFmode)
42173 if (out_n == 16 && in_n == 16)
42174 return ix86_get_builtin (IX86_BUILTIN_EXP2PS);
/* floor combined with conversion to packed SImode.  */
42181 /* The round insn does not trap on denormals. */
42182 if (flag_trapping_math || !TARGET_ROUND)
42185 if (out_mode == SImode && in_mode == DFmode)
42187 if (out_n == 4 && in_n == 2)
42188 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX);
42189 else if (out_n == 8 && in_n == 4)
42190 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256);
42191 else if (out_n == 16 && in_n == 8)
42192 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512);
42194 if (out_mode == SImode && in_mode == SFmode)
42196 if (out_n == 4 && in_n == 4)
42197 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX);
42198 else if (out_n == 8 && in_n == 8)
42199 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX256);
/* ceil combined with conversion to packed SImode.  */
42206 /* The round insn does not trap on denormals. */
42207 if (flag_trapping_math || !TARGET_ROUND)
42210 if (out_mode == SImode && in_mode == DFmode)
42212 if (out_n == 4 && in_n == 2)
42213 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX);
42214 else if (out_n == 8 && in_n == 4)
42215 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256);
42216 else if (out_n == 16 && in_n == 8)
42217 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512);
42219 if (out_mode == SImode && in_mode == SFmode)
42221 if (out_n == 4 && in_n == 4)
42222 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX);
42223 else if (out_n == 8 && in_n == 8)
42224 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX256);
/* Plain float -> int conversions (cvtps2dq / vec_pack_sfix).  */
42231 if (out_mode == SImode && in_mode == DFmode)
42233 if (out_n == 4 && in_n == 2)
42234 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX);
42235 else if (out_n == 8 && in_n == 4)
42236 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX256);
42238 if (out_mode == SImode && in_mode == SFmode)
42240 if (out_n == 4 && in_n == 4)
42241 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ);
42242 else if (out_n == 8 && in_n == 8)
42243 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ256);
/* round-away-from-zero combined with conversion to packed SImode.  */
42250 /* The round insn does not trap on denormals. */
42251 if (flag_trapping_math || !TARGET_ROUND)
42254 if (out_mode == SImode && in_mode == DFmode)
42256 if (out_n == 4 && in_n == 2)
42257 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX);
42258 else if (out_n == 8 && in_n == 4)
42259 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256);
42260 else if (out_n == 16 && in_n == 8)
42261 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512);
42263 if (out_mode == SImode && in_mode == SFmode)
42265 if (out_n == 4 && in_n == 4)
42266 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX);
42267 else if (out_n == 8 && in_n == 8)
42268 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256);
/* Element-wise floor, requires the SSE4.1 round insn (TARGET_ROUND).  */
42273 /* The round insn does not trap on denormals. */
42274 if (flag_trapping_math || !TARGET_ROUND)
42277 if (out_mode == DFmode && in_mode == DFmode)
42279 if (out_n == 2 && in_n == 2)
42280 return ix86_get_builtin (IX86_BUILTIN_FLOORPD);
42281 else if (out_n == 4 && in_n == 4)
42282 return ix86_get_builtin (IX86_BUILTIN_FLOORPD256);
42284 if (out_mode == SFmode && in_mode == SFmode)
42286 if (out_n == 4 && in_n == 4)
42287 return ix86_get_builtin (IX86_BUILTIN_FLOORPS);
42288 else if (out_n == 8 && in_n == 8)
42289 return ix86_get_builtin (IX86_BUILTIN_FLOORPS256);
/* Element-wise ceil.  */
42294 /* The round insn does not trap on denormals. */
42295 if (flag_trapping_math || !TARGET_ROUND)
42298 if (out_mode == DFmode && in_mode == DFmode)
42300 if (out_n == 2 && in_n == 2)
42301 return ix86_get_builtin (IX86_BUILTIN_CEILPD);
42302 else if (out_n == 4 && in_n == 4)
42303 return ix86_get_builtin (IX86_BUILTIN_CEILPD256);
42305 if (out_mode == SFmode && in_mode == SFmode)
42307 if (out_n == 4 && in_n == 4)
42308 return ix86_get_builtin (IX86_BUILTIN_CEILPS);
42309 else if (out_n == 8 && in_n == 8)
42310 return ix86_get_builtin (IX86_BUILTIN_CEILPS256);
/* Element-wise trunc.  */
42315 /* The round insn does not trap on denormals. */
42316 if (flag_trapping_math || !TARGET_ROUND)
42319 if (out_mode == DFmode && in_mode == DFmode)
42321 if (out_n == 2 && in_n == 2)
42322 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD);
42323 else if (out_n == 4 && in_n == 4)
42324 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD256);
42326 if (out_mode == SFmode && in_mode == SFmode)
42328 if (out_n == 4 && in_n == 4)
42329 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS);
42330 else if (out_n == 8 && in_n == 8)
42331 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS256);
/* Element-wise rint.  */
42336 /* The round insn does not trap on denormals. */
42337 if (flag_trapping_math || !TARGET_ROUND)
42340 if (out_mode == DFmode && in_mode == DFmode)
42342 if (out_n == 2 && in_n == 2)
42343 return ix86_get_builtin (IX86_BUILTIN_RINTPD);
42344 else if (out_n == 4 && in_n == 4)
42345 return ix86_get_builtin (IX86_BUILTIN_RINTPD256);
42347 if (out_mode == SFmode && in_mode == SFmode)
42349 if (out_n == 4 && in_n == 4)
42350 return ix86_get_builtin (IX86_BUILTIN_RINTPS);
42351 else if (out_n == 8 && in_n == 8)
42352 return ix86_get_builtin (IX86_BUILTIN_RINTPS256);
/* Fused multiply-add.  */
42357 if (out_mode == DFmode && in_mode == DFmode)
42359 if (out_n == 2 && in_n == 2)
42360 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD);
42361 if (out_n == 4 && in_n == 4)
42362 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD256);
42364 if (out_mode == SFmode && in_mode == SFmode)
42366 if (out_n == 4 && in_n == 4)
42367 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS);
42368 if (out_n == 8 && in_n == 8)
42369 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS256);
42377 /* Dispatch to a handler for a vectorization library. */
42378 if (ix86_veclib_handler)
42379 return ix86_veclib_handler (combined_fn (fn), type_out, type_in);
42384 /* Handler for an SVML-style interface to
42385 a library with vectorized intrinsics. */
42388 ix86_veclibabi_svml (combined_fn fn, tree type_out, tree type_in)
42391 tree fntype, new_fndecl, args;
42394 machine_mode el_mode, in_mode;
42397 /* The SVML is suitable for unsafe math only. */
42398 if (!flag_unsafe_math_optimizations)
42401 el_mode = TYPE_MODE (TREE_TYPE (type_out));
42402 n = TYPE_VECTOR_SUBPARTS (type_out);
42403 in_mode = TYPE_MODE (TREE_TYPE (type_in));
42404 in_n = TYPE_VECTOR_SUBPARTS (type_in);
42405 if (el_mode != in_mode
/* Only v2df and v4sf SVML entry points are provided.  */
42429 if ((el_mode != DFmode || n != 2)
42430 && (el_mode != SFmode || n != 4))
/* Get the scalar builtin decl to derive the library routine name.  */
42438 tree fndecl = mathfn_built_in (TREE_TYPE (type_in), fn);
42439 bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
/* SVML spells log as "Ln"; handle it specially, otherwise mangle
   "vmls<name>4" (float) / "vmld<name>2" (double), skipping the
   10-character "__builtin_" prefix of the scalar name.  */
42441 if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOGF)
42442 strcpy (name, "vmlsLn4");
42443 else if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOG)
42444 strcpy (name, "vmldLn2");
42447 sprintf (name, "vmls%s", bname+10);
/* Replace the trailing 'f' of the float variant with the width '4'.  */
42448 name[strlen (name)-1] = '4';
42451 sprintf (name, "vmld%s2", bname+10);
42453 /* Convert to uppercase. */
/* Build a unary or binary function type matching the scalar builtin's
   argument count.  */
42457 for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
42461 fntype = build_function_type_list (type_out, type_in, NULL);
42463 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
42465 /* Build a function declaration for the vectorized function. */
42466 new_fndecl = build_decl (BUILTINS_LOCATION,
42467 FUNCTION_DECL, get_identifier (name), fntype);
42468 TREE_PUBLIC (new_fndecl) = 1;
42469 DECL_EXTERNAL (new_fndecl) = 1;
/* Pure math routine: no virtual operands, readonly.  */
42470 DECL_IS_NOVOPS (new_fndecl) = 1;
42471 TREE_READONLY (new_fndecl) = 1;
42476 /* Handler for an ACML-style interface to
42477 a library with vectorized intrinsics. */
42480 ix86_veclibabi_acml (combined_fn fn, tree type_out, tree type_in)
/* "__vr.._" stem: the two dots are placeholders patched elsewhere
   (patching lines are elided in this excerpt).  */
42482 char name[20] = "__vr.._";
42483 tree fntype, new_fndecl, args;
42486 machine_mode el_mode, in_mode;
42489 /* The ACML is 64bits only and suitable for unsafe math only as
42490 it does not correctly support parts of IEEE with the required
42491 precision such as denormals. */
42493 || !flag_unsafe_math_optimizations)
42496 el_mode = TYPE_MODE (TREE_TYPE (type_out));
42497 n = TYPE_VECTOR_SUBPARTS (type_out);
42498 in_mode = TYPE_MODE (TREE_TYPE (type_in));
42499 in_n = TYPE_VECTOR_SUBPARTS (type_in);
42500 if (el_mode != in_mode
/* Only V2DF and V4SF variants are recognized.  */
42512 if (el_mode == DFmode && n == 2)
42517 else if (el_mode == SFmode && n == 4)
42530 tree fndecl = mathfn_built_in (TREE_TYPE (type_in), fn);
42531 bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
/* Append the builtin name from offset 10 (presumably past a
   "__builtin_" prefix -- TODO confirm) after the 7-char stem.  */
42532 sprintf (name + 7, "%s", bname+10);
42535 for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
42539 fntype = build_function_type_list (type_out, type_in, NULL);
42541 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
42543 /* Build a function declaration for the vectorized function. */
42544 new_fndecl = build_decl (BUILTINS_LOCATION,
42545 FUNCTION_DECL, get_identifier (name), fntype);
42546 TREE_PUBLIC (new_fndecl) = 1;
42547 DECL_EXTERNAL (new_fndecl) = 1;
42548 DECL_IS_NOVOPS (new_fndecl) = 1;
42549 TREE_READONLY (new_fndecl) = 1;
42554 /* Returns a decl of a function that implements gather load with
42555 memory type MEM_VECTYPE and index type INDEX_VECTYPE and SCALE.
42556 Return NULL_TREE if it is not available. */
42559 ix86_vectorize_builtin_gather (const_tree mem_vectype,
42560 const_tree index_type, int scale)
42563 enum ix86_builtins code;
/* The index must be a 32- or 64-bit integer (or pointer) type.  */
42568 if ((TREE_CODE (index_type) != INTEGER_TYPE
42569 && !POINTER_TYPE_P (index_type))
42570 || (TYPE_MODE (index_type) != SImode
42571 && TYPE_MODE (index_type) != DImode))
42574 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
42577 /* v*gather* insn sign extends index to pointer mode. */
42578 if (TYPE_PRECISION (index_type) < POINTER_SIZE
42579 && TYPE_UNSIGNED (index_type))
/* SCALE must be a power of two (the 1/2/4/8 the insn encodes).  */
42584 || (scale & (scale - 1)) != 0)
/* SImode indices select the *SIV* builtin variants, DImode indices
   the *DIV* ones.  */
42587 si = TYPE_MODE (index_type) == SImode;
42588 switch (TYPE_MODE (mem_vectype))
42591 if (TARGET_AVX512VL)
42592 code = si ? IX86_BUILTIN_GATHER3SIV2DF : IX86_BUILTIN_GATHER3DIV2DF;
42594 code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
42597 if (TARGET_AVX512VL)
42598 code = si ? IX86_BUILTIN_GATHER3ALTSIV4DF : IX86_BUILTIN_GATHER3DIV4DF;
42600 code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
42603 if (TARGET_AVX512VL)
42604 code = si ? IX86_BUILTIN_GATHER3SIV2DI : IX86_BUILTIN_GATHER3DIV2DI;
42606 code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
42609 if (TARGET_AVX512VL)
42610 code = si ? IX86_BUILTIN_GATHER3ALTSIV4DI : IX86_BUILTIN_GATHER3DIV4DI;
42612 code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
42615 if (TARGET_AVX512VL)
42616 code = si ? IX86_BUILTIN_GATHER3SIV4SF : IX86_BUILTIN_GATHER3DIV4SF;
42618 code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
42621 if (TARGET_AVX512VL)
42622 code = si ? IX86_BUILTIN_GATHER3SIV8SF : IX86_BUILTIN_GATHER3ALTDIV8SF;
42624 code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
42627 if (TARGET_AVX512VL)
42628 code = si ? IX86_BUILTIN_GATHER3SIV4SI : IX86_BUILTIN_GATHER3DIV4SI;
42630 code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
42633 if (TARGET_AVX512VL)
42634 code = si ? IX86_BUILTIN_GATHER3SIV8SI : IX86_BUILTIN_GATHER3ALTDIV8SI;
42636 code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
/* 512-bit vector modes are gated on AVX512F.  */
42639 if (TARGET_AVX512F)
42640 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DF : IX86_BUILTIN_GATHER3DIV8DF;
42645 if (TARGET_AVX512F)
42646 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DI : IX86_BUILTIN_GATHER3DIV8DI;
42651 if (TARGET_AVX512F)
42652 code = si ? IX86_BUILTIN_GATHER3SIV16SF : IX86_BUILTIN_GATHER3ALTDIV16SF;
42657 if (TARGET_AVX512F)
42658 code = si ? IX86_BUILTIN_GATHER3SIV16SI : IX86_BUILTIN_GATHER3ALTDIV16SI;
42666 return ix86_get_builtin (code);
42669 /* Returns a decl of a function that implements scatter store with
42670 register type VECTYPE and index type INDEX_TYPE and SCALE.
42671 Return NULL_TREE if it is not available. */
42674 ix86_vectorize_builtin_scatter (const_tree vectype,
42675 const_tree index_type, int scale)
42678 enum ix86_builtins code;
/* Scatter stores are an AVX-512F-only facility.  */
42680 if (!TARGET_AVX512F)
/* The index must be a 32- or 64-bit integer (or pointer) type.  */
42683 if ((TREE_CODE (index_type) != INTEGER_TYPE
42684 && !POINTER_TYPE_P (index_type))
42685 || (TYPE_MODE (index_type) != SImode
42686 && TYPE_MODE (index_type) != DImode))
42689 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
42692 /* v*scatter* insn sign extends index to pointer mode. */
42693 if (TYPE_PRECISION (index_type) < POINTER_SIZE
42694 && TYPE_UNSIGNED (index_type))
42697 /* Scale can be 1, 2, 4 or 8. */
42700 || (scale & (scale - 1)) != 0)
/* SImode indices select the *SIV* variants, DImode the *DIV* ones.  */
42703 si = TYPE_MODE (index_type) == SImode;
42704 switch (TYPE_MODE (vectype))
42707 code = si ? IX86_BUILTIN_SCATTERALTSIV8DF : IX86_BUILTIN_SCATTERDIV8DF;
42710 code = si ? IX86_BUILTIN_SCATTERALTSIV8DI : IX86_BUILTIN_SCATTERDIV8DI;
42713 code = si ? IX86_BUILTIN_SCATTERSIV16SF : IX86_BUILTIN_SCATTERALTDIV16SF;
42716 code = si ? IX86_BUILTIN_SCATTERSIV16SI : IX86_BUILTIN_SCATTERALTDIV16SI;
/* NOTE(review): the gather counterpart above returns
   ix86_get_builtin (code); indexing ix86_builtins directly here
   bypasses that accessor -- confirm this is intentional.  */
42722 return ix86_builtins[code];
42725 /* Return true if it is safe to use the rsqrt optabs to optimize
/* rsqrt approximation is only allowed when SSE math is in use and
   the unsafe/finite/non-trapping math flags permit dropping IEEE
   accuracy.  */
42731 return (TARGET_SSE_MATH
42732 && flag_finite_math_only
42733 && !flag_trapping_math
42734 && flag_unsafe_math_optimizations);
42737 /* Returns a code for a target-specific builtin that implements
42738 reciprocal of the function, or NULL_TREE if not available. */
42741 ix86_builtin_reciprocal (tree fndecl)
42743 switch (DECL_FUNCTION_CODE (fndecl))
42745 /* Vectorized version of sqrt to rsqrt conversion. */
/* 128-bit packed-single case.  */
42746 case IX86_BUILTIN_SQRTPS_NR:
42747 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR);
/* 256-bit AVX variant.  */
42749 case IX86_BUILTIN_SQRTPS_NR256:
42750 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256);
42757 /* Helper for avx_vpermilps256_operand et al. This is also used by
42758 the expansion functions to turn the parallel back into a mask.
42759 The return value is 0 for no match and the imm8+1 for a match. */
42762 avx_vpermilp_parallel (rtx par, machine_mode mode)
42764 unsigned i, nelt = GET_MODE_NUNITS (mode);
42766 unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */
/* The parallel must supply exactly one selector per vector lane.  */
42768 if (XVECLEN (par, 0) != (int) nelt)
42771 /* Validate that all of the elements are constants, and not totally
42772 out of range. Copy the data into an integral array to make the
42773 subsequent checks easier. */
42774 for (i = 0; i < nelt; ++i)
42776 rtx er = XVECEXP (par, 0, i);
42777 unsigned HOST_WIDE_INT ei;
42779 if (!CONST_INT_P (er))
42790 /* In the 512-bit DFmode case, we can only move elements within
42791 a 128-bit lane. First fill the second part of the mask,
/* Elements 4-5 must select from the [4,6) pair of their own lane.  */
42793 for (i = 4; i < 6; ++i)
42795 if (ipar[i] < 4 || ipar[i] >= 6)
42797 mask |= (ipar[i] - 4) << i;
/* Elements 6-7 likewise, from the [6,8) pair.  */
42799 for (i = 6; i < 8; ++i)
42803 mask |= (ipar[i] - 6) << i;
42808 /* In the 256-bit DFmode case, we can only move elements within
42810 for (i = 0; i < 2; ++i)
42814 mask |= ipar[i] << i;
42816 for (i = 2; i < 4; ++i)
42820 mask |= (ipar[i] - 2) << i;
42825 /* In 512 bit SFmode case, permutation in the upper 256 bits
42826 must mirror the permutation in the lower 256-bits. */
42827 for (i = 0; i < 8; ++i)
42828 if (ipar[i] + 8 != ipar[i + 8])
42833 /* In 256 bit SFmode case, we have full freedom of
42834 movement within the low 128-bit lane, but the high 128-bit
42835 lane must mirror the exact same pattern. */
42836 for (i = 0; i < 4; ++i)
42837 if (ipar[i] + 4 != ipar[i + 4])
42844 /* In the 128-bit case, we've full freedom in the placement of
42845 the elements from the source operand. */
42846 for (i = 0; i < nelt; ++i)
42847 mask |= ipar[i] << (i * (nelt / 2));
/* Any other mode is a caller error.  */
42851 gcc_unreachable ();
42854 /* Make sure success has a non-zero value by adding one. */
42858 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
42859 the expansion functions to turn the parallel back into a mask.
42860 The return value is 0 for no match and the imm8+1 for a match. */
42863 avx_vperm2f128_parallel (rtx par, machine_mode mode)
42865 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
42867 unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
42869 if (XVECLEN (par, 0) != (int) nelt)
42872 /* Validate that all of the elements are constants, and not totally
42873 out of range. Copy the data into an integral array to make the
42874 subsequent checks easier. */
42875 for (i = 0; i < nelt; ++i)
42877 rtx er = XVECEXP (par, 0, i);
42878 unsigned HOST_WIDE_INT ei;
42880 if (!CONST_INT_P (er))
/* Selectors may index either source operand, hence 2 * nelt.  */
42883 if (ei >= 2 * nelt)
42888 /* Validate that the halves of the permute are halves. */
/* Each half must be a run of consecutive indices.  */
42889 for (i = 0; i < nelt2 - 1; ++i)
42890 if (ipar[i] + 1 != ipar[i + 1])
42892 for (i = nelt2; i < nelt - 1; ++i)
42893 if (ipar[i] + 1 != ipar[i + 1])
42896 /* Reconstruct the mask. */
/* The first selector of each half determines which source half
   feeds that result half; encoded one nibble per half.  */
42897 for (i = 0; i < 2; ++i)
42899 unsigned e = ipar[i * nelt2];
42903 mask |= e << (i * 4);
42906 /* Make sure success has a non-zero value by adding one. */
42910 /* Return a register priority for hard reg REGNO. */
/* NOTE(review): only the relative ordering of the returned values
   matters to the register allocator; the actual return constants are
   elided in this excerpt.  */
42912 ix86_register_priority (int hard_regno)
42914 /* ebp and r13 as the base always wants a displacement, r12 as the
42915 base always wants an index. So discourage their usage in an
42917 if (hard_regno == R12_REG || hard_regno == R13_REG)
42919 if (hard_regno == BP_REG)
42921 /* New x86-64 int registers result in bigger code size. Discourage
42923 if (FIRST_REX_INT_REG <= hard_regno && hard_regno <= LAST_REX_INT_REG)
42925 /* New x86-64 SSE registers result in bigger code size. Discourage
42927 if (FIRST_REX_SSE_REG <= hard_regno && hard_regno <= LAST_REX_SSE_REG)
42929 /* Usage of AX register results in smaller code. Prefer it. */
42930 if (hard_regno == AX_REG)
42935 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
42937 Put float CONST_DOUBLE in the constant pool instead of fp regs.
42938 QImode must go into class Q_REGS.
42939 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
42940 movdf to do mem-to-mem moves through integer regs. */
/* NOTE(review): several return statements between the checks below
   are elided in this excerpt.  */
42943 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
42945 machine_mode mode = GET_MODE (x);
42947 /* We're only allowed to return a subclass of CLASS. Many of the
42948 following checks fail for NO_REGS, so eliminate that early. */
42949 if (regclass == NO_REGS)
42952 /* All classes can load zeros. */
42953 if (x == CONST0_RTX (mode))
42956 /* Force constants into memory if we are loading a (nonzero) constant into
42957 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
42958 instructions to load from a constant. */
42960 && (MAYBE_MMX_CLASS_P (regclass)
42961 || MAYBE_SSE_CLASS_P (regclass)
42962 || MAYBE_MASK_CLASS_P (regclass)))
42965 /* Prefer SSE regs only, if we can use them for math. */
42966 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
42967 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
42969 /* Floating-point constants need more complex checks. */
42970 if (CONST_DOUBLE_P (x))
42972 /* General regs can load everything. */
42973 if (reg_class_subset_p (regclass, GENERAL_REGS))
42976 /* Floats can load 0 and 1 plus some others. Note that we eliminated
42977 zero above. We only want to wind up preferring 80387 registers if
42978 we plan on doing computation with them. */
42980 && standard_80387_constant_p (x) > 0)
42982 /* Limit class to non-sse. */
42983 if (regclass == FLOAT_SSE_REGS)
42985 if (regclass == FP_TOP_SSE_REGS)
42987 if (regclass == FP_SECOND_SSE_REGS)
42988 return FP_SECOND_REG;
42989 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
42996 /* Generally when we see PLUS here, it's the function invariant
42997 (plus soft-fp const_int). Which can only be computed into general
42999 if (GET_CODE (x) == PLUS)
43000 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
43002 /* QImode constants are easy to load, but non-constant QImode data
43003 must go into Q_REGS. */
43004 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
43006 if (reg_class_subset_p (regclass, Q_REGS))
43008 if (reg_class_subset_p (Q_REGS, regclass))
43016 /* Discourage putting floating-point values in SSE registers unless
43017 SSE math is being used, and likewise for the 387 registers. */
43019 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
43021 machine_mode mode = GET_MODE (x);
43023 /* Restrict the output reload class to the register bank that we are doing
43024 math on. If we would like not to return a subset of CLASS, reject this
43025 alternative: if reload cannot do this, it will still use its choice. */
/* NOTE(review): MODE was already initialized from X in the
   declaration above; this reassignment is redundant.  */
43026 mode = GET_MODE (x);
43027 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
43028 return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
/* x87 math: narrow mixed FP/SSE classes to the pure x87 classes.  */
43030 if (X87_FLOAT_MODE_P (mode))
43032 if (regclass == FP_TOP_SSE_REGS)
43034 else if (regclass == FP_SECOND_SSE_REGS)
43035 return FP_SECOND_REG;
43037 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
/* Secondary-reload worker (presumably the TARGET_SECONDARY_RELOAD
   hook, given the secondary_reload_info parameter -- confirm against
   the hook table).  */
43044 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
43045 machine_mode mode, secondary_reload_info *sri)
43047 /* Double-word spills from general registers to non-offsettable memory
43048 references (zero-extended addresses) require special handling. */
43051 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
43052 && INTEGER_CLASS_P (rclass)
43053 && !offsettable_memref_p (x))
/* Select the load or store flavor of the special reload pattern.  */
43056 ? CODE_FOR_reload_noff_load
43057 : CODE_FOR_reload_noff_store);
43058 /* Add the cost of moving address to a temporary. */
43059 sri->extra_cost = 1;
43064 /* QImode spills from non-QI registers require
43065 intermediate register on 32bit targets. */
43067 && (MAYBE_MASK_CLASS_P (rclass)
43068 || (!TARGET_64BIT && !in_p
43069 && INTEGER_CLASS_P (rclass)
43070 && MAYBE_NON_Q_CLASS_P (rclass))))
/* Resolve pseudos and subregs to a hard register number.  */
43079 if (regno >= FIRST_PSEUDO_REGISTER || SUBREG_P (x))
43080 regno = true_regnum (x);
43082 /* Return Q_REGS if the operand is in memory. */
43087 /* This condition handles corner case where an expression involving
43088 pointers gets vectorized. We're trying to use the address of a
43089 stack slot as a vector initializer.
43091 (set (reg:V2DI 74 [ vect_cst_.2 ])
43092 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
43094 Eventually frame gets turned into sp+offset like this:
43096 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
43097 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
43098 (const_int 392 [0x188]))))
43100 That later gets turned into:
43102 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
43103 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
43104 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
43106 We'll have the following reload recorded:
43108 Reload 0: reload_in (DI) =
43109 (plus:DI (reg/f:DI 7 sp)
43110 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
43111 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
43112 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
43113 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
43114 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
43115 reload_reg_rtx: (reg:V2DI 22 xmm1)
43117 Which isn't going to work since SSE instructions can't handle scalar
43118 additions. Returning GENERAL_REGS forces the addition into integer
43119 register and reload can handle subsequent reloads without problems. */
43121 if (in_p && GET_CODE (x) == PLUS
43122 && SSE_CLASS_P (rclass)
43123 && SCALAR_INT_MODE_P (mode))
43124 return GENERAL_REGS;
43129 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
43132 ix86_class_likely_spilled_p (reg_class_t rclass)
/* Small single-register classes (the visible cases below) are
   presumably reported as likely spilled; the return statements are
   elided in this excerpt.  */
43143 case SSE_FIRST_REG:
43145 case FP_SECOND_REG:
43156 /* If we are copying between general and FP registers, we need a memory
43157 location. The same is true for SSE and MMX registers.
43159 To optimize register_move_cost performance, allow inline variant.
43161 The macro can't work reliably when one of the CLASSES is class containing
43162 registers from multiple units (SSE, MMX, integer). We avoid this by never
43163 combining those units in single alternative in the machine description.
43164 Ensure that this constraint holds to avoid unexpected surprises.
43166 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
43167 enforce these sanity checks. */
43170 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
43171 machine_mode mode, int strict)
43173 if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
/* A "maybe" class that is not the corresponding pure class mixes
   units; see the header comment above.  */
43175 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
43176 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
43177 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
43178 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
43179 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
43180 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
43182 gcc_assert (!strict || lra_in_progress);
/* Crossing the x87 boundary always goes through memory.  */
43186 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
43189 /* Between mask and general, we have moves no larger than word size. */
43190 if ((MAYBE_MASK_CLASS_P (class1) != MAYBE_MASK_CLASS_P (class2))
43191 && (GET_MODE_SIZE (mode) > UNITS_PER_WORD))
43194 /* ??? This is a lie. We do have moves between mmx/general, and for
43195 mmx/sse2. But by saying we need secondary memory we discourage the
43196 register allocator from using the mmx registers unless needed. */
43197 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
43200 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
43202 /* SSE1 doesn't have any direct moves from other classes. */
43206 /* If the target says that inter-unit moves are more expensive
43207 than moving through memory, then don't generate them. */
43208 if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
43209 || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
43212 /* Between SSE and general, we have moves no larger than word size. */
43213 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
/* Out-of-line wrapper around inline_secondary_memory_needed; used
   where the speed of the inline variant is not required.  */
43221 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
43222 machine_mode mode, int strict)
43224 return inline_secondary_memory_needed (class1, class2, mode, strict);
43227 /* Implement the TARGET_CLASS_MAX_NREGS hook.
43229 On the 80386, this is the size of MODE in words,
43230 except in the FP regs, where a single reg is always enough. */
43232 static unsigned char
43233 ix86_class_max_nregs (reg_class_t rclass, machine_mode mode)
43235 if (MAYBE_INTEGER_CLASS_P (rclass))
/* XFmode/XCmode need explicit word counts: 3/6 words on ia32 but
   only 2/4 on x86-64 (16-byte padded slots).  */
43237 if (mode == XFmode)
43238 return (TARGET_64BIT ? 2 : 3);
43239 else if (mode == XCmode)
43240 return (TARGET_64BIT ? 4 : 6);
/* Other integer-class modes: round the byte size up to words.  */
43242 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
43246 if (COMPLEX_MODE_P (mode))
43253 /* Return true if the registers in CLASS cannot represent the change from
43254 modes FROM to TO. */
/* NOTE(review): early returns between the checks below are elided in
   this excerpt.  */
43257 ix86_cannot_change_mode_class (machine_mode from, machine_mode to,
43258 enum reg_class regclass)
43263 /* x87 registers can't do subreg at all, as all values are reformatted
43264 to extended precision. */
43265 if (MAYBE_FLOAT_CLASS_P (regclass))
43268 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
43270 int from_size = GET_MODE_SIZE (from);
43271 int to_size = GET_MODE_SIZE (to);
43273 /* Vector registers do not support QI or HImode loads. If we don't
43274 disallow a change to these modes, reload will assume it's ok to
43275 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
43276 the vec_dupv4hi pattern. */
43280 /* Further, we cannot allow word_mode subregs of full vector modes.
43281 Otherwise the middle-end will assume it's ok to store to
43282 (subreg:DI (reg:TI 100) 0) in order to modify only the low 64 bits
43283 of the 128-bit register. However, after reload the subreg will
43284 be dropped leaving a plain DImode store. This is indistinguishable
43285 from a "normal" DImode move, and so we're justified to use movsd,
43286 which modifies the entire 128-bit register. */
43287 if (to_size == UNITS_PER_WORD && from_size > UNITS_PER_WORD)
43294 /* Return the cost of moving data of mode M between a
43295 register and memory. A value of 2 is the default; this cost is
43296 relative to those in `REGISTER_MOVE_COST'.
43298 This function is used extensively by register_move_cost that is used to
43299 build tables at startup. Make it inline in this case.
43300 When IN is 2, return maximum of in and out move cost.
43302 If moving between registers and memory is more expensive than
43303 between two registers, you should define this macro to express the
43306 Model also increased moving costs of QImode registers in non
43310 inline_memory_move_cost (machine_mode mode, enum reg_class regclass,
/* x87 case: look up the per-size fp_load/fp_store cost tables.  */
43314 if (FLOAT_CLASS_P (regclass))
43332 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
43333 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
/* SSE case: index the sse_load/sse_store tables by operand size.  */
43335 if (SSE_CLASS_P (regclass))
43338 switch (GET_MODE_SIZE (mode))
43353 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
43354 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
43356 if (MMX_CLASS_P (regclass))
43359 switch (GET_MODE_SIZE (mode))
43371 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
43372 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
/* Integer registers: cost depends on operand size, with extra care
   for byte operations in non-Q registers.  */
43374 switch (GET_MODE_SIZE (mode))
43377 if (Q_CLASS_P (regclass) || TARGET_64BIT)
43380 return ix86_cost->int_store[0];
/* movzbl avoids a partial-register dependency when optimizing
   for speed.  */
43381 if (TARGET_PARTIAL_REG_DEPENDENCY
43382 && optimize_function_for_speed_p (cfun))
43383 cost = ix86_cost->movzbl_load;
43385 cost = ix86_cost->int_load[0];
43387 return MAX (cost, ix86_cost->int_store[0]);
43393 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
43395 return ix86_cost->movzbl_load;
43397 return ix86_cost->int_store[0] + 4;
43402 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
43403 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
43405 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
43406 if (mode == TFmode)
43409 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
43411 cost = ix86_cost->int_load[2];
43413 cost = ix86_cost->int_store[2];
43414 return cost * CEIL ((int) GET_MODE_SIZE (mode), UNITS_PER_WORD);
/* Memory-move-cost target hook: delegate to the inline worker,
   normalizing IN to 0 or 1.  */
43419 ix86_memory_move_cost (machine_mode mode, reg_class_t regclass,
43422 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
43426 /* Return the cost of moving data from a register in class CLASS1 to
43427 one in class CLASS2.
43429 It is not required that the cost always equal 2 when FROM is the same as TO;
43430 on some machines it is expensive to move between registers if they are not
43431 general registers. */
43434 ix86_register_move_cost (machine_mode mode, reg_class_t class1_i,
43435 reg_class_t class2_i)
43437 enum reg_class class1 = (enum reg_class) class1_i;
43438 enum reg_class class2 = (enum reg_class) class2_i;
43440 /* In case we require secondary memory, compute cost of the store followed
43441 by load. In order to avoid bad register allocation choices, we need
43442 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
43444 if (inline_secondary_memory_needed (class1, class2, mode, 0))
/* IN == 2 asks inline_memory_move_cost for max (load, store).  */
43448 cost += inline_memory_move_cost (mode, class1, 2);
43449 cost += inline_memory_move_cost (mode, class2, 2);
43451 /* In case of copying from general_purpose_register we may emit multiple
43452 stores followed by single load causing memory size mismatch stall.
43453 Count this as arbitrarily high cost of 20. */
43454 if (targetm.class_max_nregs (class1, mode)
43455 > targetm.class_max_nregs (class2, mode))
43458 /* In the case of FP/MMX moves, the registers actually overlap, and we
43459 have to switch modes in order to treat them differently. */
43460 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
43461 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
43467 /* Moves between SSE/MMX and integer unit are expensive. */
43468 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
43469 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
43471 /* ??? By keeping returned value relatively high, we limit the number
43472 of moves between integer and MMX/SSE registers for all targets.
43473 Additionally, high value prevents problem with x86_modes_tieable_p(),
43474 where integer modes in MMX/SSE registers are not tieable
43475 because of missing QImode and HImode moves to, from or between
43476 MMX/SSE registers. */
43477 return MAX (8, ix86_cost->mmxsse_to_integer);
/* Intra-unit moves: use the per-unit cost table entries.  */
43479 if (MAYBE_FLOAT_CLASS_P (class1))
43480 return ix86_cost->fp_move;
43481 if (MAYBE_SSE_CLASS_P (class1))
43482 return ix86_cost->sse_move;
43483 if (MAYBE_MMX_CLASS_P (class1))
43484 return ix86_cost->mmx_move;
43488 /* Return TRUE if hard register REGNO can hold a value of machine-mode
/* NOTE(review): excerpt elided -- several early returns and switch
   plumbing are missing between the numbered lines below.  */
43492 ix86_hard_regno_mode_ok (int regno, machine_mode mode)
43494 /* Flags and only flags can only hold CCmode values. */
43495 if (CC_REGNO_P (regno))
43496 return GET_MODE_CLASS (mode) == MODE_CC;
/* Conversely, no other register may hold CC/RANDOM/PARTIAL_INT.  */
43497 if (GET_MODE_CLASS (mode) == MODE_CC
43498 || GET_MODE_CLASS (mode) == MODE_RANDOM
43499 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
43501 if (STACK_REGNO_P (regno))
43502 return VALID_FP_MODE_P (mode);
43503 if (MASK_REGNO_P (regno))
43504 return (VALID_MASK_REG_MODE (mode)
43505 || (TARGET_AVX512BW
43506 && VALID_MASK_AVX512BW_MODE (mode)));
43507 if (BND_REGNO_P (regno))
43508 return VALID_BND_REG_MODE (mode);
43509 if (SSE_REGNO_P (regno))
43511 /* We implement the move patterns for all vector modes into and
43512 out of SSE registers, even when no operation instructions
43515 /* For AVX-512 we allow, regardless of regno:
43517 - any of 512-bit wide vector mode
43518 - any scalar mode. */
43521 || VALID_AVX512F_REG_MODE (mode)
43522 || VALID_AVX512F_SCALAR_MODE (mode)))
43525 /* TODO check for QI/HI scalars. */
43526 /* AVX512VL allows sse regs16+ for 128/256 bit modes. */
43527 if (TARGET_AVX512VL
43530 || VALID_AVX256_REG_MODE (mode)
43531 || VALID_AVX512VL_128_REG_MODE (mode)))
43534 /* xmm16-xmm31 are only available for AVX-512. */
43535 if (EXT_REX_SSE_REGNO_P (regno))
43538 /* OImode and AVX modes are available only when AVX is enabled. */
43539 return ((TARGET_AVX
43540 && VALID_AVX256_REG_OR_OI_MODE (mode))
43541 || VALID_SSE_REG_MODE (mode)
43542 || VALID_SSE2_REG_MODE (mode)
43543 || VALID_MMX_REG_MODE (mode)
43544 || VALID_MMX_REG_MODE_3DNOW (mode));
43546 if (MMX_REGNO_P (regno))
43548 /* We implement the move patterns for 3DNOW modes even in MMX mode,
43549 so if the register is available at all, then we can move data of
43550 the given mode into or out of it. */
43551 return (VALID_MMX_REG_MODE (mode)
43552 || VALID_MMX_REG_MODE_3DNOW (mode));
/* General-purpose registers from here on.  */
43555 if (mode == QImode)
43557 /* Take care for QImode values - they can be in non-QI regs,
43558 but then they do cause partial register stalls. */
43559 if (ANY_QI_REGNO_P (regno))
43561 if (!TARGET_PARTIAL_REG_STALL)
43563 /* LRA checks if the hard register is OK for the given mode.
43564 QImode values can live in non-QI regs, so we allow all
43566 if (lra_in_progress)
43568 return !can_create_pseudo_p ();
43570 /* We handle both integer and floats in the general purpose registers. */
43571 else if (VALID_INT_MODE_P (mode))
43573 else if (VALID_FP_MODE_P (mode))
43575 else if (VALID_DFP_MODE_P (mode))
43577 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
43578 on to use that value in smaller contexts, this can easily force a
43579 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
43580 supporting DImode, allow it. */
43581 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
43587 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
43588 tieable integer mode. */
43591 ix86_tieable_integer_mode_p (machine_mode mode)
/* NOTE(review): the switch and its case labels are elided in this
   excerpt -- which modes reach each return below cannot be
   determined from what is visible.  */
43600 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
43603 return TARGET_64BIT;
43610 /* Return true if MODE1 is accessible in a register that can hold MODE2
43611 without copying. That is, all register classes that can hold MODE2
43612 can also hold MODE1. */
43615 ix86_modes_tieable_p (machine_mode mode1, machine_mode mode2)
43617 if (mode1 == mode2)
43620 if (ix86_tieable_integer_mode_p (mode1)
43621 && ix86_tieable_integer_mode_p (mode2))
43624 /* MODE2 being XFmode implies fp stack or general regs, which means we
43625 can tie any smaller floating point modes to it. Note that we do not
43626 tie this with TFmode. */
43627 if (mode2 == XFmode)
43628 return mode1 == SFmode || mode1 == DFmode;
43630 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
43631 that we can tie it with SFmode. */
43632 if (mode2 == DFmode)
43633 return mode1 == SFmode;
43635 /* If MODE2 is only appropriate for an SSE register, then tie with
43636 any other mode acceptable to SSE registers. */
/* The 32-, 16- and 8-byte cases probe a representative hard register
   of the SSE resp. MMX unit for both modes.  */
43637 if (GET_MODE_SIZE (mode2) == 32
43638 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
43639 return (GET_MODE_SIZE (mode1) == 32
43640 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
43641 if (GET_MODE_SIZE (mode2) == 16
43642 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
43643 return (GET_MODE_SIZE (mode1) == 16
43644 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
43646 /* If MODE2 is appropriate for an MMX register, then tie
43647 with any other mode acceptable to MMX registers. */
43648 if (GET_MODE_SIZE (mode2) == 8
43649 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
43650 return (GET_MODE_SIZE (mode1) == 8
43651 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
43656 /* Return the cost of moving between two registers of mode MODE. */
43659 ix86_set_reg_reg_cost (machine_mode mode)
/* UNITS is the largest chunk a single move can transfer; it defaults
   to a word and is widened below when the mode has native moves.  */
43661 unsigned int units = UNITS_PER_WORD;
43663 switch (GET_MODE_CLASS (mode))
/* Condition codes move as one CCmode-sized piece.  */
43669 units = GET_MODE_SIZE (CCmode);
/* Scalar float modes with a native register home move whole.  */
43673 if ((TARGET_SSE && mode == TFmode)
43674 || (TARGET_80387 && mode == XFmode)
43675 || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
43676 || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
43677 units = GET_MODE_SIZE (mode);
43680 case MODE_COMPLEX_FLOAT:
43681 if ((TARGET_SSE && mode == TCmode)
43682 || (TARGET_80387 && mode == XCmode)
43683 || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
43684 || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
43685 units = GET_MODE_SIZE (mode);
43688 case MODE_VECTOR_INT:
43689 case MODE_VECTOR_FLOAT:
/* Vector modes move whole when the matching ISA level exists.  */
43690 if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
43691 || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
43692 || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
43693 || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
43694 || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
43695 units = GET_MODE_SIZE (mode);
43698 /* Return the cost of moving between two registers of mode MODE,
43699 assuming that the move will be in pieces of at most UNITS bytes. */
43700 return COSTS_N_INSNS (CEIL (GET_MODE_SIZE (mode), units));
43703 /* Compute a (partial) cost for rtx X. Return true if the complete
43704 cost has been computed, and false if subexpressions should be
43705 scanned. In either case, *TOTAL contains the cost result. */
/* NOTE(review): this listing elides source lines (embedded line numbers
   jump), so several case labels, braces and returns of the big switch
   below are not visible here.  Comments are hedged accordingly.  */
43708 ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
43709 int *total, bool speed)
/* Pick the speed or size cost table depending on what we optimize for.  */
43712 enum rtx_code code = GET_CODE (x);
43713 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
43714 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
/* A SET of a register from a register or zero is costed purely by the
   destination's mode (see ix86_set_reg_reg_cost).  */
43719 if (register_operand (SET_DEST (x), VOIDmode)
43720 && reg_or_0_operand (SET_SRC (x), VOIDmode))
43722 *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
/* Immediate/constant operand costs: immediates too wide for x86_64 insn
   forms, and PIC symbolic references, are charged extra.  */
43731 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
43733 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
43735 else if (flag_pic && SYMBOLIC_CONST (x)
43737 && (GET_CODE (x) == LABEL_REF
43738 || (GET_CODE (x) == SYMBOL_REF
43739 && SYMBOL_REF_LOCAL_P (x))))
43740 /* Use 0 cost for CONST to improve its propagation. */
43741 && (TARGET_64BIT || GET_CODE (x) != CONST))
43747 case CONST_WIDE_INT:
/* FP constants: standard 80387 constants (0.0, 1.0, ...) can be loaded
   cheaply with dedicated insns; others go through memory.  */
43752 switch (standard_80387_constant_p (x))
43757 default: /* Other constants */
43764 if (SSE_FLOAT_MODE_P (mode))
43767 switch (standard_sse_constant_p (x))
43771 case 1: /* 0: xor eliminates false dependency */
43774 default: /* -1: cmp contains false dependency */
43779 /* Fall back to (MEM (SYMBOL_REF)), since that's where
43780 it'll probably end up. Add a penalty for size. */
43781 *total = (COSTS_N_INSNS (1)
43782 + (flag_pic != 0 && !TARGET_64BIT)
43783 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
43787 /* The zero extensions is often completely free on x86_64, so make
43788 it as cheap as possible. */
43789 if (TARGET_64BIT && mode == DImode
43790 && GET_MODE (XEXP (x, 0)) == SImode)
43792 else if (TARGET_ZERO_EXTEND_WITH_AND)
43793 *total = cost->add;
43795 *total = cost->movzx;
43799 *total = cost->movsx;
/* Shifts by a small constant may be implementable as lea (scale 2/4/8)
   when that is cheaper than an explicit shift.  */
43803 if (SCALAR_INT_MODE_P (mode)
43804 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
43805 && CONST_INT_P (XEXP (x, 1)))
43807 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
43810 *total = cost->add;
43813 if ((value == 2 || value == 3)
43814 && cost->lea <= cost->shift_const)
43816 *total = cost->lea;
43826 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
43828 /* ??? Should be SSE vector operation cost. */
43829 /* At least for published AMD latencies, this really is the same
43830 as the latency for a simple fpu operation like fabs. */
43831 /* V*QImode is emulated with 1-11 insns. */
43832 if (mode == V16QImode || mode == V32QImode)
43835 if (TARGET_XOP && mode == V16QImode)
43837 /* For XOP we use vpshab, which requires a broadcast of the
43838 value to the variable shift insn. For constants this
43839 means a V16Q const in mem; even when we can perform the
43840 shift with one insn set the cost to prefer paddb. */
43841 if (CONSTANT_P (XEXP (x, 1)))
43843 *total = (cost->fabs
43844 + rtx_cost (XEXP (x, 0), mode, code, 0, speed)
43845 + (speed ? 2 : COSTS_N_BYTES (16)));
43850 else if (TARGET_SSSE3)
43852 *total = cost->fabs * count;
43855 *total = cost->fabs;
/* Double-word (wider than a machine word) shifts are emulated with
   shift pairs; variable counts are more expensive still.  */
43857 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
43859 if (CONST_INT_P (XEXP (x, 1)))
43861 if (INTVAL (XEXP (x, 1)) > 32)
43862 *total = cost->shift_const + COSTS_N_INSNS (2);
43864 *total = cost->shift_const * 2;
43868 if (GET_CODE (XEXP (x, 1)) == AND)
43869 *total = cost->shift_var * 2;
43871 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
43876 if (CONST_INT_P (XEXP (x, 1)))
43877 *total = cost->shift_const;
43878 else if (SUBREG_P (XEXP (x, 1))
43879 && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND)
43881 /* Return the cost after shift-and truncation. */
43882 *total = cost->shift_var;
43886 *total = cost->shift_var;
/* FMA: only valid on FP modes with one of the FMA instruction sets.  */
43894 gcc_assert (FLOAT_MODE_P (mode));
43895 gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
43897 /* ??? SSE scalar/vector cost should be used here. */
43898 /* ??? Bald assumption that fma has the same cost as fmul. */
43899 *total = cost->fmul;
43900 *total += rtx_cost (XEXP (x, 1), mode, FMA, 1, speed);
43902 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
43904 if (GET_CODE (sub) == NEG)
43905 sub = XEXP (sub, 0);
43906 *total += rtx_cost (sub, mode, FMA, 0, speed);
43909 if (GET_CODE (sub) == NEG)
43910 sub = XEXP (sub, 0);
43911 *total += rtx_cost (sub, mode, FMA, 2, speed);
/* MULT: FP multiplies all use fmul; integer vector multiplies are
   often emulated and charged accordingly.  */
43916 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
43918 /* ??? SSE scalar cost should be used here. */
43919 *total = cost->fmul;
43922 else if (X87_FLOAT_MODE_P (mode))
43924 *total = cost->fmul;
43927 else if (FLOAT_MODE_P (mode))
43929 /* ??? SSE vector cost should be used here. */
43930 *total = cost->fmul;
43933 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
43935 /* V*QImode is emulated with 7-13 insns. */
43936 if (mode == V16QImode || mode == V32QImode)
43939 if (TARGET_XOP && mode == V16QImode)
43941 else if (TARGET_SSSE3)
43943 *total = cost->fmul * 2 + cost->fabs * extra;
43945 /* V*DImode is emulated with 5-8 insns. */
43946 else if (mode == V2DImode || mode == V4DImode)
43948 if (TARGET_XOP && mode == V2DImode)
43949 *total = cost->fmul * 2 + cost->fabs * 3;
43951 *total = cost->fmul * 3 + cost->fabs * 5;
43953 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
43954 insns, including two PMULUDQ. */
43955 else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
43956 *total = cost->fmul * 2 + cost->fabs * 5;
43958 *total = cost->fmul;
/* Scalar integer multiply: cost scales with the number of set bits in
   a constant multiplier (nbits), per the mult_init/mult_bit model.  */
43963 rtx op0 = XEXP (x, 0);
43964 rtx op1 = XEXP (x, 1);
43966 if (CONST_INT_P (XEXP (x, 1)))
43968 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
43969 for (nbits = 0; value != 0; value &= value - 1)
43973 /* This is arbitrary. */
43976 /* Compute costs correctly for widening multiplication. */
43977 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
43978 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
43979 == GET_MODE_SIZE (mode))
43981 int is_mulwiden = 0;
43982 machine_mode inner_mode = GET_MODE (op0);
43984 if (GET_CODE (op0) == GET_CODE (op1))
43985 is_mulwiden = 1, op1 = XEXP (op1, 0);
43986 else if (CONST_INT_P (op1))
43988 if (GET_CODE (op0) == SIGN_EXTEND)
43989 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
43992 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
43996 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
43999 *total = (cost->mult_init[MODE_INDEX (mode)]
44000 + nbits * cost->mult_bit
44001 + rtx_cost (op0, mode, outer_code, opno, speed)
44002 + rtx_cost (op1, mode, outer_code, opno, speed));
/* DIV/MOD: one flat fdiv cost for FP, table lookup for integers.  */
44011 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
44012 /* ??? SSE cost should be used here. */
44013 *total = cost->fdiv;
44014 else if (X87_FLOAT_MODE_P (mode))
44015 *total = cost->fdiv;
44016 else if (FLOAT_MODE_P (mode))
44017 /* ??? SSE vector cost should be used here. */
44018 *total = cost->fdiv;
44020 *total = cost->divide[MODE_INDEX (mode)];
/* PLUS: recognize lea-able shapes (base + index*{2,4,8} + disp) and
   charge a single lea plus the operand costs.  */
44024 if (GET_MODE_CLASS (mode) == MODE_INT
44025 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
44027 if (GET_CODE (XEXP (x, 0)) == PLUS
44028 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
44029 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
44030 && CONSTANT_P (XEXP (x, 1)))
44032 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
44033 if (val == 2 || val == 4 || val == 8)
44035 *total = cost->lea;
44036 *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
44037 outer_code, opno, speed);
44038 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
44039 outer_code, opno, speed);
44040 *total += rtx_cost (XEXP (x, 1), mode,
44041 outer_code, opno, speed);
44045 else if (GET_CODE (XEXP (x, 0)) == MULT
44046 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
44048 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
44049 if (val == 2 || val == 4 || val == 8)
44051 *total = cost->lea;
44052 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
44053 outer_code, opno, speed);
44054 *total += rtx_cost (XEXP (x, 1), mode,
44055 outer_code, opno, speed);
44059 else if (GET_CODE (XEXP (x, 0)) == PLUS)
44061 *total = cost->lea;
44062 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
44063 outer_code, opno, speed);
44064 *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
44065 outer_code, opno, speed);
44066 *total += rtx_cost (XEXP (x, 1), mode,
44067 outer_code, opno, speed);
/* PLUS/MINUS on FP modes: flat fadd cost.  */
44074 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
44076 /* ??? SSE cost should be used here. */
44077 *total = cost->fadd;
44080 else if (X87_FLOAT_MODE_P (mode))
44082 *total = cost->fadd;
44085 else if (FLOAT_MODE_P (mode))
44087 /* ??? SSE vector cost should be used here. */
44088 *total = cost->fadd;
/* Double-word integer add/sub: two adds, operands costed per word.  */
44096 if (GET_MODE_CLASS (mode) == MODE_INT
44097 && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
44099 *total = (cost->add * 2
44100 + (rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed)
44101 << (GET_MODE (XEXP (x, 0)) != DImode))
44102 + (rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed)
44103 << (GET_MODE (XEXP (x, 1)) != DImode)));
/* NEG: fchs for FP modes.  */
44109 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
44111 /* ??? SSE cost should be used here. */
44112 *total = cost->fchs;
44115 else if (X87_FLOAT_MODE_P (mode))
44117 *total = cost->fchs;
44120 else if (FLOAT_MODE_P (mode))
44122 /* ??? SSE vector cost should be used here. */
44123 *total = cost->fchs;
44129 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
44131 /* ??? Should be SSE vector operation cost. */
44132 /* At least for published AMD latencies, this really is the same
44133 as the latency for a simple fpu operation like fabs. */
44134 *total = cost->fabs;
44136 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
44137 *total = cost->add * 2;
44139 *total = cost->add;
/* COMPARE of a single extracted bit against zero becomes test[bwl].  */
44143 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
44144 && XEXP (XEXP (x, 0), 1) == const1_rtx
44145 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
44146 && XEXP (x, 1) == const0_rtx)
44148 /* This kind of construct is implemented using test[bwl].
44149 Treat it as if we had an AND. */
44150 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
44151 *total = (cost->add
44152 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, outer_code,
44154 + rtx_cost (const1_rtx, mode, outer_code, opno, speed));
44158 /* The embedded comparison operand is completely free. */
44159 if (!general_operand (XEXP (x, 0), GET_MODE (XEXP (x, 0)))
44160 && XEXP (x, 1) == const0_rtx)
44166 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
44171 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
44172 /* ??? SSE cost should be used here. */
44173 *total = cost->fabs;
44174 else if (X87_FLOAT_MODE_P (mode))
44175 *total = cost->fabs;
44176 else if (FLOAT_MODE_P (mode))
44177 /* ??? SSE vector cost should be used here. */
44178 *total = cost->fabs;
44182 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
44183 /* ??? SSE cost should be used here. */
44184 *total = cost->fsqrt;
44185 else if (X87_FLOAT_MODE_P (mode))
44186 *total = cost->fsqrt;
44187 else if (FLOAT_MODE_P (mode))
44188 /* ??? SSE vector cost should be used here. */
44189 *total = cost->fsqrt;
/* UNSPEC_TP (thread pointer access) is presumably treated as cheap --
   the branch body is elided here; confirm against full source.  */
44193 if (XINT (x, 1) == UNSPEC_TP)
44199 case VEC_DUPLICATE:
44200 /* ??? Assume all of these vector manipulation patterns are
44201 recognizable. In which case they all pretty much have the
44203 *total = cost->fabs;
44206 mask = XEXP (x, 2);
44207 /* This is masked instruction, assume the same cost,
44208 as nonmasked variant. */
44209 if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
44210 *total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed);
44212 *total = cost->fabs;
/* Counter used to generate unique Lnn$lz / LPC$nn labels per stub.  */
44222 static int current_machopic_label_num;
44224 /* Given a symbol name and its associated stub, write out the
44225 definition of the stub. */
/* Emits a Darwin (Mach-O) lazy-binding stub for SYMB into FILE:
   the stub itself, then (except for the AT&T-style stub) the
   stub-binding-helper and the lazy symbol pointer.  32-bit only.  */
44228 machopic_output_stub (FILE *file, const char *symb, const char *stub)
44230 unsigned int length;
44231 char *binder_name, *symbol_name, lazy_ptr_name[32];
44232 int label = ++current_machopic_label_num;
44234 /* For 64-bit we shouldn't get here. */
44235 gcc_assert (!TARGET_64BIT);
44237 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
44238 symb = targetm.strip_name_encoding (symb);
44240 length = strlen (stub);
44241 binder_name = XALLOCAVEC (char, length + 32);
44242 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
44244 length = strlen (symb);
44245 symbol_name = XALLOCAVEC (char, length + 32);
44246 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
44248 sprintf (lazy_ptr_name, "L%d$lz", label);
/* Pick the stub section matching the code model (ATT / pure PIC /
   non-PIC); must stay consistent with the pointer sections below.  */
44250 if (MACHOPIC_ATT_STUB)
44251 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
44252 else if (MACHOPIC_PURE)
44253 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
44255 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
44257 fprintf (file, "%s:\n", stub);
44258 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
44260 if (MACHOPIC_ATT_STUB)
44262 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
44264 else if (MACHOPIC_PURE)
44267 /* 25-byte PIC stub using "CALL get_pc_thunk". */
44268 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
44269 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
44270 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
44271 label, lazy_ptr_name, label);
44272 fprintf (file, "\tjmp\t*%%ecx\n");
44275 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
44277 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
44278 it needs no stub-binding-helper. */
44279 if (MACHOPIC_ATT_STUB)
44282 fprintf (file, "%s:\n", binder_name);
44286 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
44287 fprintf (file, "\tpushl\t%%ecx\n");
44290 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
44292 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
44294 /* N.B. Keep the correspondence of these
44295 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
44296 old-pic/new-pic/non-pic stubs; altering this will break
44297 compatibility with existing dylibs. */
44300 /* 25-byte PIC stub using "CALL get_pc_thunk". */
44301 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
44304 /* 16-byte -mdynamic-no-pic stub. */
44305 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
44307 fprintf (file, "%s:\n", lazy_ptr_name);
44308 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
44309 fprintf (file, ASM_LONG "%s\n", binder_name);
44311 #endif /* TARGET_MACHO */
44313 /* Order the registers for register allocator. */
/* Fills reg_alloc_order[]: call-used GPRs first, then call-saved GPRs,
   then FP/SSE/mask/MPX registers.  x87 stack registers are placed early
   only when not doing SSE math, late otherwise.  */
44316 x86_order_regs_for_local_alloc (void)
44321 /* First allocate the local general purpose registers. */
44322 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
44323 if (GENERAL_REGNO_P (i) && call_used_regs[i])
44324 reg_alloc_order [pos++] = i;
44326 /* Global general purpose registers. */
44327 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
44328 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
44329 reg_alloc_order [pos++] = i;
44331 /* x87 registers come first in case we are doing FP math
44333 if (!TARGET_SSE_MATH)
44334 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
44335 reg_alloc_order [pos++] = i;
44337 /* SSE registers. */
44338 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
44339 reg_alloc_order [pos++] = i;
44340 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
44341 reg_alloc_order [pos++] = i;
44343 /* Extended REX SSE registers. */
44344 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
44345 reg_alloc_order [pos++] = i;
44347 /* Mask register. */
44348 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
44349 reg_alloc_order [pos++] = i;
44351 /* MPX bound registers. */
44352 for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
44353 reg_alloc_order [pos++] = i;
44355 /* x87 registers. */
44356 if (TARGET_SSE_MATH)
44357 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
44358 reg_alloc_order [pos++] = i;
44360 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
44361 reg_alloc_order [pos++] = i;
44363 /* Initialize the rest of array as we do not allocate some registers
44365 while (pos < FIRST_PSEUDO_REGISTER)
/* Remaining slots are padded with register 0.  */
44366 reg_alloc_order [pos++] = 0;
44369 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
44370 in struct attribute_spec handler. */
/* Validates the attribute: must be on a function type, 32-bit only,
   and its single argument must be the integer constant 0 or 1.
   On any violation, warns and sets *no_add_attrs.  */
44372 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
44375 bool *no_add_attrs)
44377 if (TREE_CODE (*node) != FUNCTION_TYPE
44378 && TREE_CODE (*node) != METHOD_TYPE
44379 && TREE_CODE (*node) != FIELD_DECL
44380 && TREE_CODE (*node) != TYPE_DECL)
44382 warning (OPT_Wattributes, "%qE attribute only applies to functions",
44384 *no_add_attrs = true;
/* The attribute describes a 32-bit calling-convention detail only.  */
44389 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
44391 *no_add_attrs = true;
44394 if (is_attribute_p ("callee_pop_aggregate_return", name))
44398 cst = TREE_VALUE (args);
44399 if (TREE_CODE (cst) != INTEGER_CST)
44401 warning (OPT_Wattributes,
44402 "%qE attribute requires an integer constant argument",
44404 *no_add_attrs = true;
44406 else if (compare_tree_int (cst, 0) != 0
44407 && compare_tree_int (cst, 1) != 0)
44409 warning (OPT_Wattributes,
44410 "argument to %qE attribute is neither zero, nor one",
44412 *no_add_attrs = true;
44421 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
44422 struct attribute_spec.handler. */
/* Rejects non-function targets and the mutually-exclusive combination
   of ms_abi with sysv_abi on the same type.  */
44424 ix86_handle_abi_attribute (tree *node, tree name, tree, int,
44425 bool *no_add_attrs)
44427 if (TREE_CODE (*node) != FUNCTION_TYPE
44428 && TREE_CODE (*node) != METHOD_TYPE
44429 && TREE_CODE (*node) != FIELD_DECL
44430 && TREE_CODE (*node) != TYPE_DECL)
44432 warning (OPT_Wattributes, "%qE attribute only applies to functions",
44434 *no_add_attrs = true;
44438 /* Can combine regparm with all attributes but fastcall. */
44439 if (is_attribute_p ("ms_abi", name))
44441 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
44443 error ("ms_abi and sysv_abi attributes are not compatible");
44448 else if (is_attribute_p ("sysv_abi", name))
44450 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
44452 error ("ms_abi and sysv_abi attributes are not compatible");
44461 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
44462 struct attribute_spec.handler. */
/* Only record/union types may carry these attributes, and ms_struct /
   gcc_struct are mutually exclusive on the same type.  */
44464 ix86_handle_struct_attribute (tree *node, tree name, tree, int,
44465 bool *no_add_attrs)
/* For a TYPE_DECL, apply the check to the declared type itself.  */
44468 if (DECL_P (*node))
44470 if (TREE_CODE (*node) == TYPE_DECL)
44471 type = &TREE_TYPE (*node);
44476 if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
44478 warning (OPT_Wattributes, "%qE attribute ignored",
44480 *no_add_attrs = true;
44483 else if ((is_attribute_p ("ms_struct", name)
44484 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
44485 || ((is_attribute_p ("gcc_struct", name)
44486 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
44488 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
44490 *no_add_attrs = true;
/* Attribute handler that accepts the attribute only on FUNCTION_DECLs;
   otherwise warns and suppresses it via *no_add_attrs.  */
44497 ix86_handle_fndecl_attribute (tree *node, tree name, tree, int,
44498 bool *no_add_attrs)
44500 if (TREE_CODE (*node) != FUNCTION_DECL)
44502 warning (OPT_Wattributes, "%qE attribute only applies to functions",
44504 *no_add_attrs = true;
/* Return true if RECORD_TYPE should use MS bitfield layout rules:
   either -mms-bitfields is on and the type is not marked gcc_struct,
   or the type is explicitly marked ms_struct.  */
44510 ix86_ms_bitfield_layout_p (const_tree record_type)
44512 return ((TARGET_MS_BITFIELD_LAYOUT
44513 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
44514 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
44517 /* Returns an expression indicating where the this parameter is
44518 located on entry to the FUNCTION. */
/* 64-bit: `this' is in the first (or second, when the return value is
   an aggregate passed by hidden pointer) integer parameter register of
   the function's ABI.  32-bit: depends on regparm/fastcall/thiscall,
   otherwise it is the first stack slot at 4(%esp).  */
44521 x86_this_parameter (tree function)
44523 tree type = TREE_TYPE (function);
/* AGGR selects the second parameter register when the hidden return
   pointer occupies the first.  */
44524 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
44529 const int *parm_regs;
44531 if (ix86_function_type_abi (type) == MS_ABI)
44532 parm_regs = x86_64_ms_abi_int_parameter_registers;
44534 parm_regs = x86_64_int_parameter_registers;
44535 return gen_rtx_REG (Pmode, parm_regs[aggr]);
44538 nregs = ix86_function_regparm (type, function);
44540 if (nregs > 0 && !stdarg_p (type))
44543 unsigned int ccvt = ix86_get_callcvt (type);
44545 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
44546 regno = aggr ? DX_REG : CX_REG;
44547 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
44551 return gen_rtx_MEM (SImode,
44552 plus_constant (Pmode, stack_pointer_rtx, 4));
44561 return gen_rtx_MEM (SImode,
44562 plus_constant (Pmode,
44563 stack_pointer_rtx, 4));
44566 return gen_rtx_REG (SImode, regno);
/* Default: `this' lives on the stack just above the return address.  */
44569 return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
44573 /* Determine whether x86_output_mi_thunk can succeed. */
/* 64-bit always works.  32-bit needs a scratch register, so fail when
   all three regparm registers are taken and either a vcall offset or a
   non-local PIC reference would require one.  */
44576 x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
44577 const_tree function)
44579 /* 64-bit can handle anything. */
44583 /* For 32-bit, everything's fine if we have one free register. */
44584 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
44587 /* Need a free register for vcall_offset. */
44591 /* Need a free register for GOT references. */
44592 if (flag_pic && !targetm.binds_local_p (function))
44595 /* Otherwise ok. */
44599 /* Output the assembler code for a thunk function. THUNK_DECL is the
44600 declaration for the thunk function itself, FUNCTION is the decl for
44601 the target function. DELTA is an immediate constant offset to be
44602 added to THIS. If VCALL_OFFSET is nonzero, the word at
44603 *(*this + vcall_offset) should be added to THIS. */
/* NOTE(review): several lines of this function are elided in this
   listing; inline notes below are limited to what is visible.  */
44606 x86_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
44607 HOST_WIDE_INT vcall_offset, tree function)
44609 rtx this_param = x86_this_parameter (function);
44610 rtx this_reg, tmp, fnaddr;
44611 unsigned int tmp_regno;
/* Choose a scratch register that does not hold `this': r10 on 64-bit,
   else a register left free by the calling convention.  */
44615 tmp_regno = R10_REG;
44618 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
44619 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
44620 tmp_regno = AX_REG;
44621 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
44622 tmp_regno = DX_REG;
44624 tmp_regno = CX_REG;
44627 emit_note (NOTE_INSN_PROLOGUE_END);
44629 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
44630 pull it in now and let DELTA benefit. */
44631 if (REG_P (this_param))
44632 this_reg = this_param;
44633 else if (vcall_offset)
44635 /* Put the this parameter into %eax. */
44636 this_reg = gen_rtx_REG (Pmode, AX_REG);
44637 emit_move_insn (this_reg, this_param);
44640 this_reg = NULL_RTX;
44642 /* Adjust the this parameter by a fixed constant. */
44645 rtx delta_rtx = GEN_INT (delta);
44646 rtx delta_dst = this_reg ? this_reg : this_param;
/* Immediates too wide for an add insn are staged through the scratch.  */
44650 if (!x86_64_general_operand (delta_rtx, Pmode))
44652 tmp = gen_rtx_REG (Pmode, tmp_regno);
44653 emit_move_insn (tmp, delta_rtx);
44658 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
44661 /* Adjust the this parameter by a value stored in the vtable. */
44664 rtx vcall_addr, vcall_mem, this_mem;
44666 tmp = gen_rtx_REG (Pmode, tmp_regno);
/* Load the vtable pointer; zero-extend when pointers are narrower
   than Pmode (x32).  */
44668 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
44669 if (Pmode != ptr_mode)
44670 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
44671 emit_move_insn (tmp, this_mem);
44673 /* Adjust the this parameter. */
44674 vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
44676 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
44678 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
44679 emit_move_insn (tmp2, GEN_INT (vcall_offset));
44680 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
44683 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
44684 if (Pmode != ptr_mode)
44685 emit_insn (gen_addsi_1_zext (this_reg,
44686 gen_rtx_REG (ptr_mode,
44690 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
44693 /* If necessary, drop THIS back to its stack slot. */
44694 if (this_reg && this_reg != this_param)
44695 emit_move_insn (this_param, this_reg);
/* Now materialize the target function's address, via GOT/PLT or
   machopic indirection when the symbol is not local.  */
44697 fnaddr = XEXP (DECL_RTL (function), 0);
44700 if (!flag_pic || targetm.binds_local_p (function)
44705 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
44706 tmp = gen_rtx_CONST (Pmode, tmp);
44707 fnaddr = gen_const_mem (Pmode, tmp);
44712 if (!flag_pic || targetm.binds_local_p (function))
44715 else if (TARGET_MACHO)
44717 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
44718 fnaddr = XEXP (fnaddr, 0);
44720 #endif /* TARGET_MACHO */
44723 tmp = gen_rtx_REG (Pmode, CX_REG);
44724 output_set_got (tmp, NULL_RTX);
44726 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
44727 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
44728 fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
44729 fnaddr = gen_const_mem (Pmode, fnaddr);
44733 /* Our sibling call patterns do not allow memories, because we have no
44734 predicate that can distinguish between frame and non-frame memory.
44735 For our purposes here, we can get away with (ab)using a jump pattern,
44736 because we're going to do no optimization. */
44737 if (MEM_P (fnaddr))
44739 if (sibcall_insn_operand (fnaddr, word_mode))
44741 fnaddr = XEXP (DECL_RTL (function), 0);
44742 tmp = gen_rtx_MEM (QImode, fnaddr);
44743 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
44744 tmp = emit_call_insn (tmp);
44745 SIBLING_CALL_P (tmp) = 1;
44748 emit_jump_insn (gen_indirect_jump (fnaddr));
44752 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
44754 // CM_LARGE_PIC always uses pseudo PIC register which is
44755 // uninitialized. Since FUNCTION is local and calling it
44756 // doesn't go through PLT, we use scratch register %r11 as
44757 // PIC register and initialize it here.
44758 pic_offset_table_rtx = gen_rtx_REG (Pmode, R11_REG);
44759 ix86_init_large_pic_reg (tmp_regno);
44760 fnaddr = legitimize_pic_address (fnaddr,
44761 gen_rtx_REG (Pmode, tmp_regno));
44764 if (!sibcall_insn_operand (fnaddr, word_mode))
44766 tmp = gen_rtx_REG (word_mode, tmp_regno);
44767 if (GET_MODE (fnaddr) != word_mode)
44768 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
44769 emit_move_insn (tmp, fnaddr);
44773 tmp = gen_rtx_MEM (QImode, fnaddr);
44774 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
44775 tmp = emit_call_insn (tmp);
44776 SIBLING_CALL_P (tmp) = 1;
44780 /* Emit just enough of rest_of_compilation to get the insns emitted.
44781 Note that use_thunk calls assemble_start_function et al. */
44782 insn = get_insns ();
44783 shorten_branches (insn);
44784 final_start_function (insn, file, 1);
44785 final (insn, file, 1);
44786 final_end_function ();
/* Target hook: emit per-file assembler prologue directives
   (.code16gcc for -m16, Darwin file start, optional .version,
   __fltused global, and Intel syntax selection).  */
44790 x86_file_start (void)
44792 default_file_start ();
44794 fputs ("\t.code16gcc\n", asm_out_file);
44796 darwin_file_start ();
44798 if (X86_FILE_START_VERSION_DIRECTIVE)
44799 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
44800 if (X86_FILE_START_FLTUSED)
44801 fputs ("\t.global\t__fltused\n", asm_out_file);
44802 if (ix86_asm_dialect == ASM_INTEL)
44803 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
/* Target hook: cap the alignment of FIELD.  On 32-bit non-IAMCU
   targets without -malign-double, doubles and integer-class modes are
   limited to 32-bit alignment (i386 psABI layout).  */
44807 x86_field_alignment (tree field, int computed)
44810 tree type = TREE_TYPE (field);
44812 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
44815 return iamcu_alignment (type, computed);
/* Look through arrays to the element mode before classifying.  */
44816 mode = TYPE_MODE (strip_array_types (type));
44817 if (mode == DFmode || mode == DCmode
44818 || GET_MODE_CLASS (mode) == MODE_INT
44819 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
44820 return MIN (32, computed);
44824 /* Print call to TARGET to FILE. */
/* With -mnop-mcount the profiler call site is emitted as a 5-byte nop
   (same length as the call), so it can be patched in later.  */
44827 x86_print_call_or_nop (FILE *file, const char *target)
44829 if (flag_nop_mcount)
44830 fprintf (file, "1:\tnopl 0x00(%%eax,%%eax,1)\n"); /* 5 byte nop. */
44832 fprintf (file, "1:\tcall\t%s\n", target);
44835 /* Output assembler code to FILE to increment profiler label # LABELNO
44836 for profiling a function entry. */
/* Emits the mcount call (before or after the prologue per -mfentry),
   the optional profile counter setup, and, with -mrecord-mcount, a
   __mcount_loc section entry referencing the call site label "1:".  */
44838 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
44840 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
44844 #ifndef NO_PROFILE_COUNTERS
44845 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
44848 if (!TARGET_PECOFF && flag_pic)
44849 fprintf (file, "1:\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
44851 x86_print_call_or_nop (file, mcount_name);
44855 #ifndef NO_PROFILE_COUNTERS
44856 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
44859 fprintf (file, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
44863 #ifndef NO_PROFILE_COUNTERS
44864 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
44867 x86_print_call_or_nop (file, mcount_name);
44870 if (flag_record_mcount)
44872 fprintf (file, "\t.section __mcount_loc, \"a\",@progbits\n");
44873 fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
44874 fprintf (file, "\t.previous\n");
44878 /* We don't have exact information about the insn sizes, but we may assume
44879 quite safely that we are informed about all 1 byte insns and memory
44880 address sizes. This is enough to eliminate unnecessary padding in
/* Returns a conservative lower bound (in bytes) on INSN's encoded size;
   used by the jump-mispredict padding pass below.  */
44884 min_insn_size (rtx_insn *insn)
44888 if (!INSN_P (insn) || !active_insn_p (insn))
44891 /* Discard alignments we've emit and jump instructions. */
44892 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
44893 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
44896 /* Important case - calls are always 5 bytes.
44897 It is common to have many calls in the row. */
44899 && symbolic_reference_mentioned_p (PATTERN (insn))
44900 && !SIBLING_CALL_P (insn))
44902 len = get_attr_length (insn);
44906 /* For normal instructions we rely on get_attr_length being exact,
44907 with a few exceptions. */
44908 if (!JUMP_P (insn))
44910 enum attr_type type = get_attr_type (insn);
/* Inline asm length cannot be predicted; treat it as minimal.  */
44915 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
44916 || asm_noperands (PATTERN (insn)) >= 0)
44923 /* Otherwise trust get_attr_length. */
44927 l = get_attr_length_address (insn);
44928 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
44937 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
44939 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
/* Scans the insn stream with a sliding window [START, INSN] and emits a
   `pad' insn whenever four jumps could land in one 16-byte window.  */
44943 ix86_avoid_jump_mispredicts (void)
44945 rtx_insn *insn, *start = get_insns ();
44946 int nbytes = 0, njumps = 0;
44947 bool isjump = false;
44949 /* Look for all minimal intervals of instructions containing 4 jumps.
44950 The intervals are bounded by START and INSN. NBYTES is the total
44951 size of instructions in the interval including INSN and not including
44952 START. When the NBYTES is smaller than 16 bytes, it is possible
44953 that the end of START and INSN ends up in the same 16byte page.
44955 The smallest offset in the page INSN can start is the case where START
44956 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
44957 We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
44959 Don't consider asm goto as jump, while it can contain a jump, it doesn't
44960 have to, control transfer to label(s) can be performed through other
44961 means, and also we estimate minimum length of all asm stmts as 0. */
44962 for (insn = start; insn; insn = NEXT_INSN (insn))
/* A sufficiently aligned label resets the window: nothing before it
   can share its 16-byte page.  */
44966 if (LABEL_P (insn))
44968 int align = label_to_alignment (insn);
44969 int max_skip = label_to_max_skip (insn);
44973 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
44974 already in the current 16 byte page, because otherwise
44975 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
44976 bytes to reach 16 byte boundary. */
44978 || (align <= 3 && max_skip != (1 << align) - 1))
44981 fprintf (dump_file, "Label %i with max_skip %i\n",
44982 INSN_UID (insn), max_skip);
44985 while (nbytes + max_skip >= 16)
44987 start = NEXT_INSN (start);
44988 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
44990 njumps--, isjump = true;
44993 nbytes -= min_insn_size (start);
44999 min_size = min_insn_size (insn);
45000 nbytes += min_size;
45002 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
45003 INSN_UID (insn), min_size);
45004 if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
/* Shrink the window from the front until it holds at most 4 jumps.  */
45012 start = NEXT_INSN (start);
45013 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
45015 njumps--, isjump = true;
45018 nbytes -= min_insn_size (start);
45020 gcc_assert (njumps >= 0);
45022 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
45023 INSN_UID (start), INSN_UID (insn), nbytes);
45025 if (njumps == 3 && isjump && nbytes < 16)
45027 int padsize = 15 - nbytes + min_insn_size (insn);
45030 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
45031 INSN_UID (insn), padsize);
45032 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
45038 /* AMD Athlon works faster
45039 when RET is not destination of conditional jump or directly preceded
45040 by other jump instruction. We avoid the penalty by inserting NOP just
45041 before the RET instructions in such cases. */
/* Walks every predecessor edge of the exit block; where a return insn
   follows a (conditional) jump or a label, replaces the plain return
   with the long form (effectively ret preceded by a nop).  */
45043 ix86_pad_returns (void)
45048 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
45050 basic_block bb = e->src;
45051 rtx_insn *ret = BB_END (bb);
45053 bool replace = false;
45055 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
45056 || optimize_bb_for_size_p (bb))
45058 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
45059 if (active_insn_p (prev) || LABEL_P (prev))
/* Return directly after a label: pad only if some predecessor
   actually jumps (non-fallthru edge) to that label.  */
45061 if (prev && LABEL_P (prev))
45066 FOR_EACH_EDGE (e, ei, bb->preds)
45067 if (EDGE_FREQUENCY (e) && e->src->index >= 0
45068 && !(e->flags & EDGE_FALLTHRU))
45076 prev = prev_active_insn (ret);
45078 && ((JUMP_P (prev) && any_condjump_p (prev))
45081 /* Empty functions get branch mispredict even when
45082 the jump destination is not visible to us. */
45083 if (!prev && !optimize_function_for_size_p (cfun))
45088 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
45094 /* Count the minimum number of instructions in BB. Return 4 if the
45095 number of instructions >= 4. */
/* NOTE(review): elided extract -- return-type line and some braces are not
   visible.  Saturating count: stops as soon as 4 real insns are seen.  */
45098 ix86_count_insn_bb (basic_block bb)
45101 int insn_count = 0;
45103 /* Count number of instructions in this block. Return 4 if the number
45104 of instructions >= 4. */
45105 FOR_BB_INSNS (bb, insn)
45107 /* Only happen in exit blocks. */
45109 && ANY_RETURN_P (PATTERN (insn)))
/* USE/CLOBBER markers and debug insns emit no code; only count the rest.  */
45112 if (NONDEBUG_INSN_P (insn)
45113 && GET_CODE (PATTERN (insn)) != USE
45114 && GET_CODE (PATTERN (insn)) != CLOBBER)
45117 if (insn_count >= 4)
45126 /* Count the minimum number of instructions in code path in BB.
45127 Return 4 if the number of instructions >= 4. */
/* NOTE(review): elided extract.  Computes the minimum insn count over the
   shortest entry->BB path, capped at 4, by adding the cheapest visible
   predecessor's count to BB's own count.  */
45130 ix86_count_insn (basic_block bb)
45134 int min_prev_count;
45136 /* Only bother counting instructions along paths with no
45137 more than 2 basic blocks between entry and exit. Given
45138 that BB has an edge to exit, determine if a predecessor
45139 of BB has an edge from entry. If so, compute the number
45140 of instructions in the predecessor block. If there
45141 happen to be multiple such blocks, compute the minimum. */
45142 min_prev_count = 4;
45143 FOR_EACH_EDGE (e, ei, bb->preds)
45146 edge_iterator prev_ei;
/* BB directly reachable from entry: no predecessor insns to add.  */
45148 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
45150 min_prev_count = 0;
/* Otherwise only count predecessors that are themselves entered
   directly from the entry block (paths of at most 2 blocks).  */
45153 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
45155 if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
45157 int count = ix86_count_insn_bb (e->src);
45158 if (count < min_prev_count)
45159 min_prev_count = count;
45165 if (min_prev_count < 4)
45166 min_prev_count += ix86_count_insn_bb (bb);
45168 return min_prev_count;
45171 /* Pad short function to 4 instructions. */
/* NOTE(review): elided extract -- declaration line and braces missing.
   For each returning block, if the entry->return path has fewer than 4
   insns, NOPs are emitted just before the epilogue to pad it out.  */
45174 ix86_pad_short_function (void)
45179 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
45181 rtx_insn *ret = BB_END (e->src);
45182 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
45184 int insn_count = ix86_count_insn (e->src);
45186 /* Pad short function. */
45187 if (insn_count < 4)
45189 rtx_insn *insn = ret;
45191 /* Find epilogue. */
/* Walk back to the NOTE_INSN_EPILOGUE_BEG note so the padding lands
   before the epilogue, not between epilogue and return.  */
45194 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
45195 insn = PREV_INSN (insn);
45200 /* Two NOPs count as one instruction. */
45201 insn_count = 2 * (4 - insn_count);
45202 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
45208 /* Fix up a Windows system unwinder issue. If an EH region falls through into
45209 the epilogue, the Windows system unwinder will apply epilogue logic and
45210 produce incorrect offsets. This can be avoided by adding a nop between
45211 the last insn that can throw and the first insn of the epilogue. */
/* NOTE(review): elided extract -- declaration line and braces missing.  */
45214 ix86_seh_fixup_eh_fallthru (void)
45219 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
45221 rtx_insn *insn, *next;
45223 /* Find the beginning of the epilogue. */
45224 for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
45225 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
45230 /* We only care about preceding insns that can throw. */
45231 insn = prev_active_insn (insn);
45232 if (insn == NULL || !can_throw_internal (insn))
45235 /* Do not separate calls from their debug information. */
/* Skip forward past var-location / call-arg-location notes so the NOP is
   emitted after the debug notes attached to the throwing insn.  */
45236 for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
45238 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
45239 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION))
45244 emit_insn_after (gen_nops (const1_rtx), insn);
45248 /* Given a register number BASE, the lowest of a group of registers, update
45249 regsets IN and OUT with the registers that should be avoided in input
45250 and output operands respectively when trying to avoid generating a modr/m
45251 byte for -fmitigate-rop. */
/* NOTE(review): elided extract -- the static-return-type line and braces
   are missing.  Marks BASE/BASE+1 as risky outputs and BASE+2/BASE+3 as
   risky inputs; the split presumably mirrors which modr/m byte field
   (reg vs r/m) each register lands in -- confirm against the ROP notes.  */
45254 set_rop_modrm_reg_bits (int base, HARD_REG_SET &in, HARD_REG_SET &out)
45256 SET_HARD_REG_BIT (out, base);
45257 SET_HARD_REG_BIT (out, base + 1);
45258 SET_HARD_REG_BIT (in, base + 2);
45259 SET_HARD_REG_BIT (in, base + 3);
45262 /* Called if -fmitigate_rop is in effect. Try to rewrite instructions so
45263 that certain encodings of modr/m bytes do not occur. */
/* NOTE(review): elided extract -- many interior lines (braces, declarations,
   parts of conditions) are missing.  Two visible phases: (1) use regrename
   chains to move operands of risky insns into safe registers; (2) for
   leftovers, insert an explicit copy into a safe hard register.  */
45265 ix86_mitigate_rop (void)
45267 HARD_REG_SET input_risky;
45268 HARD_REG_SET output_risky;
45269 HARD_REG_SET inout_risky;
/* Build the sets of registers whose encodings produce the modr/m bytes
   we want to avoid.  */
45271 CLEAR_HARD_REG_SET (output_risky);
45272 CLEAR_HARD_REG_SET (input_risky);
45273 SET_HARD_REG_BIT (output_risky, AX_REG);
45274 SET_HARD_REG_BIT (output_risky, CX_REG);
45275 SET_HARD_REG_BIT (input_risky, BX_REG);
45276 SET_HARD_REG_BIT (input_risky, DX_REG);
45277 set_rop_modrm_reg_bits (FIRST_SSE_REG, input_risky, output_risky);
45278 set_rop_modrm_reg_bits (FIRST_REX_INT_REG, input_risky, output_risky);
45279 set_rop_modrm_reg_bits (FIRST_REX_SSE_REG, input_risky, output_risky);
45280 set_rop_modrm_reg_bits (FIRST_EXT_REX_SSE_REG, input_risky, output_risky);
45281 set_rop_modrm_reg_bits (FIRST_MASK_REG, input_risky, output_risky);
45282 set_rop_modrm_reg_bits (FIRST_BND_REG, input_risky, output_risky);
45283 COPY_HARD_REG_SET (inout_risky, input_risky);
45284 IOR_HARD_REG_SET (inout_risky, output_risky);
45286 df_note_add_problem ();
45287 /* Fix up what stack-regs did. */
45288 df_insn_rescan_all ();
/* Phase 1: collect rename candidates via the regrename machinery.  */
45291 regrename_init (true);
45292 regrename_analyze (NULL);
45294 auto_vec<du_head_p> cands;
45296 for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
45298 if (!NONDEBUG_INSN_P (insn))
45301 if (GET_CODE (PATTERN (insn)) == USE
45302 || GET_CODE (PATTERN (insn)) == CLOBBER)
45305 extract_insn (insn);
45308 int modrm = ix86_get_modrm_for_rop (insn, recog_data.operand,
45309 recog_data.n_operands, &opno0,
45312 if (!ix86_rop_should_change_byte_p (modrm))
45315 insn_rr_info *info = &insn_rr[INSN_UID (insn)];
45317 /* This happens when regrename has to fail a block. */
45318 if (!info->op_info)
45321 if (info->op_info[opno0].n_chains != 0)
45323 gcc_assert (info->op_info[opno0].n_chains == 1);
45325 op0c = regrename_chain_from_id (info->op_info[opno0].heads[0]->id);
/* target_data_1/2 count risky output/input uses of the chain; push each
   chain to CANDS only on its first risky use.  */
45326 if (op0c->target_data_1 + op0c->target_data_2 == 0
45327 && !op0c->cannot_rename)
45328 cands.safe_push (op0c);
45330 op0c->target_data_1++;
45332 if (info->op_info[opno1].n_chains != 0)
45334 gcc_assert (info->op_info[opno1].n_chains == 1);
45336 op1c = regrename_chain_from_id (info->op_info[opno1].heads[0]->id);
45337 if (op1c->target_data_1 + op1c->target_data_2 == 0
45338 && !op1c->cannot_rename)
45339 cands.safe_push (op1c);
45341 op1c->target_data_2++;
/* Try to rename each candidate chain into a register outside the risky
   set it participates in.  */
45347 FOR_EACH_VEC_ELT (cands, i, head)
45349 int old_reg, best_reg;
45350 HARD_REG_SET unavailable;
45352 CLEAR_HARD_REG_SET (unavailable);
45353 if (head->target_data_1)
45354 IOR_HARD_REG_SET (unavailable, output_risky);
45355 if (head->target_data_2)
45356 IOR_HARD_REG_SET (unavailable, input_risky);
45359 reg_class superclass = regrename_find_superclass (head, &n_uses,
45361 old_reg = head->regno;
45362 best_reg = find_rename_reg (head, superclass, &unavailable,
45364 bool ok = regrename_do_replace (head, best_reg);
45367 fprintf (dump_file, "Chain %d renamed as %s in %s\n", head->id,
45368 reg_names[best_reg], reg_class_names[superclass]);
45372 regrename_finish ();
/* Phase 2: walk blocks backwards with liveness, fixing any remaining
   risky insns by copying the operand into a newly chosen safe register.  */
45379 INIT_REG_SET (&live);
45381 FOR_EACH_BB_FN (bb, cfun)
45385 COPY_REG_SET (&live, DF_LR_OUT (bb));
45386 df_simulate_initialize_backwards (bb, &live);
45388 FOR_BB_INSNS_REVERSE (bb, insn)
45390 if (!NONDEBUG_INSN_P (insn))
45393 df_simulate_one_insn_backwards (bb, insn, &live);
45395 if (GET_CODE (PATTERN (insn)) == USE
45396 || GET_CODE (PATTERN (insn)) == CLOBBER)
45399 extract_insn (insn);
45400 constrain_operands_cached (insn, reload_completed);
45402 int modrm = ix86_get_modrm_for_rop (insn, recog_data.operand,
45403 recog_data.n_operands, &opno0,
45406 || !ix86_rop_should_change_byte_p (modrm)
45410 rtx oldreg = recog_data.operand[opno1];
45411 preprocess_constraints (insn);
45412 const operand_alternative *alt = which_op_alt ();
/* Bail if any earlyclobber operand overlaps the register we would copy;
   the inserted move would then be invalid.  */
45415 for (i = 0; i < recog_data.n_operands; i++)
45417 && alt[i].earlyclobber
45418 && reg_overlap_mentioned_p (recog_data.operand[i],
45422 if (i < recog_data.n_operands)
45426 fprintf (dump_file,
45427 "attempting to fix modrm byte in insn %d:"
45428 " reg %d class %s", INSN_UID (insn), REGNO (oldreg),
45429 reg_class_names[alt[opno1].cl]);
/* Choose a replacement: not live here, not the old reg, call-used only,
   not fixed, not risky as output, and within the operand's class.  */
45431 HARD_REG_SET unavailable;
45432 REG_SET_TO_HARD_REG_SET (unavailable, &live);
45433 SET_HARD_REG_BIT (unavailable, REGNO (oldreg));
45434 IOR_COMPL_HARD_REG_SET (unavailable, call_used_reg_set);
45435 IOR_HARD_REG_SET (unavailable, fixed_reg_set);
45436 IOR_HARD_REG_SET (unavailable, output_risky);
45437 IOR_COMPL_HARD_REG_SET (unavailable,
45438 reg_class_contents[alt[opno1].cl]);
45440 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
45441 if (!TEST_HARD_REG_BIT (unavailable, i))
45443 if (i == FIRST_PSEUDO_REGISTER)
45446 fprintf (dump_file, ", none available\n");
45450 fprintf (dump_file, " -> %d\n", i);
45451 rtx newreg = gen_rtx_REG (recog_data.operand_mode[opno1], i);
45452 validate_change (insn, recog_data.operand_loc[opno1], newreg, false);
45453 insn = emit_insn_before (gen_move_insn (newreg, oldreg), insn);
45458 /* Implement machine specific optimizations. We implement padding of returns
45459 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
/* NOTE(review): the function's declaration line is missing from this
   extract; from the comment and body this is presumably the TARGET_MACHINE
   DEPENDENT_REORG hook (ix86_reorg) -- confirm in the full file.  */
45463 /* We are freeing block_for_insn in the toplev to keep compatibility
45464 with old MDEP_REORGS that are not CFG based. Recompute it now. */
45465 compute_bb_for_insn ();
45467 if (flag_mitigate_rop)
45468 ix86_mitigate_rop ();
45470 if (TARGET_SEH && current_function_has_exception_handlers ())
45471 ix86_seh_fixup_eh_fallthru ();
/* Padding transforms only pay off when optimizing for speed.  */
45473 if (optimize && optimize_function_for_speed_p (cfun))
45475 if (TARGET_PAD_SHORT_FUNCTION)
45476 ix86_pad_short_function ();
45477 else if (TARGET_PAD_RETURNS)
45478 ix86_pad_returns ();
45479 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
45480 if (TARGET_FOUR_JUMP_LIMIT)
45481 ix86_avoid_jump_mispredicts ();
45486 /* Return nonzero when QImode register that must be represented via REX prefix
/* NOTE(review): elided extract -- the rest of the comment, the declaration
   line and the return statements are missing.  Visible logic: scan the
   insn's operands for a general register outside the QI-addressable set.  */
45489 x86_extended_QIreg_mentioned_p (rtx_insn *insn)
45492 extract_insn_cached (insn);
45493 for (i = 0; i < recog_data.n_operands; i++)
45494 if (GENERAL_REG_P (recog_data.operand[i])
45495 && !QI_REGNO_P (REGNO (recog_data.operand[i])))
45500 /* Return true when INSN mentions register that must be encoded using REX
/* NOTE(review): elided extract -- comment tail, declaration line and
   returns missing.  Walks every sub-rtx looking for REX int/SSE regs.  */
45503 x86_extended_reg_mentioned_p (rtx insn)
45505 subrtx_iterator::array_type array;
45506 FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST)
45508 const_rtx x = *iter;
45510 && (REX_INT_REGNO_P (REGNO (x)) || REX_SSE_REGNO_P (REGNO (x))))
45516 /* If profitable, negate (without causing overflow) integer constant
45517 of mode MODE at location LOC. Return true in this case. */
/* NOTE(review): elided extract -- declaration line, switch skeleton and
   return statements are missing from view.  */
45519 x86_maybe_negate_const_int (rtx *loc, machine_mode mode)
45523 if (!CONST_INT_P (*loc))
45529 /* DImode x86_64 constants must fit in 32 bits. */
45530 gcc_assert (x86_64_immediate_operand (*loc, mode));
45541 gcc_unreachable ();
45544 /* Avoid overflows. */
/* Negating the mode's sign bit would overflow; refuse.  */
45545 if (mode_signbit_p (mode, *loc))
45548 val = INTVAL (*loc);
45550 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
45551 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
45552 if ((val < 0 && val != -128)
45555 *loc = GEN_INT (-val);
45562 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
45563 optabs would emit if we didn't have TFmode patterns. */
/* NOTE(review): elided extract -- declaration line, OUT initialization and
   braces are missing.  Standard unsigned->float idiom: if the input is
   non-negative, a signed convert suffices; otherwise halve (keeping the
   low bit ORed in for correct rounding), convert, and double.  */
45566 x86_emit_floatuns (rtx operands[2])
45568 rtx_code_label *neglab, *donelab;
45569 rtx i0, i1, f0, in, out;
45570 machine_mode mode, inmode;
45572 inmode = GET_MODE (operands[1]);
45573 gcc_assert (inmode == SImode || inmode == DImode);
45576 in = force_reg (inmode, operands[1]);
45577 mode = GET_MODE (out);
45578 neglab = gen_label_rtx ();
45579 donelab = gen_label_rtx ();
45580 f0 = gen_reg_rtx (mode);
45582 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
/* Fast path: sign bit clear, signed conversion is exact.  */
45584 expand_float (out, in, 0);
45586 emit_jump_insn (gen_jump (donelab));
45589 emit_label (neglab);
/* Slow path: in >> 1 | (in & 1), convert, then add to itself.  */
45591 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
45593 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
45595 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
45597 expand_float (f0, i0, 0);
45599 emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));
45601 emit_label (donelab);
45604 static bool canonicalize_perm (struct expand_vec_perm_d *d);
45605 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
45606 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
45607 static bool expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool);
45609 /* Get a vector mode of the same size as the original but with elements
45610 twice as wide. This is only guaranteed to apply to integral vectors. */
/* NOTE(review): elided extract -- the `return n;` line is not visible.  */
45612 static inline machine_mode
45613 get_mode_wider_vector (machine_mode o)
45615 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
45616 machine_mode n = GET_MODE_WIDER_MODE (o);
/* Same total size, half the element count => elements twice as wide.  */
45617 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
45618 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
45622 /* A subroutine of ix86_expand_vector_init_duplicate. Tries to
45623 fill target with val via vec_duplicate. */
/* NOTE(review): elided extract -- declaration line, start_sequence/end
   bookkeeping and the final return are missing from view.  */
45626 ix86_vector_duplicate_value (machine_mode mode, rtx target, rtx val)
45632 /* First attempt to recognize VAL as-is. */
45633 dup = gen_rtx_VEC_DUPLICATE (mode, val);
45634 insn = emit_insn (gen_rtx_SET (target, dup));
45635 if (recog_memoized (insn) < 0)
45638 /* If that fails, force VAL into a register. */
45641 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
45642 seq = get_insns ();
/* Emit the register-forcing sequence ahead of the dup insn, then
   re-recognize.  */
45645 emit_insn_before (seq, insn);
45647 ok = recog_memoized (insn) >= 0;
45653 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
45654 with all elements equal to VAR. Return true if successful. */
/* NOTE(review): elided extract -- the mode switch skeleton, several case
   labels and returns are missing; each visible fragment handles one
   family of modes.  */
45657 ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode,
45658 rtx target, rtx val)
45682 return ix86_vector_duplicate_value (mode, target, val);
45687 if (TARGET_SSE || TARGET_3DNOW_A)
/* Truncate the (SImode-extended) value and duplicate it.  */
45691 val = gen_lowpart (SImode, val);
45692 x = gen_rtx_TRUNCATE (HImode, val);
45693 x = gen_rtx_VEC_DUPLICATE (mode, x);
45694 emit_insn (gen_rtx_SET (target, x));
45706 return ix86_vector_duplicate_value (mode, target, val);
/* Byte/short broadcast via a one-operand permutation.  */
45710 struct expand_vec_perm_d dperm;
45714 memset (&dperm, 0, sizeof (dperm));
45715 dperm.target = target;
45716 dperm.vmode = mode;
45717 dperm.nelt = GET_MODE_NUNITS (mode);
45718 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
45719 dperm.one_operand_p = true;
45721 /* Extend to SImode using a paradoxical SUBREG. */
45722 tmp1 = gen_reg_rtx (SImode);
45723 emit_move_insn (tmp1, gen_lowpart (SImode, val));
45725 /* Insert the SImode value as low element of a V4SImode vector. */
45726 tmp2 = gen_reg_rtx (V4SImode);
45727 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
45728 emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2));
45730 ok = (expand_vec_perm_1 (&dperm)
45731 || expand_vec_perm_broadcast_1 (&dperm));
45739 return ix86_vector_duplicate_value (mode, target, val);
45746 /* Replicate the value once into the next wider mode and recurse. */
45748 machine_mode smode, wsmode, wvmode;
45751 smode = GET_MODE_INNER (mode);
45752 wvmode = get_mode_wider_vector (mode);
45753 wsmode = GET_MODE_INNER (wvmode);
/* Pack two copies of VAL into one wider scalar: (val << bits) | val.  */
45755 val = convert_modes (wsmode, smode, val, true);
45756 x = expand_simple_binop (wsmode, ASHIFT, val,
45757 GEN_INT (GET_MODE_BITSIZE (smode)),
45758 NULL_RTX, 1, OPTAB_LIB_WIDEN);
45759 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
45761 x = gen_reg_rtx (wvmode);
45762 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
45764 emit_move_insn (target, gen_lowpart (GET_MODE (target), x));
45771 return ix86_vector_duplicate_value (mode, target, val);
/* 256-bit QI/HI: broadcast into the half-width mode, then concat.  */
45774 machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
45775 rtx x = gen_reg_rtx (hvmode);
45777 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
45780 x = gen_rtx_VEC_CONCAT (mode, x, x);
45781 emit_insn (gen_rtx_SET (target, x));
/* 512-bit QI/HI: direct broadcast needs AVX512BW, else split in half.  */
45787 if (TARGET_AVX512BW)
45788 return ix86_vector_duplicate_value (mode, target, val);
45791 machine_mode hvmode = (mode == V32HImode ? V16HImode : V32QImode);
45792 rtx x = gen_reg_rtx (hvmode);
45794 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
45797 x = gen_rtx_VEC_CONCAT (mode, x, x);
45798 emit_insn (gen_rtx_SET (target, x));
45807 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
45808 whose ONE_VAR element is VAR, and other elements are zero. Return true
/* NOTE(review): elided extract -- the comment tail, declaration line,
   mode-switch skeleton and several returns are missing from view.  */
45812 ix86_expand_vector_init_one_nonzero (bool mmx_ok, machine_mode mode,
45813 rtx target, rtx var, int one_var)
45815 machine_mode vsimode;
45818 bool use_vector_set = false;
45823 /* For SSE4.1, we normally use vector set. But if the second
45824 element is zero and inter-unit moves are OK, we use movq
45826 use_vector_set = (TARGET_64BIT && TARGET_SSE4_1
45827 && !(TARGET_INTER_UNIT_MOVES_TO_VEC
45833 use_vector_set = TARGET_SSE4_1;
45836 use_vector_set = TARGET_SSE2;
45839 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
45846 use_vector_set = TARGET_AVX;
45849 /* Use ix86_expand_vector_set in 64bit mode only. */
45850 use_vector_set = TARGET_AVX && TARGET_64BIT;
/* Easy path: zero the vector, then insert VAR at position ONE_VAR.  */
45856 if (use_vector_set)
45858 emit_insn (gen_rtx_SET (target, CONST0_RTX (mode)));
45859 var = force_reg (GET_MODE_INNER (mode), var);
45860 ix86_expand_vector_set (mmx_ok, target, var, one_var);
/* Two-element case with VAR in element 0: concat with a zero scalar.  */
45876 var = force_reg (GET_MODE_INNER (mode), var);
45877 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
45878 emit_insn (gen_rtx_SET (target, x));
45883 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
45884 new_target = gen_reg_rtx (mode);
45886 new_target = target;
/* Broadcast VAR, then merge with zero keeping only element 0 live.  */
45887 var = force_reg (GET_MODE_INNER (mode), var);
45888 x = gen_rtx_VEC_DUPLICATE (mode, var);
45889 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
45890 emit_insn (gen_rtx_SET (new_target, x));
45893 /* We need to shuffle the value to the correct position, so
45894 create a new pseudo to store the intermediate result. */
45896 /* With SSE2, we can use the integer shuffle insns. */
45897 if (mode != V4SFmode && TARGET_SSE2)
45899 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
45901 GEN_INT (one_var == 1 ? 0 : 1),
45902 GEN_INT (one_var == 2 ? 0 : 1),
45903 GEN_INT (one_var == 3 ? 0 : 1)));
45904 if (target != new_target)
45905 emit_move_insn (target, new_target);
45909 /* Otherwise convert the intermediate result to V4SFmode and
45910 use the SSE1 shuffle instructions. */
45911 if (mode != V4SFmode)
45913 tmp = gen_reg_rtx (V4SFmode);
45914 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
45919 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
45921 GEN_INT (one_var == 1 ? 0 : 1),
45922 GEN_INT (one_var == 2 ? 0+4 : 1+4),
45923 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
45925 if (mode != V4SFmode)
45926 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
45927 else if (tmp != target)
45928 emit_move_insn (target, tmp);
45930 else if (target != new_target)
45931 emit_move_insn (target, new_target);
45936 vsimode = V4SImode;
45942 vsimode = V2SImode;
45948 /* Zero extend the variable element to SImode and recurse. */
45949 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
45951 x = gen_reg_rtx (vsimode);
45952 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
45954 gcc_unreachable ();
45956 emit_move_insn (target, gen_lowpart (mode, x));
45964 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
45965 consisting of the values in VALS. It is known that all elements
45966 except ONE_VAR are constants. Return true if successful. */
/* NOTE(review): elided extract -- declaration line, mode switch and
   returns missing.  Strategy: materialize the constant part from the
   pool with ONE_VAR zeroed, then overwrite that element with VAR.  */
45969 ix86_expand_vector_init_one_var (bool mmx_ok, machine_mode mode,
45970 rtx target, rtx vals, int one_var)
45972 rtx var = XVECEXP (vals, 0, one_var);
45973 machine_mode wmode;
45976 const_vec = copy_rtx (vals);
45977 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
45978 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
45986 /* For the two element vectors, it's just as easy to use
45987 the general case. */
45991 /* Use ix86_expand_vector_set in 64bit mode only. */
46014 /* There's no way to set one QImode entry easily. Combine
46015 the variable value with its adjacent constant value, and
46016 promote to an HImode set. */
46017 x = XVECEXP (vals, 0, one_var ^ 1);
/* Merge VAR with its neighbour into one HImode value; which byte ends up
   high depends on whether ONE_VAR is odd or even.  */
46020 var = convert_modes (HImode, QImode, var, true);
46021 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
46022 NULL_RTX, 1, OPTAB_LIB_WIDEN);
46023 x = GEN_INT (INTVAL (x) & 0xff);
46027 var = convert_modes (HImode, QImode, var, true);
46028 x = gen_int_mode (INTVAL (x) << 8, HImode);
46030 if (x != const0_rtx)
46031 var = expand_simple_binop (HImode, IOR, var, x, var,
46032 1, OPTAB_LIB_WIDEN);
46034 x = gen_reg_rtx (wmode);
46035 emit_move_insn (x, gen_lowpart (wmode, const_vec));
46036 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
46038 emit_move_insn (target, gen_lowpart (mode, x));
/* General case: load constants, then insert the variable element.  */
46045 emit_move_insn (target, const_vec);
46046 ix86_expand_vector_set (mmx_ok, target, var, one_var);
46050 /* A subroutine of ix86_expand_vector_init_general. Use vector
46051 concatenate to handle the most general case: all values variable,
46052 and none identical. */
/* NOTE(review): elided extract -- declaration line, the switch over N and
   most case labels are missing; visible fragments show the n==2 concat
   and the recursive halving for larger N.  */
46055 ix86_expand_vector_init_concat (machine_mode mode,
46056 rtx target, rtx *ops, int n)
46058 machine_mode cmode, hmode = VOIDmode, gmode = VOIDmode;
46059 rtx first[16], second[8], third[4];
46111 gcc_unreachable ();
/* n == 2: force both halves into registers and emit one VEC_CONCAT.  */
46114 if (!register_operand (ops[1], cmode))
46115 ops[1] = force_reg (cmode, ops[1]);
46116 if (!register_operand (ops[0], cmode))
46117 ops[0] = force_reg (cmode, ops[0]);
46118 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, ops[0],
46138 gcc_unreachable ();
46162 gcc_unreachable ();
46180 gcc_unreachable ();
46185 /* FIXME: We process inputs backward to help RA. PR 36222. */
/* Pairwise-combine adjacent input elements into half-width vectors.  */
46188 for (; i > 0; i -= 2, j--)
46190 first[j] = gen_reg_rtx (cmode);
46191 v = gen_rtvec (2, ops[i - 1], ops[i]);
46192 ix86_expand_vector_init (false, first[j],
46193 gen_rtx_PARALLEL (cmode, v));
/* Recurse: quarters -> halves -> full vector (gmode path), or just
   halves -> full (hmode path).  */
46199 gcc_assert (hmode != VOIDmode);
46200 gcc_assert (gmode != VOIDmode);
46201 for (i = j = 0; i < n; i += 2, j++)
46203 second[j] = gen_reg_rtx (hmode);
46204 ix86_expand_vector_init_concat (hmode, second [j],
46208 for (i = j = 0; i < n; i += 2, j++)
46210 third[j] = gen_reg_rtx (gmode);
46211 ix86_expand_vector_init_concat (gmode, third[j],
46215 ix86_expand_vector_init_concat (mode, target, third, n);
46219 gcc_assert (hmode != VOIDmode);
46220 for (i = j = 0; i < n; i += 2, j++)
46222 second[j] = gen_reg_rtx (hmode);
46223 ix86_expand_vector_init_concat (hmode, second [j],
46227 ix86_expand_vector_init_concat (mode, target, second, n);
46230 ix86_expand_vector_init_concat (mode, target, first, n);
46234 gcc_unreachable ();
46238 /* A subroutine of ix86_expand_vector_init_general. Use vector
46239 interleave to handle the most general case: all values variable,
46240 and none identical. */
/* NOTE(review): elided extract -- declaration line, switch skeleton and
   braces missing.  Builds the vector by loading element pairs and then
   interleaving low halves in progressively wider integer modes.  */
46243 ix86_expand_vector_init_interleave (machine_mode mode,
46244 rtx target, rtx *ops, int n)
46246 machine_mode first_imode, second_imode, third_imode, inner_mode;
46249 rtx (*gen_load_even) (rtx, rtx, rtx);
46250 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
46251 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
/* Select insn generators per element mode (visible: V8HI and V16QI).  */
46256 gen_load_even = gen_vec_setv8hi;
46257 gen_interleave_first_low = gen_vec_interleave_lowv4si;
46258 gen_interleave_second_low = gen_vec_interleave_lowv2di;
46259 inner_mode = HImode;
46260 first_imode = V4SImode;
46261 second_imode = V2DImode;
46262 third_imode = VOIDmode;
46265 gen_load_even = gen_vec_setv16qi;
46266 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
46267 gen_interleave_second_low = gen_vec_interleave_lowv4si;
46268 inner_mode = QImode;
46269 first_imode = V8HImode;
46270 second_imode = V4SImode;
46271 third_imode = V2DImode;
46274 gcc_unreachable ();
46277 for (i = 0; i < n; i++)
46279 /* Extend the odd elment to SImode using a paradoxical SUBREG. */
46280 op0 = gen_reg_rtx (SImode);
46281 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
46283 /* Insert the SImode value as low element of V4SImode vector. */
46284 op1 = gen_reg_rtx (V4SImode);
46285 op0 = gen_rtx_VEC_MERGE (V4SImode,
46286 gen_rtx_VEC_DUPLICATE (V4SImode,
46288 CONST0_RTX (V4SImode),
46290 emit_insn (gen_rtx_SET (op1, op0));
46292 /* Cast the V4SImode vector back to a vector in orignal mode. */
46293 op0 = gen_reg_rtx (mode);
46294 emit_move_insn (op0, gen_lowpart (mode, op1));
46296 /* Load even elements into the second position. */
46297 emit_insn (gen_load_even (op0,
46298 force_reg (inner_mode,
46302 /* Cast vector to FIRST_IMODE vector. */
46303 ops[i] = gen_reg_rtx (first_imode);
46304 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
46307 /* Interleave low FIRST_IMODE vectors. */
46308 for (i = j = 0; i < n; i += 2, j++)
46310 op0 = gen_reg_rtx (first_imode);
46311 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
46313 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
46314 ops[j] = gen_reg_rtx (second_imode);
46315 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
46318 /* Interleave low SECOND_IMODE vectors. */
46319 switch (second_imode)
46322 for (i = j = 0; i < n / 2; i += 2, j++)
46324 op0 = gen_reg_rtx (second_imode);
46325 emit_insn (gen_interleave_second_low (op0, ops[i],
46328 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
46330 ops[j] = gen_reg_rtx (third_imode);
46331 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
/* After the V4SI round, continue interleaving at V2DI.  */
46333 second_imode = V2DImode;
46334 gen_interleave_second_low = gen_vec_interleave_lowv2di;
46338 op0 = gen_reg_rtx (second_imode);
46339 emit_insn (gen_interleave_second_low (op0, ops[0],
46342 /* Cast the SECOND_IMODE vector back to a vector on original
46344 emit_insn (gen_rtx_SET (target, gen_lowpart (mode, op0)));
46348 gcc_unreachable ();
46352 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
46353 all values variable, and none identical. */
/* NOTE(review): elided extract -- declaration line, mode-switch skeleton
   and many braces missing.  Visible strategies: concat recursion, split
   into halves/quarters + interleave, and a word-building fallback.  */
46356 ix86_expand_vector_init_general (bool mmx_ok, machine_mode mode,
46357 rtx target, rtx vals)
46359 rtx ops[64], op0, op1, op2, op3, op4, op5;
46360 machine_mode half_mode = VOIDmode;
46361 machine_mode quarter_mode = VOIDmode;
46368 if (!mmx_ok && !TARGET_SSE)
/* Concat-based build for modes handled by init_concat.  */
46384 n = GET_MODE_NUNITS (mode);
46385 for (i = 0; i < n; i++)
46386 ops[i] = XVECEXP (vals, 0, i);
46387 ix86_expand_vector_init_concat (mode, target, ops, n);
/* 256-bit QI/HI: interleave each half, then concat the halves.  */
46391 half_mode = V16QImode;
46395 half_mode = V8HImode;
46399 n = GET_MODE_NUNITS (mode);
46400 for (i = 0; i < n; i++)
46401 ops[i] = XVECEXP (vals, 0, i);
46402 op0 = gen_reg_rtx (half_mode);
46403 op1 = gen_reg_rtx (half_mode);
46404 ix86_expand_vector_init_interleave (half_mode, op0, ops,
46406 ix86_expand_vector_init_interleave (half_mode, op1,
46407 &ops [n >> 1], n >> 2);
46408 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, op0, op1)));
/* 512-bit QI/HI: four quarter builds, two half concats, one full.  */
46412 quarter_mode = V16QImode;
46413 half_mode = V32QImode;
46417 quarter_mode = V8HImode;
46418 half_mode = V16HImode;
46422 n = GET_MODE_NUNITS (mode);
46423 for (i = 0; i < n; i++)
46424 ops[i] = XVECEXP (vals, 0, i);
46425 op0 = gen_reg_rtx (quarter_mode);
46426 op1 = gen_reg_rtx (quarter_mode);
46427 op2 = gen_reg_rtx (quarter_mode);
46428 op3 = gen_reg_rtx (quarter_mode);
46429 op4 = gen_reg_rtx (half_mode);
46430 op5 = gen_reg_rtx (half_mode);
46431 ix86_expand_vector_init_interleave (quarter_mode, op0, ops,
46433 ix86_expand_vector_init_interleave (quarter_mode, op1,
46434 &ops [n >> 2], n >> 3);
46435 ix86_expand_vector_init_interleave (quarter_mode, op2,
46436 &ops [n >> 1], n >> 3);
46437 ix86_expand_vector_init_interleave (quarter_mode, op3,
46438 &ops [(n >> 1) | (n >> 2)], n >> 3);
46439 emit_insn (gen_rtx_SET (op4, gen_rtx_VEC_CONCAT (half_mode, op0, op1)));
46440 emit_insn (gen_rtx_SET (op5, gen_rtx_VEC_CONCAT (half_mode, op2, op3)));
46441 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, op4, op5)));
46445 if (!TARGET_SSE4_1)
46453 /* Don't use ix86_expand_vector_init_interleave if we can't
46454 move from GPR to SSE register directly. */
46455 if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
46458 n = GET_MODE_NUNITS (mode);
46459 for (i = 0; i < n; i++)
46460 ops[i] = XVECEXP (vals, 0, i);
46461 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
46469 gcc_unreachable ();
/* Fallback: assemble word-sized chunks with shift+or, then combine.  */
46473 int i, j, n_elts, n_words, n_elt_per_word;
46474 machine_mode inner_mode;
46475 rtx words[4], shift;
46477 inner_mode = GET_MODE_INNER (mode);
46478 n_elts = GET_MODE_NUNITS (mode);
46479 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
46480 n_elt_per_word = n_elts / n_words;
46481 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
46483 for (i = 0; i < n_words; ++i)
46485 rtx word = NULL_RTX;
/* Elements are folded in from highest to lowest within the word.  */
46487 for (j = 0; j < n_elt_per_word; ++j)
46489 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
46490 elt = convert_modes (word_mode, inner_mode, elt, true);
46496 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
46497 word, 1, OPTAB_LIB_WIDEN);
46498 word = expand_simple_binop (word_mode, IOR, word, elt,
46499 word, 1, OPTAB_LIB_WIDEN);
46507 emit_move_insn (target, gen_lowpart (mode, words[0]));
46508 else if (n_words == 2)
46510 rtx tmp = gen_reg_rtx (mode);
/* Clobber first so the two partial word stores don't read TMP
   uninitialized.  */
46511 emit_clobber (tmp);
46512 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
46513 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
46514 emit_move_insn (target, tmp);
46516 else if (n_words == 4)
46518 rtx tmp = gen_reg_rtx (V4SImode);
46519 gcc_assert (word_mode == SImode);
46520 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
46521 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
46522 emit_move_insn (target, gen_lowpart (mode, tmp));
46525 gcc_unreachable ();
46529 /* Initialize vector TARGET via VALS. Suppress the use of MMX
46530 instructions unless MMX_OK is true. */
/* NOTE(review): elided extract -- declaration line, braces and some
   returns are missing.  Dispatcher: classify VALS (all-constant,
   all-same, one variable, fully general) and pick the cheapest expander.  */
46533 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
46535 machine_mode mode = GET_MODE (target);
46536 machine_mode inner_mode = GET_MODE_INNER (mode);
46537 int n_elts = GET_MODE_NUNITS (mode);
46538 int n_var = 0, one_var = -1;
46539 bool all_same = true, all_const_zero = true;
46543 for (i = 0; i < n_elts; ++i)
46545 x = XVECEXP (vals, 0, i);
/* Anything that is not a scalar-int/double/fixed constant counts as a
   variable element.  */
46546 if (!(CONST_SCALAR_INT_P (x)
46547 || CONST_DOUBLE_P (x)
46548 || CONST_FIXED_P (x)))
46549 n_var++, one_var = i;
46550 else if (x != CONST0_RTX (inner_mode))
46551 all_const_zero = false;
46552 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
46556 /* Constants are best loaded from the constant pool. */
46559 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
46563 /* If all values are identical, broadcast the value. */
46565 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
46566 XVECEXP (vals, 0, 0)))
46569 /* Values where only one field is non-constant are best loaded from
46570 the pool and overwritten via move later. */
46574 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
46575 XVECEXP (vals, 0, one_var),
46579 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
46583 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
/* Store scalar VAL into element ELT of vector register TARGET,
   choosing the cheapest strategy for TARGET's mode and the enabled
   ISA level.  MMX_OK indicates MMX registers may be used (see the
   use_vec_merge assignment gated on mmx_ok below).
   NOTE(review): this view of the file is elided — the embedded
   original line numbers are non-contiguous, so switch labels and
   braces between the visible statements are missing.  Comments below
   annotate only the statements that are visible.  */
46587 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
46589 machine_mode mode = GET_MODE (target);
46590 machine_mode inner_mode = GET_MODE_INNER (mode);
46591 machine_mode half_mode;
46592 bool use_vec_merge = false;
/* Per-mode expanders to extract the low/high 128-bit half of a
   256-bit vector; indexed [mode-row][hi].  */
46594 static rtx (*gen_extract[6][2]) (rtx, rtx)
46596 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
46597 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
46598 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
46599 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
46600 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
46601 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
/* Matching expanders to write a 128-bit half back into the lo/hi
   lane of a 256-bit vector.  */
46603 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
46605 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
46606 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
46607 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
46608 { gen_vec_set_lo_v4si, gen_vec_set_hi_v4di },
46609 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
46610 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
/* mmode != VOIDmode selects the AVX-512 mask-blend path at the end.  */
46613 machine_mode mmode = VOIDmode;
46614 rtx (*gen_blendm) (rtx, rtx, rtx, rtx);
/* Two-element vector: rebuild via VEC_CONCAT of VAL with the other
   (extracted) element, ordered according to ELT.  */
46622 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
46623 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
46625 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
46627 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
46628 emit_insn (gen_rtx_SET (target, tmp));
46634 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
46638 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
46639 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
46641 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
46643 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
46644 emit_insn (gen_rtx_SET (target, tmp));
46651 /* For the two element vectors, we implement a VEC_CONCAT with
46652 the extraction of the other element. */
46654 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
46655 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
/* Order the concat so VAL lands in slot ELT.  */
46658 op0 = val, op1 = tmp;
46660 op0 = tmp, op1 = val;
46662 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
46663 emit_insn (gen_rtx_SET (target, tmp));
46668 use_vec_merge = TARGET_SSE4_1;
46675 use_vec_merge = true;
/* V4SF without SSE4.1: shuffle VAL into place with unpcklps/shufps.
   The A B C D comments track the lane contents.  */
46679 /* tmp = target = A B C D */
46680 tmp = copy_to_reg (target);
46681 /* target = A A B B */
46682 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
46683 /* target = X A B B */
46684 ix86_expand_vector_set (false, target, val, 0);
46685 /* target = A X C D */
46686 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
46687 const1_rtx, const0_rtx,
46688 GEN_INT (2+4), GEN_INT (3+4)));
46692 /* tmp = target = A B C D */
46693 tmp = copy_to_reg (target);
46694 /* tmp = X B C D */
46695 ix86_expand_vector_set (false, tmp, val, 0);
46696 /* target = A B X D */
46697 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
46698 const0_rtx, const1_rtx,
46699 GEN_INT (0+4), GEN_INT (3+4)));
46703 /* tmp = target = A B C D */
46704 tmp = copy_to_reg (target);
46705 /* tmp = X B C D */
46706 ix86_expand_vector_set (false, tmp, val, 0);
46707 /* target = A B X D */
46708 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
46709 const0_rtx, const1_rtx,
46710 GEN_INT (2+4), GEN_INT (0+4)));
46714 gcc_unreachable ();
46719 use_vec_merge = TARGET_SSE4_1;
46723 /* Element 0 handled by vec_merge below. */
46726 use_vec_merge = true;
46732 /* With SSE2, use integer shuffles to swap element 0 and ELT,
46733 store into element 0, then shuffle them back. */
46737 order[0] = GEN_INT (elt);
46738 order[1] = const1_rtx;
46739 order[2] = const2_rtx;
46740 order[3] = GEN_INT (3);
46741 order[elt] = const0_rtx;
46743 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
46744 order[1], order[2], order[3]));
46746 ix86_expand_vector_set (false, target, val, 0);
/* Same permutation is its own inverse (swap of lanes 0 and ELT).  */
46748 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
46749 order[1], order[2], order[3]));
46753 /* For SSE1, we have to reuse the V4SF code. */
46754 rtx t = gen_reg_rtx (V4SFmode);
46755 ix86_expand_vector_set (false, t, gen_lowpart (SFmode, val), elt);
46756 emit_move_insn (target, gen_lowpart (mode, t));
46761 use_vec_merge = TARGET_SSE2;
46764 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
46768 use_vec_merge = TARGET_SSE4_1;
/* 256-bit modes: pick the 128-bit half mode, then extract the half
   containing ELT, recurse on it, and insert it back.  */
46775 half_mode = V16QImode;
46781 half_mode = V8HImode;
46787 half_mode = V4SImode;
46793 half_mode = V2DImode;
46799 half_mode = V4SFmode;
46805 half_mode = V2DFmode;
46811 /* Compute offset. */
46815 gcc_assert (i <= 1);
46817 /* Extract the half. */
46818 tmp = gen_reg_rtx (half_mode);
46819 emit_insn (gen_extract[j][i] (tmp, target));
46821 /* Put val in tmp at elt. */
46822 ix86_expand_vector_set (false, tmp, val, elt);
46825 emit_insn (gen_insert[j][i] (target, target, tmp));
/* 512-bit modes: record the mask mode and blendm expander; the
   actual blend is emitted below when mmode != VOIDmode.  */
46829 if (TARGET_AVX512F)
46832 gen_blendm = gen_avx512f_blendmv8df;
46837 if (TARGET_AVX512F)
46840 gen_blendm = gen_avx512f_blendmv8di;
46845 if (TARGET_AVX512F)
46848 gen_blendm = gen_avx512f_blendmv16sf;
46853 if (TARGET_AVX512F)
46856 gen_blendm = gen_avx512f_blendmv16si;
46861 if (TARGET_AVX512F && TARGET_AVX512BW)
46864 gen_blendm = gen_avx512bw_blendmv32hi;
46869 if (TARGET_AVX512F && TARGET_AVX512BW)
46872 gen_blendm = gen_avx512bw_blendmv64qi;
/* AVX-512 path: broadcast VAL and blend it into TARGET under a
   one-hot mask selecting element ELT.  */
46880 if (mmode != VOIDmode)
46882 tmp = gen_reg_rtx (mode);
46883 emit_insn (gen_rtx_SET (tmp, gen_rtx_VEC_DUPLICATE (mode, val)));
46884 emit_insn (gen_blendm (target, tmp, target,
46886 gen_int_mode (1 << elt, mmode))));
46888 else if (use_vec_merge)
46890 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
46891 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
46892 emit_insn (gen_rtx_SET (target, tmp));
/* Last resort: bounce the vector through a stack slot and do a
   scalar store into the element's memory location.  */
46896 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
46898 emit_move_insn (mem, target);
46900 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
46901 emit_move_insn (tmp, val);
46903 emit_move_insn (target, mem);
/* Extract element ELT of vector VEC into scalar register TARGET.
   MMX_OK indicates MMX registers are acceptable (gates use_vec_extr
   for one mode below).
   NOTE(review): elided view — switch labels/braces between the
   visible statements are missing from this chunk.  */
46908 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
46910 machine_mode mode = GET_MODE (vec);
46911 machine_mode inner_mode = GET_MODE_INNER (mode);
46912 bool use_vec_extr = false;
46925 use_vec_extr = true;
46929 use_vec_extr = TARGET_SSE4_1;
/* V4SF without SSE4.1: shuffle the wanted lane down to lane 0.  */
46941 tmp = gen_reg_rtx (mode);
46942 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
46943 GEN_INT (elt), GEN_INT (elt),
46944 GEN_INT (elt+4), GEN_INT (elt+4)));
46948 tmp = gen_reg_rtx (mode);
46949 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
46953 gcc_unreachable ();
46956 use_vec_extr = true;
46961 use_vec_extr = TARGET_SSE4_1;
/* V4SI: pshufd the wanted lane to lane 0, then extract lane 0.  */
46975 tmp = gen_reg_rtx (mode);
46976 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
46977 GEN_INT (elt), GEN_INT (elt),
46978 GEN_INT (elt), GEN_INT (elt)));
46982 tmp = gen_reg_rtx (mode);
46983 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
46987 gcc_unreachable ();
46990 use_vec_extr = true;
46995 /* For SSE1, we have to reuse the V4SF code. */
46996 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
46997 gen_lowpart (V4SFmode, vec), elt);
47003 use_vec_extr = TARGET_SSE2;
47006 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
47010 use_vec_extr = TARGET_SSE4_1;
/* Wide (256/512-bit) modes: extract the lo or hi half containing
   ELT, then recurse with the element index masked into the half.  */
47016 tmp = gen_reg_rtx (V4SFmode);
47018 emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
47020 emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
47021 ix86_expand_vector_extract (false, target, tmp, elt & 3);
47029 tmp = gen_reg_rtx (V2DFmode);
47031 emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
47033 emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
47034 ix86_expand_vector_extract (false, target, tmp, elt & 1);
47042 tmp = gen_reg_rtx (V16QImode);
47044 emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
47046 emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
47047 ix86_expand_vector_extract (false, target, tmp, elt & 15);
47055 tmp = gen_reg_rtx (V8HImode);
47057 emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
47059 emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
47060 ix86_expand_vector_extract (false, target, tmp, elt & 7);
47068 tmp = gen_reg_rtx (V4SImode);
47070 emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
47072 emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
47073 ix86_expand_vector_extract (false, target, tmp, elt & 3);
47081 tmp = gen_reg_rtx (V2DImode);
47083 emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
47085 emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
47086 ix86_expand_vector_extract (false, target, tmp, elt & 1);
47092 if (TARGET_AVX512BW)
47094 tmp = gen_reg_rtx (V16HImode);
47096 emit_insn (gen_vec_extract_lo_v32hi (tmp, vec));
47098 emit_insn (gen_vec_extract_hi_v32hi (tmp, vec));
47099 ix86_expand_vector_extract (false, target, tmp, elt & 15);
47105 if (TARGET_AVX512BW)
47107 tmp = gen_reg_rtx (V32QImode);
47109 emit_insn (gen_vec_extract_lo_v64qi (tmp, vec));
47111 emit_insn (gen_vec_extract_hi_v64qi (tmp, vec));
47112 ix86_expand_vector_extract (false, target, tmp, elt & 31);
47118 tmp = gen_reg_rtx (V8SFmode);
47120 emit_insn (gen_vec_extract_lo_v16sf (tmp, vec));
47122 emit_insn (gen_vec_extract_hi_v16sf (tmp, vec));
47123 ix86_expand_vector_extract (false, target, tmp, elt & 7);
47127 tmp = gen_reg_rtx (V4DFmode);
47129 emit_insn (gen_vec_extract_lo_v8df (tmp, vec));
47131 emit_insn (gen_vec_extract_hi_v8df (tmp, vec));
47132 ix86_expand_vector_extract (false, target, tmp, elt & 3);
47136 tmp = gen_reg_rtx (V8SImode);
47138 emit_insn (gen_vec_extract_lo_v16si (tmp, vec));
47140 emit_insn (gen_vec_extract_hi_v16si (tmp, vec));
47141 ix86_expand_vector_extract (false, target, tmp, elt & 7);
47145 tmp = gen_reg_rtx (V4DImode);
47147 emit_insn (gen_vec_extract_lo_v8di (tmp, vec));
47149 emit_insn (gen_vec_extract_hi_v8di (tmp, vec));
47150 ix86_expand_vector_extract (false, target, tmp, elt & 3);
47154 /* ??? Could extract the appropriate HImode element and shift. */
/* Direct vec_select path, used when use_vec_extr was set above.  */
47161 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
47162 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
47164 /* Let the rtl optimizers know about the zero extension performed. */
47165 if (inner_mode == QImode || inner_mode == HImode)
47167 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
47168 target = gen_lowpart (SImode, target);
47171 emit_insn (gen_rtx_SET (target, tmp));
/* Last resort: spill the vector to a stack slot and load the
   element from memory.  */
47175 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
47177 emit_move_insn (mem, vec);
47179 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
47180 emit_move_insn (target, tmp);
47184 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
47185 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
47186 The upper bits of DEST are undefined, though they shouldn't cause
47187 exceptions (some bits from src or all zeros are ok). */
/* NOTE(review): elided view — case labels and braces between the
   visible statements are missing from this chunk.  I is the current
   reduction width in bits (see the shift amounts keyed off I).  */
47190 emit_reduc_half (rtx dest, rtx src, int i)
47193 switch (GET_MODE (src))
/* 128-bit float modes: movhlps / shufps / unpckhpd variants.  */
47197 tem = gen_sse_movhlps (dest, src, src);
47199 tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
47200 GEN_INT (1 + 4), GEN_INT (1 + 4));
47203 tem = gen_vec_interleave_highv2df (dest, src, src);
/* 128-bit integer modes: logical right shift of the whole vector
   viewed as V1TI.  */
47209 d = gen_reg_rtx (V1TImode);
47210 tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src),
/* 256-bit float modes: vperm2f128 for the 128-bit step, shufps /
   shufpd for the narrower steps.  */
47215 tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
47217 tem = gen_avx_shufps256 (dest, src, src,
47218 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
47222 tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
47224 tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
/* 256-bit integer modes: vpermq / V2TI shift.  */
47232 if (GET_MODE (dest) != V4DImode)
47233 d = gen_reg_rtx (V4DImode);
47234 tem = gen_avx2_permv2ti (d, gen_lowpart (V4DImode, src),
47235 gen_lowpart (V4DImode, src),
47240 d = gen_reg_rtx (V2TImode);
47241 tem = gen_avx2_lshrv2ti3 (d, gen_lowpart (V2TImode, src),
/* 512-bit modes: shuf_i32x4 for the 256/512-bit steps, pshufd for
   the narrower steps, all on a V16SI view of SRC.  */
47252 tem = gen_avx512f_shuf_i32x4_1 (gen_lowpart (V16SImode, dest),
47253 gen_lowpart (V16SImode, src),
47254 gen_lowpart (V16SImode, src),
47255 GEN_INT (0x4 + (i == 512 ? 4 : 0)),
47256 GEN_INT (0x5 + (i == 512 ? 4 : 0)),
47257 GEN_INT (0x6 + (i == 512 ? 4 : 0)),
47258 GEN_INT (0x7 + (i == 512 ? 4 : 0)),
47259 GEN_INT (0xC), GEN_INT (0xD),
47260 GEN_INT (0xE), GEN_INT (0xF),
47261 GEN_INT (0x10), GEN_INT (0x11),
47262 GEN_INT (0x12), GEN_INT (0x13),
47263 GEN_INT (0x14), GEN_INT (0x15),
47264 GEN_INT (0x16), GEN_INT (0x17));
47266 tem = gen_avx512f_pshufd_1 (gen_lowpart (V16SImode, dest),
47267 gen_lowpart (V16SImode, src),
47268 GEN_INT (i == 128 ? 0x2 : 0x1),
47272 GEN_INT (i == 128 ? 0x6 : 0x5),
47276 GEN_INT (i == 128 ? 0xA : 0x9),
47280 GEN_INT (i == 128 ? 0xE : 0xD),
47286 gcc_unreachable ();
/* When the shuffle was done in a scratch D of a different mode,
   copy the result back to DEST.  */
47290 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
47293 /* Expand a vector reduction.  FN is the binary pattern to reduce;
47294 DEST is the destination; IN is the input vector. */
47297 ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
47299 rtx half, dst, vec = in;
47300 machine_mode mode = GET_MODE (in);
47303 /* SSE4 has a special instruction for V8HImode UMIN reduction. */
47305 && mode == V8HImode
47306 && fn == gen_uminv8hi3
47308 emit_insn (gen_sse4_1_phminposuw (dest, in));
/* Generic log2 reduction: repeatedly fold the upper half onto the
   lower half (emit_reduc_half) and combine with FN, halving the
   active width I each iteration until one element remains.  */
47312 for (i = GET_MODE_BITSIZE (mode);
47313 i > GET_MODE_UNIT_BITSIZE (mode);
47316 half = gen_reg_rtx (mode);
47317 emit_reduc_half (half, vec, i);
/* Last step writes straight into DEST (elided branch — the
   assignment of dst for that case is not visible here).  */
47318 if (i == GET_MODE_UNIT_BITSIZE (mode) * 2)
47321 dst = gen_reg_rtx (mode);
47322 emit_insn (fn (dst, half, vec));
47327 /* Target hook for scalar_mode_supported_p. */
47329 ix86_scalar_mode_supported_p (machine_mode mode)
/* Decimal float support is decided by the generic helper; TFmode
   (__float128) has its own answer (elided here); everything else
   falls through to the default hook.  */
47331 if (DECIMAL_FLOAT_MODE_P (mode))
47332 return default_decimal_float_supported_p ();
47333 else if (mode == TFmode)
47336 return default_scalar_mode_supported_p (mode);
47339 /* Implements target hook vector_mode_supported_p. */
47341 ix86_vector_mode_supported_p (machine_mode mode)
/* A vector mode is supported if any enabled ISA extension provides
   registers wide enough for it; checks go from SSE up to AVX-512,
   then the MMX/3DNow! legacy modes.  */
47343 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
47345 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
47347 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
47349 if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
47351 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
47353 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
47358 /* Implement target hook libgcc_floating_mode_supported_p. */
47360 ix86_libgcc_floating_mode_supported_p (machine_mode mode)
/* TFmode availability in libgcc is configuration-dependent; the
   IX86_*_NO_LIBGCC_TFMODE macros select the answer per target OS.  */
47370 #ifdef IX86_NO_LIBGCC_TFMODE
47372 #elif defined IX86_MAYBE_NO_LIBGCC_TFMODE
47373 return TARGET_LONG_DOUBLE_128;
47383 /* Target hook for c_mode_for_suffix. */
/* Maps a literal-suffix character to a machine mode (body elided in
   this view).  */
47384 static machine_mode
47385 ix86_c_mode_for_suffix (char suffix)
47395 /* Worker function for TARGET_MD_ASM_ADJUST.
47397 We implement asm flag outputs, and maintain source compatibility
47398 with the old cc0-based compiler. */
/* NOTE(review): elided view — braces and some statements between the
   visible lines are missing.  Parses "=@cc<cond>" output constraints,
   rewrites them to read the flags register, and emits the setcc /
   zero-extend sequence into each user output.  */
47401 ix86_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &/*inputs*/,
47402 vec<const char *> &constraints,
47403 vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
/* Every asm clobbers the x87 status word for cc0 compatibility.  */
47405 clobbers.safe_push (gen_rtx_REG (CCFPmode, FPSR_REG));
47406 SET_HARD_REG_BIT (clobbered_regs, FPSR_REG);
47408 bool saw_asm_flag = false;
47411 for (unsigned i = 0, n = outputs.length (); i < n; ++i)
47413 const char *con = constraints[i];
47414 if (strncmp (con, "=@cc", 4) != 0)
47417 if (strchr (con, ',') != NULL)
47419 error ("alternatives not allowed in asm flag output");
/* A leading 'n' inverts the condition.  */
47423 bool invert = false;
47425 invert = true, con++;
/* Map the condition mnemonic to a CC mode and rtx comparison code.
   (The switch on the first letter is elided in this view.)  */
47427 machine_mode mode = CCmode;
47428 rtx_code code = UNKNOWN;
47434 mode = CCAmode, code = EQ;
47435 else if (con[1] == 'e' && con[2] == 0)
47436 mode = CCCmode, code = NE;
47440 mode = CCCmode, code = EQ;
47441 else if (con[1] == 'e' && con[2] == 0)
47442 mode = CCAmode, code = NE;
47446 mode = CCCmode, code = EQ;
47450 mode = CCZmode, code = EQ;
47454 mode = CCGCmode, code = GT;
47455 else if (con[1] == 'e' && con[2] == 0)
47456 mode = CCGCmode, code = GE;
47460 mode = CCGCmode, code = LT;
47461 else if (con[1] == 'e' && con[2] == 0)
47462 mode = CCGCmode, code = LE;
47466 mode = CCOmode, code = EQ;
47470 mode = CCPmode, code = EQ;
47474 mode = CCSmode, code = EQ;
47478 mode = CCZmode, code = EQ;
47481 if (code == UNKNOWN)
47483 error ("unknown asm flag output %qs", constraints[i]);
47487 code = reverse_condition (code);
47489 rtx dest = outputs[i];
47492 /* This is the first asm flag output.  Here we put the flags
47493 register in as the real output and adjust the condition to
47495 constraints[i] = "=Bf";
47496 outputs[i] = gen_rtx_REG (CCmode, FLAGS_REG);
47497 saw_asm_flag = true;
47501 /* We don't need the flags register as output twice. */
47502 constraints[i] = "=X";
47503 outputs[i] = gen_rtx_SCRATCH (SImode);
/* Build the setcc-style comparison of the flags register.  */
47506 rtx x = gen_rtx_REG (mode, FLAGS_REG);
47507 x = gen_rtx_fmt_ee (code, QImode, x, const0_rtx);
47509 machine_mode dest_mode = GET_MODE (dest);
47510 if (!SCALAR_INT_MODE_P (dest_mode))
47512 error ("invalid type for asm flag output");
47516 if (dest_mode == DImode && !TARGET_64BIT)
47517 dest_mode = SImode;
/* Widen the QImode flag value to the user's output mode.  */
47519 if (dest_mode != QImode)
47521 rtx destqi = gen_reg_rtx (QImode);
47522 emit_insn (gen_rtx_SET (destqi, x));
47524 if (TARGET_ZERO_EXTEND_WITH_AND
47525 && optimize_function_for_speed_p (cfun))
47527 x = force_reg (dest_mode, const0_rtx);
47529 emit_insn (gen_movstrictqi
47530 (gen_lowpart (QImode, x), destqi));
47533 x = gen_rtx_ZERO_EXTEND (dest_mode, destqi);
/* 32-bit DImode output: extend via an SImode temporary.  */
47536 if (dest_mode != GET_MODE (dest))
47538 rtx tmp = gen_reg_rtx (SImode);
47540 emit_insn (gen_rtx_SET (tmp, x));
47541 emit_insn (gen_zero_extendsidi2 (dest, tmp));
47544 emit_insn (gen_rtx_SET (dest, x));
47546 rtx_insn *seq = get_insns ();
47553 /* If we had no asm flag outputs, clobber the flags. */
47554 clobbers.safe_push (gen_rtx_REG (CCmode, FLAGS_REG));
47555 SET_HARD_REG_BIT (clobbered_regs, FLAGS_REG);
47560 /* Implements target vector targetm.asm.encode_section_info. */
47562 static void ATTRIBUTE_UNUSED
47563 ix86_encode_section_info (tree decl, rtx rtl, int first)
47565 default_encode_section_info (decl, rtl, first);
/* Mark symbols placed in the large data section so addressing code
   knows they need far (full 64-bit) addresses.  */
47567 if (ix86_in_large_data_p (decl))
47568 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
47571 /* Worker function for REVERSE_CONDITION. */
/* FP compare modes must use the unordered-aware reversal, since
   reversing e.g. LT in the presence of NaNs is not simply GE.  */
47574 ix86_reverse_condition (enum rtx_code code, machine_mode mode)
47576 return (mode != CCFPmode && mode != CCFPUmode
47577 ? reverse_condition (code)
47578 : reverse_condition_maybe_unordered (code));
47581 /* Output code to perform an x87 FP register move, from OPERANDS[1]
/* ... to OPERANDS[0] (continuation elided).  Returns the assembler
   template string.  Uses popping forms (fstp) when the source
   register dies in this insn, so the x87 stack stays balanced.  */
47585 output_387_reg_move (rtx insn, rtx *operands)
47587 if (REG_P (operands[0]))
47589 if (REG_P (operands[1])
47590 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
47592 if (REGNO (operands[0]) == FIRST_STACK_REG)
47593 return output_387_ffreep (operands, 0);
47594 return "fstp\t%y0";
47596 if (STACK_TOP_P (operands[0]))
47597 return "fld%Z1\t%y1";
47600 else if (MEM_P (operands[0]))
47602 gcc_assert (REG_P (operands[1]));
47603 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
47604 return "fstp%Z0\t%y0";
47607 /* There is no non-popping store to memory for XFmode.
47608 So if we need one, follow the store with a load. */
47609 if (GET_MODE (operands[0]) == XFmode)
47610 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
47612 return "fst%Z0\t%y0";
47619 /* Output code to perform a conditional jump to LABEL, if C2 flag in
47620 FP status register is set. */
47623 ix86_emit_fp_unordered_jump (rtx label)
47625 rtx reg = gen_reg_rtx (HImode);
/* Read the x87 status word into REG.  */
47628 emit_insn (gen_x86_fnstsw_1 (reg));
/* If sahf is available (and profitable), move the status flags into
   EFLAGS and test for unordered directly ...  */
47630 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
47632 emit_insn (gen_x86_sahf_1 (reg));
47634 temp = gen_rtx_REG (CCmode, FLAGS_REG);
47635 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
/* ... otherwise test the C2 bit (0x04) of the status word.  */
47639 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
47641 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
47642 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
47645 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
47646 gen_rtx_LABEL_REF (VOIDmode, label),
47648 temp = gen_rtx_SET (pc_rtx, temp);
47650 emit_jump_insn (temp);
/* The unordered case is expected to be rare.  */
47651 predict_jump (REG_BR_PROB_BASE * 10 / 100);
47654 /* Output code to perform a log1p XFmode calculation. */
/* Computes OP0 = log1p (OP1) with x87 instructions.  fyl2xp1 is only
   accurate for |x| below ~0.29 (1 - sqrt(2)/2); larger inputs take
   the fyl2x (1 + x) path instead.  */
47656 void ix86_emit_i387_log1p (rtx op0, rtx op1)
47658 rtx_code_label *label1 = gen_label_rtx ();
47659 rtx_code_label *label2 = gen_label_rtx ();
47661 rtx tmp = gen_reg_rtx (XFmode);
47662 rtx tmp2 = gen_reg_rtx (XFmode);
/* Branch to the fyl2x path when |op1| >= 1 - sqrt(2)/2.  */
47665 emit_insn (gen_absxf2 (tmp, op1));
47666 test = gen_rtx_GE (VOIDmode, tmp,
47667 const_double_from_real_value (
47668 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
47670 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
/* Small |x|: op0 = ln2 * log2(1 + op1) via fyl2xp1.  */
47672 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
47673 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
47674 emit_jump (label2);
/* Large |x|: op0 = ln2 * log2(op1 + 1) via fyl2x.  */
47676 emit_label (label1);
47677 emit_move_insn (tmp, CONST1_RTX (XFmode));
47678 emit_insn (gen_addxf3 (tmp, op1, tmp));
47679 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
47680 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
47682 emit_label (label2);
47685 /* Emit code for round calculation. */
/* Computes OP0 = round (OP1) on the x87 using
   round(a) = sgn(a) * floor(fabs(a) + 0.5); OP0 may be an FP or an
   integer mode (see the outmode switch for the floor step).
   NOTE(review): elided view — switch labels and some braces between
   the visible statements are missing.  */
47686 void ix86_emit_i387_round (rtx op0, rtx op1)
47688 machine_mode inmode = GET_MODE (op1);
47689 machine_mode outmode = GET_MODE (op0);
47690 rtx e1, e2, res, tmp, tmp1, half;
47691 rtx scratch = gen_reg_rtx (HImode);
47692 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
47693 rtx_code_label *jump_label = gen_label_rtx ();
47695 rtx (*gen_abs) (rtx, rtx);
47696 rtx (*gen_neg) (rtx, rtx);
/* Select abs expander by input FP mode.  */
47701 gen_abs = gen_abssf2;
47704 gen_abs = gen_absdf2;
47707 gen_abs = gen_absxf2;
47710 gcc_unreachable ();
/* Select negate expander by output mode (FP or integer).  */
47716 gen_neg = gen_negsf2;
47719 gen_neg = gen_negdf2;
47722 gen_neg = gen_negxf2;
47725 gen_neg = gen_neghi2;
47728 gen_neg = gen_negsi2;
47731 gen_neg = gen_negdi2;
47734 gcc_unreachable ();
47737 e1 = gen_reg_rtx (inmode);
47738 e2 = gen_reg_rtx (inmode);
47739 res = gen_reg_rtx (outmode);
47741 half = const_double_from_real_value (dconsthalf, inmode);
47743 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
47745 /* scratch = fxam(op1) */
47746 emit_insn (gen_rtx_SET (scratch,
47747 gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
47749 /* e1 = fabs(op1) */
47750 emit_insn (gen_abs (e1, op1));
47752 /* e2 = e1 + 0.5 */
47753 half = force_reg (inmode, half);
47754 emit_insn (gen_rtx_SET (e2, gen_rtx_PLUS (inmode, e1, half)));
47756 /* res = floor(e2) */
/* frndint/lfloor operate on XFmode, so extend narrower inputs.  */
47757 if (inmode != XFmode)
47759 tmp1 = gen_reg_rtx (XFmode);
47761 emit_insn (gen_rtx_SET (tmp1, gen_rtx_FLOAT_EXTEND (XFmode, e2)));
47771 rtx tmp0 = gen_reg_rtx (XFmode);
47773 emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
47775 emit_insn (gen_rtx_SET (res,
47776 gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
47777 UNSPEC_TRUNC_NOOP)));
47781 emit_insn (gen_frndintxf2_floor (res, tmp1));
47784 emit_insn (gen_lfloorxfhi2 (res, tmp1));
47787 emit_insn (gen_lfloorxfsi2 (res, tmp1));
47790 emit_insn (gen_lfloorxfdi2 (res, tmp1));
47793 gcc_unreachable ();
47796 /* flags = signbit(a) */
/* Bit 0x02 of the fxam status word is the sign bit (C1).  */
47797 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));
47799 /* if (flags) then res = -res */
47800 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
47801 gen_rtx_EQ (VOIDmode, flags, const0_rtx),
47802 gen_rtx_LABEL_REF (VOIDmode, jump_label),
47804 insn = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
47805 predict_jump (REG_BR_PROB_BASE * 50 / 100);
47806 JUMP_LABEL (insn) = jump_label;
47808 emit_insn (gen_neg (res, res));
47810 emit_label (jump_label);
47811 LABEL_NUSES (jump_label) = 1;
47813 emit_move_insn (op0, res);
47816 /* Output code to perform a Newton-Rhapson approximation of a single precision
47817 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
/* Emits RES = A / B in MODE using a reciprocal estimate plus one
   Newton-Raphson refinement step (formula on the line below).  */
47819 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode)
47821 rtx x0, x1, e0, e1;
47823 x0 = gen_reg_rtx (mode);
47824 e0 = gen_reg_rtx (mode);
47825 e1 = gen_reg_rtx (mode);
47826 x1 = gen_reg_rtx (mode);
47828 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
47830 b = force_reg (mode, b);
47832 /* x0 = rcp(b) estimate */
/* 512-bit modes use the AVX-512 rcp14 unspec (no 512-bit rcpps).  */
47833 if (mode == V16SFmode || mode == V8DFmode)
47834 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
47837 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
/* e0 = b * x0 * x0; e1 = 2 * x0; x1 = e1 - e0 = x0 * (2 - b*x0).  */
47841 emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, b)));
47844 emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, e0)));
47847 emit_insn (gen_rtx_SET (e1, gen_rtx_PLUS (mode, x0, x0)));
47850 emit_insn (gen_rtx_SET (x1, gen_rtx_MINUS (mode, e1, e0)));
/* res = a * refined reciprocal.  */
47853 emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, a, x1)));
47856 /* Output code to perform a Newton-Rhapson approximation of a
47857 single precision floating point [reciprocal] square root. */
/* Emits RES = sqrt (A) (RECIP false) or 1/sqrt (A) (RECIP true) in
   MODE from an rsqrt estimate plus one Newton-Raphson step; the
   exact formulas are in the comment below.  */
47859 void ix86_emit_swsqrtsf (rtx res, rtx a, machine_mode mode, bool recip)
47861 rtx x0, e0, e1, e2, e3, mthree, mhalf;
47865 x0 = gen_reg_rtx (mode);
47866 e0 = gen_reg_rtx (mode);
47867 e1 = gen_reg_rtx (mode);
47868 e2 = gen_reg_rtx (mode);
47869 e3 = gen_reg_rtx (mode);
/* Constants -3.0 and -0.5 used by the refinement formula.  */
47871 real_from_integer (&r, VOIDmode, -3, SIGNED);
47872 mthree = const_double_from_real_value (r, SFmode);
47874 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
47875 mhalf = const_double_from_real_value (r, SFmode);
47876 unspec = UNSPEC_RSQRT;
47878 if (VECTOR_MODE_P (mode))
47880 mthree = ix86_build_const_vector (mode, true, mthree);
47881 mhalf = ix86_build_const_vector (mode, true, mhalf);
47882 /* There is no 512-bit rsqrt. There is however rsqrt14. */
47883 if (GET_MODE_SIZE (mode) == 64)
47884 unspec = UNSPEC_RSQRT14;
47887 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
47888 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
47890 a = force_reg (mode, a);
47892 /* x0 = rsqrt(a) estimate */
47893 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
47896 /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0). */
47899 rtx zero = force_reg (mode, CONST0_RTX(mode));
47902 /* Handle masked compare. */
47903 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
47905 mask = gen_reg_rtx (HImode);
47906 /* Imm value 0x4 corresponds to not-equal comparison. */
47907 emit_insn (gen_avx512f_cmpv16sf3 (mask, zero, a, GEN_INT (0x4)));
47908 emit_insn (gen_avx512f_blendmv16sf (x0, zero, x0, mask));
/* Pre-AVX-512: build an all-ones/all-zeros compare mask and AND it
   into x0 so x0 becomes 0 where a == 0.  */
47912 mask = gen_reg_rtx (mode);
47913 emit_insn (gen_rtx_SET (mask, gen_rtx_NE (mode, zero, a)));
47914 emit_insn (gen_rtx_SET (x0, gen_rtx_AND (mode, x0, mask)));
/* e0 = x0*a; e1 = e0*x0; e2 = e1 - 3; e3 = -.5 * (x0 or e0).  */
47919 emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, a)));
47921 emit_insn (gen_rtx_SET (e1, gen_rtx_MULT (mode, e0, x0)));
47924 mthree = force_reg (mode, mthree);
47925 emit_insn (gen_rtx_SET (e2, gen_rtx_PLUS (mode, e1, mthree)));
47927 mhalf = force_reg (mode, mhalf);
47929 /* e3 = -.5 * x0 */
47930 emit_insn (gen_rtx_SET (e3, gen_rtx_MULT (mode, x0, mhalf)));
47932 /* e3 = -.5 * e0 */
47933 emit_insn (gen_rtx_SET (e3, gen_rtx_MULT (mode, e0, mhalf)));
47934 /* ret = e2 * e3 */
47935 emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, e2, e3)));
47938 #ifdef TARGET_SOLARIS
47939 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
47942 i386_solaris_elf_named_section (const char *name, unsigned int flags,
47945 /* With Binutils 2.15, the "@unwind" marker must be specified on
47946 every occurrence of the ".eh_frame" section, not just the first
/* ... one (continuation elided).  Emit the section directive with
   the @unwind type and return early for .eh_frame.  */
47949 && strcmp (name, ".eh_frame") == 0)
47951 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
47952 flags & SECTION_WRITE ? "aw" : "a");
/* COMDAT sections go through the Solaris comdat helper; everything
   else uses the default ELF handler.  */
47957 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
47959 solaris_elf_asm_comdat_section (name, flags, decl);
47964 default_elf_asm_named_section (name, flags, decl);
47966 #endif /* TARGET_SOLARIS */
47968 /* Return the mangling of TYPE if it is an extended fundamental type. */
47970 static const char *
47971 ix86_mangle_type (const_tree type)
47973 type = TYPE_MAIN_VARIANT (type);
/* Only scalar arithmetic-ish types can be extended fundamentals.  */
47975 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
47976 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
47979 switch (TYPE_MODE (type))
47982 /* __float128 is "g". */
47985 /* "long double" or __float80 is "e". */
47992 /* For 32-bit code we can save PIC register setup by using
47993 __stack_chk_fail_local hidden function instead of calling
47994 __stack_chk_fail directly.  64-bit code doesn't need to setup any PIC
47995 register, so it is better to call __stack_chk_fail directly. */
47997 static tree ATTRIBUTE_UNUSED
47998 ix86_stack_protect_fail (void)
48000 return TARGET_64BIT
48001 ? default_external_stack_protect_fail ()
48002 : default_hidden_stack_protect_fail ();
48005 /* Select a format to encode pointers in exception handling data.  CODE
48006 is 0 for data, 1 for code labels, 2 for function pointers.  GLOBAL is
48007 true if the symbol may be affected by dynamic relocations.
48009 ??? All x86 object file formats are capable of representing this.
48010 After all, the relocation needed is the same as for the call insn.
48011 Whether or not a particular assembler allows us to enter such, I
48012 guess we'll have to see. */
48014 asm_preferred_eh_data_format (int code, int global)
/* PIC (elided condition): pc-relative sdata, 4-byte when the code
   model guarantees reachability, with an indirection for global
   symbols.  */
48018 int type = DW_EH_PE_sdata8;
48020 || ix86_cmodel == CM_SMALL_PIC
48021 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code))
48022 type = DW_EH_PE_sdata4;
48023 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
/* Non-PIC: small absolute encodings when the code model allows.  */
48025 if (ix86_cmodel == CM_SMALL
48026 || (ix86_cmodel == CM_MEDIUM && code))
48027 return DW_EH_PE_udata4;
48028 return DW_EH_PE_absptr;
48031 /* Expand copysign from SIGN to the positive value ABS_VALUE
48032 storing in RESULT.  If MASK is non-null, it shall be a mask to mask out
/* ... the sign bit (continuation elided).  RESULT = ABS_VALUE with
   SIGN's sign bit ORed in.  */
48035 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
48037 machine_mode mode = GET_MODE (sign);
48038 rtx sgn = gen_reg_rtx (mode);
/* No caller-supplied mask: build a sign-bit mask for the matching
   vector mode.  */
48039 if (mask == NULL_RTX)
48041 machine_mode vmode;
48043 if (mode == SFmode)
48045 else if (mode == DFmode)
48050 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
48051 if (!VECTOR_MODE_P (mode))
48053 /* We need to generate a scalar mode mask in this case. */
48054 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
48055 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
48056 mask = gen_reg_rtx (mode);
48057 emit_insn (gen_rtx_SET (mask, tmp));
/* sgn = sign & ~mask (isolate SIGN's sign bit); result = abs | sgn.  */
48061 mask = gen_rtx_NOT (mode, mask);
48062 emit_insn (gen_rtx_SET (sgn, gen_rtx_AND (mode, mask, sign)));
48063 emit_insn (gen_rtx_SET (result, gen_rtx_IOR (mode, abs_value, sgn)));
48066 /* Expand fabs (OP0) and return a new rtx that holds the result.  The
48067 mask for masking out the sign-bit is stored in *SMASK, if that is
/* ... non-null (continuation elided).  fabs is implemented as an AND
   with the complement of the sign-bit mask.  */
48070 ix86_expand_sse_fabs (rtx op0, rtx *smask)
48072 machine_mode vmode, mode = GET_MODE (op0);
48075 xa = gen_reg_rtx (mode);
48076 if (mode == SFmode)
48078 else if (mode == DFmode)
48082 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
48083 if (!VECTOR_MODE_P (mode))
48085 /* We need to generate a scalar mode mask in this case. */
48086 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
48087 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
48088 mask = gen_reg_rtx (mode);
48089 emit_insn (gen_rtx_SET (mask, tmp));
48091 emit_insn (gen_rtx_SET (xa, gen_rtx_AND (mode, op0, mask)));
48099 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
48100 swapping the operands if SWAP_OPERANDS is true.  The expanded
48101 code is a forward jump to a newly created label in case the
48102 comparison is true.  The generated label rtx is returned. */
48103 static rtx_code_label *
48104 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
48105 bool swap_operands)
48107 machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
48108 rtx_code_label *label;
48112 std::swap (op0, op1);
/* Emit compare into the flags register, then a conditional jump on
   CODE to the new label.  */
48114 label = gen_label_rtx ();
48115 tmp = gen_rtx_REG (fpcmp_mode, FLAGS_REG);
48116 emit_insn (gen_rtx_SET (tmp, gen_rtx_COMPARE (fpcmp_mode, op0, op1)));
48117 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
48118 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
48119 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
48120 tmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
48121 JUMP_LABEL (tmp) = label;
48126 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
48127 using comparison code CODE.  Operands are swapped for the comparison if
48128 SWAP_OPERANDS is true.  Returns a rtx for the generated mask. */
48130 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
48131 bool swap_operands)
48133 rtx (*insn)(rtx, rtx, rtx, rtx);
48134 machine_mode mode = GET_MODE (op0);
48135 rtx mask = gen_reg_rtx (mode);
48138 std::swap (op0, op1);
/* Pick the setcc-style SSE compare expander by scalar mode.  */
48140 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
48142 emit_insn (insn (mask, op0, op1,
48143 gen_rtx_fmt_ee (code, mode, op0, op1)));
48147 /* Generate and return a rtx of mode MODE for 2**n where n is the number
48148 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
48150 ix86_gen_TWO52 (machine_mode mode)
48152 REAL_VALUE_TYPE TWO52r;
/* 2**52 for double, 2**23 for float — adding/subtracting this value
   forces rounding to integer in FP.  */
48155 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
48156 TWO52 = const_double_from_real_value (TWO52r, mode);
48157 TWO52 = force_reg (mode, TWO52);
48162 /* Expand SSE sequence for computing lround from OP1 storing
/* ... into OP0 (continuation elided).  Adds copysign(nextafter(0.5,
   0.0), op1) before truncating, so halfway cases round away from
   zero without double-rounding.  */
48165 ix86_expand_lround (rtx op0, rtx op1)
48167 /* C code for the stuff we're doing below:
48168 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
48171 machine_mode mode = GET_MODE (op1);
48172 const struct real_format *fmt;
48173 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
48176 /* load nextafter (0.5, 0.0) */
48177 fmt = REAL_MODE_FORMAT (mode);
48178 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
48179 real_arithmetic (&pred_half, MINUS_EXPR, &dconsthalf, &half_minus_pred_half);
48181 /* adj = copysign (0.5, op1) */
48182 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
48183 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
48185 /* adj = op1 + adj */
48186 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
48188 /* op0 = (imode)adj */
48189 expand_fix (op0, adj, 0);
48192 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1 storing
48195 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
48197 /* C code for the stuff we're doing below (for do_floor):
48199 xi -= (double)xi > op1 ? 1 : 0;
48202 machine_mode fmode = GET_MODE (op1);
48203 machine_mode imode = GET_MODE (op0);
48204 rtx ireg, freg, tmp;
48205 rtx_code_label *label;
48207 /* reg = (long)op1 */
48208 ireg = gen_reg_rtx (imode);
48209 expand_fix (ireg, op1, 0);
48211 /* freg = (double)reg */
48212 freg = gen_reg_rtx (fmode);
48213 expand_float (freg, ireg, 0);
48215 /* ireg = (freg > op1) ? ireg - 1 : ireg */
/* Jump past the adjustment when no correction is needed; operands are
   swapped (fourth arg) for the ceil direction.  */
48216 label = ix86_expand_sse_compare_and_jump (UNLE,
48217 freg, op1, !do_floor);
/* floor subtracts 1, ceil adds 1.  */
48218 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
48219 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
48220 emit_move_insn (ireg, tmp);
48222 emit_label (label);
48223 LABEL_NUSES (label) = 1;
48225 emit_move_insn (op0, ireg);
48228 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
48229 result in OPERAND0. */
48231 ix86_expand_rint (rtx operand0, rtx operand1)
48233 /* C code for the stuff we're doing below:
48234 xa = fabs (operand1);
48235 if (!isless (xa, 2**52))
48237 xa = xa + 2**52 - 2**52;
48238 return copysign (xa, operand1);
48240 machine_mode mode = GET_MODE (operand0);
48241 rtx res, xa, TWO52, mask;
48242 rtx_code_label *label;
/* RES holds the result; initialized to the input so the >= 2**52 path
   (already integral, no rounding needed) can just fall through.  */
48244 res = gen_reg_rtx (mode);
48245 emit_move_insn (res, operand1);
48247 /* xa = abs (operand1) */
48248 xa = ix86_expand_sse_fabs (res, &mask);
48250 /* if (!isless (xa, TWO52)) goto label; */
48251 TWO52 = ix86_gen_TWO52 (mode);
48252 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
/* Adding then subtracting 2**52 rounds to integer in the current
   rounding mode, which is what rint requires.  */
48254 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
48255 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
/* Restore the original sign (handles -0.0 and negative inputs).  */
48257 ix86_sse_copysign_to_positive (res, xa, res, mask);
48259 emit_label (label);
48260 LABEL_NUSES (label) = 1;
48262 emit_move_insn (operand0, res);
48265 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
48268 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
48270 /* C code for the stuff we expand below.
48271 double xa = fabs (x), x2;
48272 if (!isless (xa, TWO52))
48274 xa = xa + TWO52 - TWO52;
48275 x2 = copysign (xa, x);
48284 machine_mode mode = GET_MODE (operand0);
48285 rtx xa, TWO52, tmp, one, res, mask;
48286 rtx_code_label *label;
48288 TWO52 = ix86_gen_TWO52 (mode);
48290 /* Temporary for holding the result, initialized to the input
48291 operand to ease control flow. */
48292 res = gen_reg_rtx (mode);
48293 emit_move_insn (res, operand1);
48295 /* xa = abs (operand1) */
48296 xa = ix86_expand_sse_fabs (res, &mask);
48298 /* if (!isless (xa, TWO52)) goto label; */
48299 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
48301 /* xa = xa + TWO52 - TWO52; */
48302 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
48303 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
48305 /* xa = copysign (xa, operand1) */
48306 ix86_sse_copysign_to_positive (xa, xa, res, mask);
48308 /* generate 1.0 or -1.0 */
/* floor corrects downward with +1.0 masked in and subtracted; ceil uses
   -1.0 so the same subtract adjusts upward.  */
48309 one = force_reg (mode,
48310 const_double_from_real_value (do_floor
48311 ? dconst1 : dconstm1, mode));
48313 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
/* The mask is all-ones where correction is needed; AND with ONE turns it
   into the numeric adjustment.  */
48314 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
48315 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
48316 /* We always need to subtract here to preserve signed zero. */
48317 tmp = expand_simple_binop (mode, MINUS,
48318 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
48319 emit_move_insn (res, tmp);
48321 emit_label (label);
48322 LABEL_NUSES (label) = 1;
48324 emit_move_insn (operand0, res);
48327 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
48330 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
48332 /* C code for the stuff we expand below.
48333 double xa = fabs (x), x2;
48334 if (!isless (xa, TWO52))
48336 x2 = (double)(long)x;
48343 if (HONOR_SIGNED_ZEROS (mode))
48344 return copysign (x2, x);
48347 machine_mode mode = GET_MODE (operand0);
48348 rtx xa, xi, TWO52, tmp, one, res, mask;
48349 rtx_code_label *label;
48351 TWO52 = ix86_gen_TWO52 (mode);
48353 /* Temporary for holding the result, initialized to the input
48354 operand to ease control flow. */
48355 res = gen_reg_rtx (mode);
48356 emit_move_insn (res, operand1);
48358 /* xa = abs (operand1) */
48359 xa = ix86_expand_sse_fabs (res, &mask);
48361 /* if (!isless (xa, TWO52)) goto label; */
48362 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
48364 /* xa = (double)(long)x */
/* Round via truncating fix and back-conversion; this variant relies on a
   wide enough integer mode (DImode for double), unlike the _32 variant.  */
48365 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
48366 expand_fix (xi, res, 0);
48367 expand_float (xa, xi, 0);
48370 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
48372 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
48373 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
48374 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
/* Truncation rounds toward zero, so floor subtracts the correction and
   ceil adds it.  */
48375 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
48376 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
48377 emit_move_insn (res, tmp);
48379 if (HONOR_SIGNED_ZEROS (mode))
48380 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
48382 emit_label (label);
48383 LABEL_NUSES (label) = 1;
48385 emit_move_insn (operand0, res);
48388 /* Expand SSE sequence for computing round from OPERAND1 storing
48389 into OPERAND0. Sequence that works without relying on DImode truncation
48390 via cvttsd2siq that is only available on 64bit targets. */
48392 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
48394 /* C code for the stuff we expand below.
48395 double xa = fabs (x), xa2, x2;
48396 if (!isless (xa, TWO52))
48398 Using the absolute value and copying back sign makes
48399 -0.0 -> -0.0 correct.
48400 xa2 = xa + TWO52 - TWO52;
48405 else if (dxa > 0.5)
48407 x2 = copysign (xa2, x);
48410 machine_mode mode = GET_MODE (operand0);
48411 rtx xa, xa2, dxa, TWO52, tmp, half, mhalf, one, res, mask;
48412 rtx_code_label *label;
48414 TWO52 = ix86_gen_TWO52 (mode);
48416 /* Temporary for holding the result, initialized to the input
48417 operand to ease control flow. */
48418 res = gen_reg_rtx (mode);
48419 emit_move_insn (res, operand1);
48421 /* xa = abs (operand1) */
48422 xa = ix86_expand_sse_fabs (res, &mask);
48424 /* if (!isless (xa, TWO52)) goto label; */
48425 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
48427 /* xa2 = xa + TWO52 - TWO52; */
/* Round-to-nearest-even via the 2**52 add/sub trick.  */
48428 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
48429 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
48431 /* dxa = xa2 - xa; */
/* DXA is the rounding error, used below to convert round-to-even into
   round-half-away-from-zero.  */
48432 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
48434 /* generate 0.5, 1.0 and -0.5 */
48435 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
48436 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
48437 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
/* NOTE(review): this gen_reg_rtx result is dead — TMP is immediately
   overwritten by the compare-mask call below.  */
48441 tmp = gen_reg_rtx (mode);
48442 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
48443 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
48444 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
48445 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
48446 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
48447 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
48448 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
48449 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
48451 /* res = copysign (xa2, operand1) */
48452 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
48454 emit_label (label);
48455 LABEL_NUSES (label) = 1;
48457 emit_move_insn (operand0, res);
48460 /* Expand SSE sequence for computing trunc from OPERAND1 storing
48463 ix86_expand_trunc (rtx operand0, rtx operand1)
48465 /* C code for SSE variant we expand below.
48466 double xa = fabs (x), x2;
48467 if (!isless (xa, TWO52))
48469 x2 = (double)(long)x;
48470 if (HONOR_SIGNED_ZEROS (mode))
48471 return copysign (x2, x);
48474 machine_mode mode = GET_MODE (operand0);
48475 rtx xa, xi, TWO52, res, mask;
48476 rtx_code_label *label;
48478 TWO52 = ix86_gen_TWO52 (mode);
48480 /* Temporary for holding the result, initialized to the input
48481 operand to ease control flow. */
48482 res = gen_reg_rtx (mode);
48483 emit_move_insn (res, operand1);
48485 /* xa = abs (operand1) */
48486 xa = ix86_expand_sse_fabs (res, &mask);
48488 /* if (!isless (xa, TWO52)) goto label; */
/* Values with magnitude >= 2**52 are already integral — skip rounding.  */
48489 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
48491 /* x = (double)(long)x */
/* Truncating fix + float round-trip implements trunc directly, since the
   conversion rounds toward zero.  */
48492 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
48493 expand_fix (xi, res, 0);
48494 expand_float (res, xi, 0);
48496 if (HONOR_SIGNED_ZEROS (mode))
48497 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
48499 emit_label (label);
48500 LABEL_NUSES (label) = 1;
48502 emit_move_insn (operand0, res);
48505 /* Expand SSE sequence for computing trunc from OPERAND1 storing
48508 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
48510 machine_mode mode = GET_MODE (operand0);
48511 rtx xa, mask, TWO52, one, res, smask, tmp;
48512 rtx_code_label *label;
48514 /* C code for SSE variant we expand below.
48515 double xa = fabs (x), x2;
48516 if (!isless (xa, TWO52))
48518 xa2 = xa + TWO52 - TWO52;
48522 x2 = copysign (xa2, x);
48526 TWO52 = ix86_gen_TWO52 (mode);
48528 /* Temporary for holding the result, initialized to the input
48529 operand to ease control flow. */
48530 res = gen_reg_rtx (mode);
48531 emit_move_insn (res, operand1);
48533 /* xa = abs (operand1) */
48534 xa = ix86_expand_sse_fabs (res, &smask);
48536 /* if (!isless (xa, TWO52)) goto label; */
48537 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
48539 /* res = xa + TWO52 - TWO52; */
/* Round |x| to nearest integer via the 2**52 trick; this avoids the
   DImode truncation that is unavailable on 32-bit targets.  */
48540 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
48541 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
48542 emit_move_insn (res, tmp);
48545 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
48547 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
/* If rounding went up, subtract 1 so the result truncates toward zero.  */
48548 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
48549 emit_insn (gen_rtx_SET (mask, gen_rtx_AND (mode, mask, one)));
48550 tmp = expand_simple_binop (mode, MINUS,
48551 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
48552 emit_move_insn (res, tmp);
48554 /* res = copysign (res, operand1) */
48555 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
48557 emit_label (label);
48558 LABEL_NUSES (label) = 1;
48560 emit_move_insn (operand0, res);
48563 /* Expand SSE sequence for computing round from OPERAND1 storing
48566 ix86_expand_round (rtx operand0, rtx operand1)
48568 /* C code for the stuff we're doing below:
48569 double xa = fabs (x);
48570 if (!isless (xa, TWO52))
48572 xa = (double)(long)(xa + nextafter (0.5, 0.0));
48573 return copysign (xa, x);
48575 machine_mode mode = GET_MODE (operand0);
48576 rtx res, TWO52, xa, xi, half, mask;
48577 rtx_code_label *label;
48578 const struct real_format *fmt;
48579 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
48581 /* Temporary for holding the result, initialized to the input
48582 operand to ease control flow. */
48583 res = gen_reg_rtx (mode);
48584 emit_move_insn (res, operand1);
48586 TWO52 = ix86_gen_TWO52 (mode);
48587 xa = ix86_expand_sse_fabs (res, &mask);
48588 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
48590 /* load nextafter (0.5, 0.0) */
/* 0.5 - 2**(-p-1) is the predecessor of 0.5; adding it instead of 0.5
   avoids rounding exactly-halfway-below values upward.  */
48591 fmt = REAL_MODE_FORMAT (mode);
48592 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
48593 real_arithmetic (&pred_half, MINUS_EXPR, &dconsthalf, &half_minus_pred_half);
48595 /* xa = xa + 0.5 */
48596 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
48597 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
48599 /* xa = (double)(int64_t)xa */
48600 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
48601 expand_fix (xi, xa, 0);
48602 expand_float (xa, xi, 0);
48604 /* res = copysign (xa, operand1) */
48605 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
48607 emit_label (label);
48608 LABEL_NUSES (label) = 1;
48610 emit_move_insn (operand0, res);
48613 /* Expand SSE sequence for computing round
48614 from OP1 storing into OP0 using sse4 round insn. */
48616 ix86_expand_round_sse4 (rtx op0, rtx op1)
48618 machine_mode mode = GET_MODE (op0);
48619 rtx e1, e2, res, half;
48620 const struct real_format *fmt;
48621 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
48622 rtx (*gen_copysign) (rtx, rtx, rtx);
48623 rtx (*gen_round) (rtx, rtx, rtx);
/* Select SFmode or DFmode emitters; any other mode is a caller bug.  */
48628 gen_copysign = gen_copysignsf3;
48629 gen_round = gen_sse4_1_roundsf2;
48632 gen_copysign = gen_copysigndf3;
48633 gen_round = gen_sse4_1_rounddf2;
48636 gcc_unreachable ();
48639 /* round (a) = trunc (a + copysign (0.5, a)) */
48641 /* load nextafter (0.5, 0.0) */
/* Use the predecessor of 0.5 so exactly-representable values just below
   a halfway point do not get rounded up.  */
48642 fmt = REAL_MODE_FORMAT (mode);
48643 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
48644 real_arithmetic (&pred_half, MINUS_EXPR, &dconsthalf, &half_minus_pred_half);
48645 half = const_double_from_real_value (pred_half, mode);
48647 /* e1 = copysign (0.5, op1) */
48648 e1 = gen_reg_rtx (mode);
48649 emit_insn (gen_copysign (e1, half, op1));
48651 /* e2 = op1 + e1 */
48652 e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);
48654 /* res = trunc (e2) */
/* ROUND_TRUNC selects truncation in the SSE4.1 round instruction.  */
48655 res = gen_reg_rtx (mode);
48656 emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));
48658 emit_move_insn (op0, res);
48662 /* Table of valid machine attributes. */
/* Terminated by the all-NULL sentinel entry; consulted by the generic
   attribute-handling machinery via TARGET_ATTRIBUTE_TABLE.  */
48663 static const struct attribute_spec ix86_attribute_table[] =
48665 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
48666 affects_type_identity } */
48667 /* Stdcall attribute says callee is responsible for popping arguments
48668 if they are not variable. */
48669 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
48671 /* Fastcall attribute says callee is responsible for popping arguments
48672 if they are not variable. */
48673 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
48675 /* Thiscall attribute says callee is responsible for popping arguments
48676 if they are not variable. */
48677 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
48679 /* Cdecl attribute says the callee is a normal C declaration */
48680 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
48682 /* Regparm attribute specifies how many integer arguments are to be
48683 passed in registers. */
48684 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
48686 /* Sseregparm attribute says we are using x86_64 calling conventions
48687 for FP arguments. */
48688 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
48690 /* The transactional memory builtins are implicitly regparm or fastcall
48691 depending on the ABI. Override the generic do-nothing attribute that
48692 these builtins were declared with. */
48693 { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
48695 /* force_align_arg_pointer says this function realigns the stack at entry. */
48696 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
48697 false, true, true, ix86_handle_force_align_arg_pointer_attribute, false },
48698 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
48699 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
48700 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
48701 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
48704 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
48706 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
48708 #ifdef SUBTARGET_ATTRIBUTE_TABLE
48709 SUBTARGET_ATTRIBUTE_TABLE,
48711 /* ms_abi and sysv_abi calling convention function attributes. */
48712 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
48713 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
48714 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
48716 { "callee_pop_aggregate_return", 1, 1, false, true, true,
48717 ix86_handle_callee_pop_aggregate_return, true },
48719 { NULL, 0, 0, false, false, false, NULL, false }
48722 /* Implement targetm.vectorize.builtin_vectorization_cost. */
48724 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
48729 switch (type_of_cost)
48732 return ix86_cost->scalar_stmt_cost;
48735 return ix86_cost->scalar_load_cost;
48738 return ix86_cost->scalar_store_cost;
48741 return ix86_cost->vec_stmt_cost;
48744 return ix86_cost->vec_align_load_cost;
48747 return ix86_cost->vec_store_cost;
48749 case vec_to_scalar:
48750 return ix86_cost->vec_to_scalar_cost;
48752 case scalar_to_vec:
48753 return ix86_cost->scalar_to_vec_cost;
48755 case unaligned_load:
48756 case unaligned_store:
/* The cost table has no separate unaligned-store field, so the
   unaligned-load cost is deliberately reused for stores.  */
48757 return ix86_cost->vec_unalign_load_cost;
48759 case cond_branch_taken:
48760 return ix86_cost->cond_taken_branch_cost;
48762 case cond_branch_not_taken:
48763 return ix86_cost->cond_not_taken_branch_cost;
48766 case vec_promote_demote:
48767 return ix86_cost->vec_stmt_cost;
48769 case vec_construct:
/* Heuristic: building an N-element vector is roughly N/2 + 1 vector
   statements (element pairs plus the final combine).  */
48770 elements = TYPE_VECTOR_SUBPARTS (vectype);
48771 return ix86_cost->vec_stmt_cost * (elements / 2 + 1);
48774 gcc_unreachable ();
48778 /* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
48779 insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
48780 insn every time. */
/* GTY(()) keeps the cached insn alive across garbage collections.  */
48782 static GTY(()) rtx_insn *vselect_insn;
48784 /* Initialize vselect_insn. */
48787 init_vselect_insn (void)
/* Build a maximal-size PARALLEL of const0_rtx placeholders; callers later
   shrink it with PUT_NUM_ELEM and overwrite the entries.  */
48792 x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN));
48793 for (i = 0; i < MAX_VECT_LEN; ++i)
48794 XVECEXP (x, 0, i) = const0_rtx;
/* The V2DF/V4DF modes here are placeholders too — expand_vselect
   re-sets modes and operands before recog.  */
48795 x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx,
48797 x = gen_rtx_SET (const0_rtx, x);
48799 vselect_insn = emit_insn (x);
48803 /* Construct (set target (vec_select op0 (parallel perm))) and
48804 return true if that's a valid instruction in the active ISA. */
48807 expand_vselect (rtx target, rtx op0, const unsigned char *perm,
48808 unsigned nelt, bool testing_p)
48811 rtx x, save_vconcat;
48814 if (vselect_insn == NULL_RTX)
48815 init_vselect_insn ();
/* Patch the cached insn in place: resize the PARALLEL to NELT entries
   and fill in the permutation indices.  */
48817 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1);
48818 PUT_NUM_ELEM (XVEC (x, 0), nelt);
48819 for (i = 0; i < nelt; ++i)
48820 XVECEXP (x, 0, i) = GEN_INT (perm[i]);
48821 save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
48822 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0;
48823 PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target));
48824 SET_DEST (PATTERN (vselect_insn)) = target;
/* Ask recog whether the patched pattern matches a real instruction.  */
48825 icode = recog_memoized (vselect_insn);
48827 if (icode >= 0 && !testing_p)
48828 emit_insn (copy_rtx (PATTERN (vselect_insn)));
/* Restore the cached insn to its neutral state for the next caller.  */
48830 SET_DEST (PATTERN (vselect_insn)) = const0_rtx;
48831 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat;
48832 INSN_CODE (vselect_insn) = -1;
48837 /* Similar, but generate a vec_concat from op0 and op1 as well. */
48840 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
48841 const unsigned char *perm, unsigned nelt,
48844 machine_mode v2mode;
48848 if (vselect_insn == NULL_RTX)
48849 init_vselect_insn ();
/* The concatenation mode is the double-width vector mode of OP0.  */
48851 v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
48852 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
48853 PUT_MODE (x, v2mode);
48856 ok = expand_vselect (target, x, perm, nelt, testing_p);
/* Reset the cached vconcat operands so stale rtxes don't leak into the
   next use of vselect_insn.  */
48857 XEXP (x, 0) = const0_rtx;
48858 XEXP (x, 1) = const0_rtx;
48862 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
48863 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
48866 expand_vec_perm_blend (struct expand_vec_perm_d *d)
48868 machine_mode mmode, vmode = d->vmode;
48869 unsigned i, mask, nelt = d->nelt;
48870 rtx target, op0, op1, maskop, x;
48871 rtx rperm[32], vperm;
/* A blend needs two distinct operands by definition.  */
48873 if (d->one_operand_p)
/* ISA gate: AVX-512 (64-byte), AVX2 (32-byte), AVX (float 32-byte) or
   SSE4.1 (16-byte) blends; anything else bails out.  */
48875 if (TARGET_AVX512F && GET_MODE_SIZE (vmode) == 64
48876 && (TARGET_AVX512BW
48877 || GET_MODE_UNIT_SIZE (vmode) >= 4)
48879 else if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
48881 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
48883 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
48888 /* This is a blend, not a permute. Elements must stay in their
48889 respective lanes. */
48890 for (i = 0; i < nelt; ++i)
48892 unsigned e = d->perm[i];
48893 if (!(e == i || e == i + nelt))
48900 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
48901 decision should be extracted elsewhere, so that we only try that
48902 sequence once all budget==3 options have been tried. */
48903 target = d->target;
/* Build the immediate blend mask: bit i selects op1 for element i.  */
48922 for (i = 0; i < nelt; ++i)
48923 mask |= (d->perm[i] >= nelt) << i;
48927 for (i = 0; i < 2; ++i)
48928 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
48933 for (i = 0; i < 4; ++i)
48934 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
48939 /* See if bytes move in pairs so we can use pblendw with
48940 an immediate argument, rather than pblendvb with a vector
48942 for (i = 0; i < 16; i += 2)
48943 if (d->perm[i] + 1 != d->perm[i + 1])
48946 for (i = 0; i < nelt; ++i)
48947 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
/* Variable blend: build a constant -1/0 selector vector for pblendvb.  */
48950 vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
48951 vperm = force_reg (vmode, vperm);
48953 if (GET_MODE_SIZE (vmode) == 16)
48954 emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
48956 emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
48957 if (target != d->target)
48958 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
48962 for (i = 0; i < 8; ++i)
48963 mask |= (d->perm[i * 2] >= 16) << i;
48968 target = gen_reg_rtx (vmode);
48969 op0 = gen_lowpart (vmode, op0);
48970 op1 = gen_lowpart (vmode, op1);
48974 /* See if bytes move in pairs. If not, vpblendvb must be used. */
48975 for (i = 0; i < 32; i += 2)
48976 if (d->perm[i] + 1 != d->perm[i + 1])
48978 /* See if bytes move in quadruplets. If yes, vpblendd
48979 with immediate can be used. */
48980 for (i = 0; i < 32; i += 4)
48981 if (d->perm[i] + 2 != d->perm[i + 2])
48985 /* See if bytes move the same in both lanes. If yes,
48986 vpblendw with immediate can be used. */
48987 for (i = 0; i < 16; i += 2)
48988 if (d->perm[i] + 16 != d->perm[i + 16])
48991 /* Use vpblendw. */
48992 for (i = 0; i < 16; ++i)
48993 mask |= (d->perm[i * 2] >= 32) << i;
48998 /* Use vpblendd. */
48999 for (i = 0; i < 8; ++i)
49000 mask |= (d->perm[i * 4] >= 32) << i;
49005 /* See if words move in pairs. If yes, vpblendd can be used. */
49006 for (i = 0; i < 16; i += 2)
49007 if (d->perm[i] + 1 != d->perm[i + 1])
49011 /* See if words move the same in both lanes. If not,
49012 vpblendvb must be used. */
49013 for (i = 0; i < 8; i++)
49014 if (d->perm[i] + 8 != d->perm[i + 8])
49016 /* Use vpblendvb. */
49017 for (i = 0; i < 32; ++i)
49018 rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);
49022 target = gen_reg_rtx (vmode);
49023 op0 = gen_lowpart (vmode, op0);
49024 op1 = gen_lowpart (vmode, op1);
49025 goto finish_pblendvb;
49028 /* Use vpblendw. */
49029 for (i = 0; i < 16; ++i)
49030 mask |= (d->perm[i] >= 16) << i;
49034 /* Use vpblendd. */
49035 for (i = 0; i < 8; ++i)
49036 mask |= (d->perm[i * 2] >= 16) << i;
49041 /* Use vpblendd. */
49042 for (i = 0; i < 4; ++i)
49043 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
49048 gcc_unreachable ();
/* AVX-512 masked blends take the mask in a mask register; immediate
   forms use a plain CONST_INT.  */
49071 if (mmode != VOIDmode)
49072 maskop = force_reg (mmode, gen_int_mode (mask, mmode));
49074 maskop = GEN_INT (mask);
49076 /* This matches five different patterns with the different modes. */
49077 x = gen_rtx_VEC_MERGE (vmode, op1, op0, maskop);
49078 x = gen_rtx_SET (target, x);
49080 if (target != d->target)
49081 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
49086 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
49087 in terms of the variable form of vpermilps.
49089 Note that we will have already failed the immediate input vpermilps,
49090 which requires that the high and low part shuffle be identical; the
49091 variable form doesn't require that. */
49094 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
49096 rtx rperm[8], vperm;
/* Only the one-operand V8SF case on AVX is handled here.  */
49099 if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p)
49102 /* We can only permute within the 128-bit lane. */
49103 for (i = 0; i < 8; ++i)
49105 unsigned e = d->perm[i];
49106 if (i < 4 ? e >= 4 : e < 4)
/* Build the V8SI index vector consumed by vpermilps.  */
49113 for (i = 0; i < 8; ++i)
49115 unsigned e = d->perm[i];
49117 /* Within each 128-bit lane, the elements of op0 are numbered
49118 from 0 and the elements of op1 are numbered from 4. */
49124 rperm[i] = GEN_INT (e);
49127 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
49128 vperm = force_reg (V8SImode, vperm);
49129 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
49134 /* Return true if permutation D can be performed as VMODE permutation
49138 valid_perm_using_mode_p (machine_mode vmode, struct expand_vec_perm_d *d)
49140 unsigned int i, j, chunk;
/* Both modes must be integer vectors of the same total size.  */
49142 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
49143 || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
49144 || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
49147 if (GET_MODE_NUNITS (vmode) >= d->nelt)
/* CHUNK consecutive elements of D form one element of VMODE; each chunk
   must start aligned and move as a contiguous unit.  */
49150 chunk = d->nelt / GET_MODE_NUNITS (vmode);
49151 for (i = 0; i < d->nelt; i += chunk)
49152 if (d->perm[i] & (chunk - 1))
49155 for (j = 1; j < chunk; ++j)
49156 if (d->perm[i] + j != d->perm[i + j])
49162 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
49163 in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */
49166 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
49168 unsigned i, nelt, eltsz, mask;
49169 unsigned char perm[64];
49170 machine_mode vmode = V16QImode;
49171 rtx rperm[64], vperm, target, op0, op1;
/* Two-operand case: only XOP's vpperm (16-byte) or AVX2's vperm2i128
   can handle it.  */
49175 if (!d->one_operand_p)
49177 if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
49180 && valid_perm_using_mode_p (V2TImode, d))
49185 /* Use vperm2i128 insn. The pattern uses
49186 V4DImode instead of V2TImode. */
49187 target = d->target;
49188 if (d->vmode != V4DImode)
49189 target = gen_reg_rtx (V4DImode);
49190 op0 = gen_lowpart (V4DImode, d->op0);
49191 op1 = gen_lowpart (V4DImode, d->op1);
/* Encode which 128-bit half of each operand goes where.  */
49193 = GEN_INT ((d->perm[0] / (nelt / 2))
49194 | ((d->perm[nelt / 2] / (nelt / 2)) * 16));
49195 emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
49196 if (target != d->target)
49197 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
/* One-operand case, dispatched on total vector size.  */
49205 if (GET_MODE_SIZE (d->vmode) == 16)
49210 else if (GET_MODE_SIZE (d->vmode) == 32)
49215 /* V4DImode should be already handled through
49216 expand_vselect by vpermq instruction. */
49217 gcc_assert (d->vmode != V4DImode);
49220 if (d->vmode == V8SImode
49221 || d->vmode == V16HImode
49222 || d->vmode == V32QImode)
49224 /* First see if vpermq can be used for
49225 V8SImode/V16HImode/V32QImode. */
49226 if (valid_perm_using_mode_p (V4DImode, d))
49228 for (i = 0; i < 4; i++)
49229 perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
49232 target = gen_reg_rtx (V4DImode);
49233 if (expand_vselect (target, gen_lowpart (V4DImode, d->op0),
49236 emit_move_insn (d->target,
49237 gen_lowpart (d->vmode, target));
49243 /* Next see if vpermd can be used. */
49244 if (valid_perm_using_mode_p (V8SImode, d))
49247 /* Or if vpermps can be used. */
49248 else if (d->vmode == V8SFmode)
49251 if (vmode == V32QImode)
49253 /* vpshufb only works intra lanes, it is not
49254 possible to shuffle bytes in between the lanes. */
49255 for (i = 0; i < nelt; ++i)
49256 if ((d->perm[i] ^ i) & (nelt / 2))
49260 else if (GET_MODE_SIZE (d->vmode) == 64)
49262 if (!TARGET_AVX512BW)
49265 /* If vpermq didn't work, vpshufb won't work either. */
49266 if (d->vmode == V8DFmode || d->vmode == V8DImode)
49270 if (d->vmode == V16SImode
49271 || d->vmode == V32HImode
49272 || d->vmode == V64QImode)
49274 /* First see if vpermq can be used for
49275 V16SImode/V32HImode/V64QImode. */
49276 if (valid_perm_using_mode_p (V8DImode, d))
49278 for (i = 0; i < 8; i++)
49279 perm[i] = (d->perm[i * nelt / 8] * 8 / nelt) & 7;
49282 target = gen_reg_rtx (V8DImode);
49283 if (expand_vselect (target, gen_lowpart (V8DImode, d->op0),
49286 emit_move_insn (d->target,
49287 gen_lowpart (d->vmode, target));
49293 /* Next see if vpermd can be used. */
49294 if (valid_perm_using_mode_p (V16SImode, d))
49297 /* Or if vpermps can be used. */
49298 else if (d->vmode == V16SFmode)
49300 if (vmode == V64QImode)
49302 /* vpshufb only works intra lanes, it is not
49303 possible to shuffle bytes in between the lanes. */
49304 for (i = 0; i < nelt; ++i)
49305 if ((d->perm[i] ^ i) & (nelt / 4))
/* Build the selector constant for the chosen instruction.  */
49316 if (vmode == V8SImode)
49317 for (i = 0; i < 8; ++i)
49318 rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
49319 else if (vmode == V16SImode)
49320 for (i = 0; i < 16; ++i)
49321 rperm[i] = GEN_INT ((d->perm[i * nelt / 16] * 16 / nelt) & 15);
/* Byte-granular selector: MASK wraps indices to the reachable range
   (intra-lane for vpshufb, both operands for vpperm).  */
49324 eltsz = GET_MODE_UNIT_SIZE (d->vmode);
49325 if (!d->one_operand_p)
49326 mask = 2 * nelt - 1;
49327 else if (vmode == V16QImode)
49329 else if (vmode == V64QImode)
49330 mask = nelt / 4 - 1;
49332 mask = nelt / 2 - 1;
49334 for (i = 0; i < nelt; ++i)
49336 unsigned j, e = d->perm[i] & mask;
49337 for (j = 0; j < eltsz; ++j)
49338 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
49342 vperm = gen_rtx_CONST_VECTOR (vmode,
49343 gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
49344 vperm = force_reg (vmode, vperm);
49346 target = d->target;
49347 if (d->vmode != vmode)
49348 target = gen_reg_rtx (vmode);
49349 op0 = gen_lowpart (vmode, d->op0);
49350 if (d->one_operand_p)
49352 if (vmode == V16QImode)
49353 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
49354 else if (vmode == V32QImode)
49355 emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
49356 else if (vmode == V64QImode)
49357 emit_insn (gen_avx512bw_pshufbv64qi3 (target, op0, vperm));
49358 else if (vmode == V8SFmode)
49359 emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
49360 else if (vmode == V8SImode)
49361 emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
49362 else if (vmode == V16SFmode)
49363 emit_insn (gen_avx512f_permvarv16sf (target, op0, vperm));
49364 else if (vmode == V16SImode)
49365 emit_insn (gen_avx512f_permvarv16si (target, op0, vperm));
49367 gcc_unreachable ();
/* Two-operand XOP path: vpperm selects from the op0/op1 pair.  */
49371 op1 = gen_lowpart (vmode, d->op1);
49372 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
49374 if (target != d->target)
49375 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
49380 /* For V*[QHS]Imode permutations, check if the same permutation
49381 can't be performed in a 2x, 4x or 8x wider inner mode. */
49384 canonicalize_vector_int_perm (const struct expand_vec_perm_d *d,
49385 struct expand_vec_perm_d *nd)
49388 enum machine_mode mode = VOIDmode;
/* Map each integer vector mode to the mode with double-width elements.  */
49392 case V16QImode: mode = V8HImode; break;
49393 case V32QImode: mode = V16HImode; break;
49394 case V64QImode: mode = V32HImode; break;
49395 case V8HImode: mode = V4SImode; break;
49396 case V16HImode: mode = V8SImode; break;
49397 case V32HImode: mode = V16SImode; break;
49398 case V4SImode: mode = V2DImode; break;
49399 case V8SImode: mode = V4DImode; break;
49400 case V16SImode: mode = V8DImode; break;
49401 default: return false;
/* Element pairs must start even and move together, or widening changes
   the permutation's meaning.  */
49403 for (i = 0; i < d->nelt; i += 2)
49404 if ((d->perm[i] & 1) || d->perm[i + 1] != d->perm[i] + 1)
49407 nd->nelt = d->nelt / 2;
49408 for (i = 0; i < nd->nelt; i++)
49409 nd->perm[i] = d->perm[2 * i] / 2;
/* Recurse to widen further (4x, 8x) until DImode elements are reached.  */
49410 if (GET_MODE_INNER (mode) != DImode)
49411 canonicalize_vector_int_perm (nd, nd);
49414 nd->one_operand_p = d->one_operand_p;
49415 nd->testing_p = d->testing_p;
49416 if (d->op0 == d->op1)
49417 nd->op0 = nd->op1 = gen_lowpart (nd->vmode, d->op0);
49420 nd->op0 = gen_lowpart (nd->vmode, d->op0);
49421 nd->op1 = gen_lowpart (nd->vmode, d->op1);
/* In testing mode, use a raw pseudo so no real register is allocated.  */
49424 nd->target = gen_raw_REG (nd->vmode, LAST_VIRTUAL_REGISTER + 1);
49426 nd->target = gen_reg_rtx (nd->vmode);
49431 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
49432 in a single instruction. */
49435 expand_vec_perm_1 (struct expand_vec_perm_d *d)
49437 unsigned i, nelt = d->nelt;
49438 struct expand_vec_perm_d nd;
49440 /* Check plain VEC_SELECT first, because AVX has instructions that could
49441 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
49442 input where SEL+CONCAT may not. */
49443 if (d->one_operand_p)
49445 int mask = nelt - 1;
49446 bool identity_perm = true;
49447 bool broadcast_perm = true;
/* Classify the masked permutation: identity, or broadcast of one element.  */
49449 for (i = 0; i < nelt; i++)
49451 nd.perm[i] = d->perm[i] & mask;
49452 if (nd.perm[i] != i)
49453 identity_perm = false;
49455 broadcast_perm = false;
/* An identity permutation is just a register move.  */
49461 emit_move_insn (d->target, d->op0);
49464 else if (broadcast_perm && TARGET_AVX2)
49466 /* Use vpbroadcast{b,w,d}. */
49467 rtx (*gen) (rtx, rtx) = NULL;
/* Pick the broadcast expander per mode; AVX512 variants take priority
   when available (mode-switch lines are elided in this excerpt).  */
49471 if (TARGET_AVX512BW)
49472 gen = gen_avx512bw_vec_dupv64qi_1;
49475 gen = gen_avx2_pbroadcastv32qi_1;
49478 if (TARGET_AVX512BW)
49479 gen = gen_avx512bw_vec_dupv32hi_1;
49482 gen = gen_avx2_pbroadcastv16hi_1;
49485 if (TARGET_AVX512F)
49486 gen = gen_avx512f_vec_dupv16si_1;
49489 gen = gen_avx2_pbroadcastv8si_1;
49492 gen = gen_avx2_pbroadcastv16qi;
49495 gen = gen_avx2_pbroadcastv8hi;
49498 if (TARGET_AVX512F)
49499 gen = gen_avx512f_vec_dupv16sf_1;
49502 gen = gen_avx2_vec_dupv8sf_1;
49505 if (TARGET_AVX512F)
49506 gen = gen_avx512f_vec_dupv8df_1;
49509 if (TARGET_AVX512F)
49510 gen = gen_avx512f_vec_dupv8di_1;
49512 /* For other modes prefer other shuffles this function creates. */
49518 emit_insn (gen (d->target, d->op0));
/* Otherwise try a plain single-operand VEC_SELECT.  */
49523 if (expand_vselect (d->target, d->op0, nd.perm, nelt, d->testing_p))
49526 /* There are plenty of patterns in sse.md that are written for
49527 SEL+CONCAT and are not replicated for a single op. Perhaps
49528 that should be changed, to avoid the nastiness here. */
49530 /* Recognize interleave style patterns, which means incrementing
49531 every other permutation operand. */
49532 for (i = 0; i < nelt; i += 2)
49534 nd.perm[i] = d->perm[i] & mask;
49535 nd.perm[i + 1] = (d->perm[i + 1] & mask) + nelt;
49537 if (expand_vselect_vconcat (d->target, d->op0, d->op0, nd.perm, nelt,
49541 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
49544 for (i = 0; i < nelt; i += 4)
49546 nd.perm[i + 0] = d->perm[i + 0] & mask;
49547 nd.perm[i + 1] = d->perm[i + 1] & mask;
49548 nd.perm[i + 2] = (d->perm[i + 2] & mask) + nelt;
49549 nd.perm[i + 3] = (d->perm[i + 3] & mask) + nelt;
49552 if (expand_vselect_vconcat (d->target, d->op0, d->op0, nd.perm, nelt,
49558 /* Finally, try the fully general two operand permute. */
49559 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
49563 /* Recognize interleave style patterns with reversed operands. */
49564 if (!d->one_operand_p)
49566 for (i = 0; i < nelt; ++i)
49568 unsigned e = d->perm[i];
49576 if (expand_vselect_vconcat (d->target, d->op1, d->op0, nd.perm, nelt,
49581 /* Try the SSE4.1 blend variable merge instructions. */
49582 if (expand_vec_perm_blend (d))
49585 /* Try one of the AVX vpermil variable permutations. */
49586 if (expand_vec_perm_vpermil (d))
49589 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
49590 vpshufb, vpermd, vpermps or vpermq variable permutation. */
49591 if (expand_vec_perm_pshufb (d))
49594 /* Try the AVX2 vpalignr instruction. */
49595 if (expand_vec_perm_palignr (d, true))
49598 /* Try the AVX512F vpermi2 instructions. */
49599 if (ix86_expand_vec_perm_vpermi2 (NULL_RTX, NULL_RTX, NULL_RTX, NULL_RTX, d))
49602 /* See if we can get the same permutation in different vector integer
49604 if (canonicalize_vector_int_perm (d, &nd) && expand_vec_perm_1 (&nd))
/* The recursive attempt expanded into ND's target; copy the result back
   into the caller's target in the original mode.  */
49607 emit_move_insn (d->target, gen_lowpart (d->vmode, nd.target));
49613 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
49614 in terms of a pair of pshuflw + pshufhw instructions. */
49617 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
49619 unsigned char perm2[MAX_VECT_LEN];
/* Only a single-operand V8HI permutation can be done this way.  */
49623 if (d->vmode != V8HImode || !d->one_operand_p)
49626 /* The two permutations only operate in 64-bit lanes. */
49627 for (i = 0; i < 4; ++i)
49628 if (d->perm[i] >= 4)
49630 for (i = 4; i < 8; ++i)
49631 if (d->perm[i] < 4)
49637 /* Emit the pshuflw. */
/* Copy the four low-lane selectors (one byte each); the high lane is
   presumably filled with identity indices by the elided loop — confirm.  */
49638 memcpy (perm2, d->perm, 4);
49639 for (i = 4; i < 8; ++i)
49641 ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p);
49644 /* Emit the pshufhw. */
/* Second pass: keep the low lane and apply the high-lane selectors,
   operating on the result of the first shuffle (d->target).  */
49645 memcpy (perm2 + 4, d->perm + 4, 4);
49646 for (i = 0; i < 4; ++i)
49648 ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p);
49654 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
49655 the permutation using the SSSE3 palignr instruction. This succeeds
49656 when all of the elements in PERM fit within one vector and we merely
49657 need to shift them down so that a single vector permutation has a
49658 chance to succeed. If SINGLE_INSN_ONLY_P, succeed if only
49659 the vpalignr instruction itself can perform the requested permutation. */
49662 expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool single_insn_only_p)
49664 unsigned i, nelt = d->nelt;
49665 unsigned min, max, minswap, maxswap;
49666 bool in_order, ok, swap = false;
49668 struct expand_vec_perm_d dcopy;
49670 /* Even with AVX, palignr only operates on 128-bit vectors,
49671 in AVX2 palignr operates on both 128-bit lanes. */
49672 if ((!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
49673 && (!TARGET_AVX2 || GET_MODE_SIZE (d->vmode) != 32))
49678 minswap = 2 * nelt;
/* Compute the index range used by the permutation, both as given (min/max)
   and with the two operands swapped (minswap/maxswap).  */
49680 for (i = 0; i < nelt; ++i)
49682 unsigned e = d->perm[i];
49683 unsigned eswap = d->perm[i] ^ nelt;
49684 if (GET_MODE_SIZE (d->vmode) == 32)
/* For 256-bit vectors fold indices per 128-bit lane, since AVX2 palignr
   shifts each lane independently.  */
49686 e = (e & ((nelt / 2) - 1)) | ((e & nelt) >> 1);
49687 eswap = e ^ (nelt / 2);
49693 if (eswap < minswap)
49695 if (eswap > maxswap)
/* If neither orientation fits within one vector (lane), palignr can't
   reduce this to a single-operand shuffle.  */
49699 || max - min >= (GET_MODE_SIZE (d->vmode) == 32 ? nelt / 2 : nelt))
49701 if (d->one_operand_p
49703 || maxswap - minswap >= (GET_MODE_SIZE (d->vmode) == 32
49704 ? nelt / 2 : nelt))
49711 /* Given that we have SSSE3, we know we'll be able to implement the
49712 single operand permutation after the palignr with pshufb for
49713 128-bit vectors. If SINGLE_INSN_ONLY_P, in_order has to be computed
49715 if (d->testing_p && GET_MODE_SIZE (d->vmode) == 16 && !single_insn_only_p)
/* If the swapped orientation was chosen, exchange the operands and flip
   each index's operand bit.  */
49721 dcopy.op0 = d->op1;
49722 dcopy.op1 = d->op0;
49723 for (i = 0; i < nelt; ++i)
49724 dcopy.perm[i] ^= nelt;
/* Rebase indices by MIN and note whether the shift alone yields the
   requested order.  */
49728 for (i = 0; i < nelt; ++i)
49730 unsigned e = dcopy.perm[i];
49731 if (GET_MODE_SIZE (d->vmode) == 32
49733 && (e & (nelt / 2 - 1)) < min)
49734 e = e - min - (nelt / 2);
49741 dcopy.one_operand_p = true;
49743 if (single_insn_only_p && !in_order)
49746 /* For AVX2, test whether we can permute the result in one instruction. */
49751 dcopy.op1 = dcopy.op0;
49752 return expand_vec_perm_1 (&dcopy);
/* Emit the actual palignr: byte-shift the concatenation right by MIN
   elements.  */
49755 shift = GEN_INT (min * GET_MODE_UNIT_BITSIZE (d->vmode));
49756 if (GET_MODE_SIZE (d->vmode) == 16)
49758 target = gen_reg_rtx (TImode);
49759 emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, dcopy.op1),
49760 gen_lowpart (TImode, dcopy.op0), shift));
49764 target = gen_reg_rtx (V2TImode);
49765 emit_insn (gen_avx2_palignrv2ti (target,
49766 gen_lowpart (V2TImode, dcopy.op1),
49767 gen_lowpart (V2TImode, dcopy.op0),
49771 dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target);
49773 /* Test for the degenerate case where the alignment by itself
49774 produces the desired permutation. */
49777 emit_move_insn (d->target, dcopy.op0);
49781 ok = expand_vec_perm_1 (&dcopy);
49782 gcc_assert (ok || GET_MODE_SIZE (d->vmode) == 32);
49787 /* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
49788 the permutation using the SSE4_1 pblendv instruction. Potentially
49789 reduces permutation from 2 pshufb and or to 1 pshufb and pblendv. */
49792 expand_vec_perm_pblendv (struct expand_vec_perm_d *d)
49794 unsigned i, which, nelt = d->nelt;
49795 struct expand_vec_perm_d dcopy, dcopy1;
49796 machine_mode vmode = d->vmode;
49799 /* Use the same checks as in expand_vec_perm_blend. */
49800 if (d->one_operand_p)
49802 if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
49804 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
49806 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
49811 /* Figure out where permutation elements stay not in their
49812 respective lanes. */
/* WHICH accumulates, for the out-of-place elements, whether they come
   from op0 (bit 0) or op1 (bit 1).  */
49813 for (i = 0, which = 0; i < nelt; ++i)
49815 unsigned e = d->perm[i];
49817 which |= (e < nelt ? 1 : 2);
49819 /* We can pblend the part where elements stay not in their
49820 respective lanes only when these elements are all in one
49821 half of a permutation.
49822 {0 1 8 3 4 5 9 7} is ok as 8, 9 are at not at their respective
49823 lanes, but both 8 and 9 >= 8
49824 {0 1 8 3 4 5 2 7} is not ok as 2 and 8 are not at their
49825 respective lanes and 8 >= 8, but 2 not. */
49826 if (which != 1 && which != 2)
49828 if (d->testing_p && GET_MODE_SIZE (vmode) == 16)
49831 /* First we apply one operand permutation to the part where
49832 elements stay not in their respective lanes. */
49835 dcopy.op0 = dcopy.op1 = d->op1;
49837 dcopy.op0 = dcopy.op1 = d->op0;
49839 dcopy.target = gen_reg_rtx (vmode);
49840 dcopy.one_operand_p = true;
/* Mask away the operand bit: permute within the chosen single operand.  */
49842 for (i = 0; i < nelt; ++i)
49843 dcopy.perm[i] = d->perm[i] & (nelt - 1);
49845 ok = expand_vec_perm_1 (&dcopy);
49846 if (GET_MODE_SIZE (vmode) != 16 && !ok)
49853 /* Next we put permuted elements into their positions. */
49856 dcopy1.op1 = dcopy.target;
49858 dcopy1.op0 = dcopy.target;
/* Blend mask: position i takes the shuffled vector (nelt + i) where the
   original selected from the other operand, else the untouched operand.  */
49860 for (i = 0; i < nelt; ++i)
49861 dcopy1.perm[i] = ((d->perm[i] >= nelt) ? (nelt + i) : i);
49863 ok = expand_vec_perm_blend (&dcopy1);
49869 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
49871 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
49872 a two vector permutation into a single vector permutation by using
49873 an interleave operation to merge the vectors. */
49876 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
49878 struct expand_vec_perm_d dremap, dfinal;
49879 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
49880 unsigned HOST_WIDE_INT contents;
49881 unsigned char remap[2 * MAX_VECT_LEN];
49883 bool ok, same_halves = false;
49885 if (GET_MODE_SIZE (d->vmode) == 16)
49887 if (d->one_operand_p)
49890 else if (GET_MODE_SIZE (d->vmode) == 32)
49894 /* For 32-byte modes allow even d->one_operand_p.
49895 The lack of cross-lane shuffling in some instructions
49896 might prevent a single insn shuffle. */
49898 dfinal.testing_p = true;
49899 /* If expand_vec_perm_interleave3 can expand this into
49900 a 3 insn sequence, give up and let it be expanded as
49901 3 insn sequence. While that is one insn longer,
49902 it doesn't need a memory operand and in the common
49903 case that both interleave low and high permutations
49904 with the same operands are adjacent needs 4 insns
49905 for both after CSE. */
49906 if (expand_vec_perm_interleave3 (&dfinal))
49912 /* Examine from whence the elements come. */
/* CONTENTS is a bitmask over the 2*nelt possible source indices.  */
49914 for (i = 0; i < nelt; ++i)
49915 contents |= HOST_WIDE_INT_1U << d->perm[i];
49917 memset (remap, 0xff, sizeof (remap));
49920 if (GET_MODE_SIZE (d->vmode) == 16)
49922 unsigned HOST_WIDE_INT h1, h2, h3, h4;
49924 /* Split the two input vectors into 4 halves. */
49925 h1 = (HOST_WIDE_INT_1U << nelt2) - 1;
49930 /* If the elements from the low halves use interleave low, and similarly
49931 for interleave high. If the elements are from mis-matched halves, we
49932 can use shufps for V4SF/V4SI or do a DImode shuffle. */
49933 if ((contents & (h1 | h3)) == contents)
/* All sources in the two low halves: interleave-low remapping.  */
49936 for (i = 0; i < nelt2; ++i)
49939 remap[i + nelt] = i * 2 + 1;
49940 dremap.perm[i * 2] = i;
49941 dremap.perm[i * 2 + 1] = i + nelt;
49943 if (!TARGET_SSE2 && d->vmode == V4SImode)
49944 dremap.vmode = V4SFmode;
49946 else if ((contents & (h2 | h4)) == contents)
/* All sources in the two high halves: interleave-high remapping.  */
49949 for (i = 0; i < nelt2; ++i)
49951 remap[i + nelt2] = i * 2;
49952 remap[i + nelt + nelt2] = i * 2 + 1;
49953 dremap.perm[i * 2] = i + nelt2;
49954 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
49956 if (!TARGET_SSE2 && d->vmode == V4SImode)
49957 dremap.vmode = V4SFmode;
49959 else if ((contents & (h1 | h4)) == contents)
/* Low half of op0 and high half of op1: a DImode {0,3} shuffle.  */
49962 for (i = 0; i < nelt2; ++i)
49965 remap[i + nelt + nelt2] = i + nelt2;
49966 dremap.perm[i] = i;
49967 dremap.perm[i + nelt2] = i + nelt + nelt2;
49972 dremap.vmode = V2DImode;
49974 dremap.perm[0] = 0;
49975 dremap.perm[1] = 3;
49978 else if ((contents & (h2 | h3)) == contents)
/* High half of op0 and low half of op1: a DImode {1,2} shuffle.  */
49981 for (i = 0; i < nelt2; ++i)
49983 remap[i + nelt2] = i;
49984 remap[i + nelt] = i + nelt2;
49985 dremap.perm[i] = i + nelt2;
49986 dremap.perm[i + nelt2] = i + nelt;
49991 dremap.vmode = V2DImode;
49993 dremap.perm[0] = 1;
49994 dremap.perm[1] = 2;
/* 32-byte vectors: work with quarters instead of halves.  */
50002 unsigned int nelt4 = nelt / 4, nzcnt = 0;
50003 unsigned HOST_WIDE_INT q[8];
50004 unsigned int nonzero_halves[4];
50006 /* Split the two input vectors into 8 quarters. */
50007 q[0] = (HOST_WIDE_INT_1U << nelt4) - 1;
50008 for (i = 1; i < 8; ++i)
50009 q[i] = q[0] << (nelt4 * i);
50010 for (i = 0; i < 4; ++i)
50011 if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
50013 nonzero_halves[nzcnt] = i;
50019 gcc_assert (d->one_operand_p);
50020 nonzero_halves[1] = nonzero_halves[0];
50021 same_halves = true;
50023 else if (d->one_operand_p)
50025 gcc_assert (nonzero_halves[0] == 0);
50026 gcc_assert (nonzero_halves[1] == 1);
50031 if (d->perm[0] / nelt2 == nonzero_halves[1])
50033 /* Attempt to increase the likelihood that dfinal
50034 shuffle will be intra-lane. */
50035 std::swap (nonzero_halves[0], nonzero_halves[1]);
50038 /* vperm2f128 or vperm2i128. */
50039 for (i = 0; i < nelt2; ++i)
50041 remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
50042 remap[i + nonzero_halves[0] * nelt2] = i;
50043 dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
50044 dremap.perm[i] = i + nonzero_halves[0] * nelt2;
/* vperm2[fi]128 patterns exist only for these modes; express the lane
   shuffle in V8SImode otherwise.  */
50047 if (d->vmode != V8SFmode
50048 && d->vmode != V4DFmode
50049 && d->vmode != V8SImode)
50051 dremap.vmode = V8SImode;
50053 for (i = 0; i < 4; ++i)
50055 dremap.perm[i] = i + nonzero_halves[0] * 4;
50056 dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
50060 else if (d->one_operand_p)
50062 else if (TARGET_AVX2
50063 && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
/* All sources in even quarters: AVX2 interleave-low remapping.  */
50066 for (i = 0; i < nelt4; ++i)
50069 remap[i + nelt] = i * 2 + 1;
50070 remap[i + nelt2] = i * 2 + nelt2;
50071 remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
50072 dremap.perm[i * 2] = i;
50073 dremap.perm[i * 2 + 1] = i + nelt;
50074 dremap.perm[i * 2 + nelt2] = i + nelt2;
50075 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
50078 else if (TARGET_AVX2
50079 && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
/* All sources in odd quarters: AVX2 interleave-high remapping.  */
50082 for (i = 0; i < nelt4; ++i)
50084 remap[i + nelt4] = i * 2;
50085 remap[i + nelt + nelt4] = i * 2 + 1;
50086 remap[i + nelt2 + nelt4] = i * 2 + nelt2;
50087 remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
50088 dremap.perm[i * 2] = i + nelt4;
50089 dremap.perm[i * 2 + 1] = i + nelt + nelt4;
50090 dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
50091 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
50098 /* Use the remapping array set up above to move the elements from their
50099 swizzled locations into their final destinations. */
50101 for (i = 0; i < nelt; ++i)
50103 unsigned e = remap[d->perm[i]];
50104 gcc_assert (e < nelt);
50105 /* If same_halves is true, both halves of the remapped vector are the
50106 same. Avoid cross-lane accesses if possible. */
50107 if (same_halves && i >= nelt2)
50109 gcc_assert (e < nelt2);
50110 dfinal.perm[i] = e + nelt2;
50113 dfinal.perm[i] = e;
50117 dremap.target = gen_reg_rtx (dremap.vmode);
50118 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
50120 dfinal.op1 = dfinal.op0;
50121 dfinal.one_operand_p = true;
50123 /* Test if the final remap can be done with a single insn. For V4SFmode or
50124 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
/* NOTE(review): the start_sequence () call pairing with get_insns () below
   is elided in this excerpt — confirm against full source.  */
50126 ok = expand_vec_perm_1 (&dfinal);
50127 seq = get_insns ();
50136 if (dremap.vmode != dfinal.vmode)
50138 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
50139 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
50142 ok = expand_vec_perm_1 (&dremap);
50149 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
50150 a single vector cross-lane permutation into vpermq followed
50151 by any of the single insn permutations. */
50154 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
50156 struct expand_vec_perm_d dremap, dfinal;
50157 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
50158 unsigned contents[2];
50162 && (d->vmode == V32QImode || d->vmode == V16HImode)
50163 && d->one_operand_p))
/* Record, per destination half, which source quarters are referenced.  */
50168 for (i = 0; i < nelt2; ++i)
50170 contents[0] |= 1u << (d->perm[i] / nelt4);
50171 contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
/* Each destination half may draw from at most two source quarters,
   since vpermq places only two 64-bit chunks per 128-bit lane.  */
50174 for (i = 0; i < 2; ++i)
50176 unsigned int cnt = 0;
50177 for (j = 0; j < 4; ++j)
50178 if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
/* Build the vpermq (V4DI) permutation gathering the needed quarters.  */
50186 dremap.vmode = V4DImode;
50188 dremap.target = gen_reg_rtx (V4DImode);
50189 dremap.op0 = gen_lowpart (V4DImode, d->op0);
50190 dremap.op1 = dremap.op0;
50191 dremap.one_operand_p = true;
50192 for (i = 0; i < 2; ++i)
50194 unsigned int cnt = 0;
50195 for (j = 0; j < 4; ++j)
50196 if ((contents[i] & (1u << j)) != 0)
50197 dremap.perm[2 * i + cnt++] = j;
50198 for (; cnt < 2; ++cnt)
50199 dremap.perm[2 * i + cnt] = 0;
50203 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
50204 dfinal.op1 = dfinal.op0;
50205 dfinal.one_operand_p = true;
/* Translate the original indices into positions within the vpermq
   result; indices are now intra-lane.  */
50206 for (i = 0, j = 0; i < nelt; ++i)
50210 dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
50211 if ((d->perm[i] / nelt4) == dremap.perm[j])
50213 else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
50214 dfinal.perm[i] |= nelt4;
50216 gcc_unreachable ();
50219 ok = expand_vec_perm_1 (&dremap);
50222 ok = expand_vec_perm_1 (&dfinal);
50228 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to expand
50229 a vector permutation using two instructions, vperm2f128 resp.
50230 vperm2i128 followed by any single in-lane permutation. */
50233 expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
50235 struct expand_vec_perm_d dfirst, dsecond;
50236 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm;
50240 || GET_MODE_SIZE (d->vmode) != 32
50241 || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2))
50245 dsecond.one_operand_p = false;
50246 dsecond.testing_p = true;
50248 /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
50249 immediate. For perm < 16 the second permutation uses
50250 d->op0 as first operand, for perm >= 16 it uses d->op1
50251 as first operand. The second operand is the result of
/* Brute-force all 32 candidate lane selections.  */
50253 for (perm = 0; perm < 32; perm++)
50255 /* Ignore permutations which do not move anything cross-lane. */
50258 /* The second shuffle for e.g. V4DFmode has
50259 0123 and ABCD operands.
50260 Ignore AB23, as 23 is already in the second lane
50261 of the first operand. */
50262 if ((perm & 0xc) == (1 << 2)) continue;
50263 /* And 01CD, as 01 is in the first lane of the first
50265 if ((perm & 3) == 0) continue;
50266 /* And 4567, as then the vperm2[fi]128 doesn't change
50267 anything on the original 4567 second operand. */
50268 if ((perm & 0xf) == ((3 << 2) | 2)) continue;
50272 /* The second shuffle for e.g. V4DFmode has
50273 4567 and ABCD operands.
50274 Ignore AB67, as 67 is already in the second lane
50275 of the first operand. */
50276 if ((perm & 0xc) == (3 << 2)) continue;
50277 /* And 45CD, as 45 is in the first lane of the first
50279 if ((perm & 3) == 2) continue;
50280 /* And 0123, as then the vperm2[fi]128 doesn't change
50281 anything on the original 0123 first operand. */
50282 if ((perm & 0xf) == (1 << 2)) continue;
/* Check whether every requested element is reachable either from the
   lane placed by this vperm2[fi]128 candidate or from the untouched
   operand; construct the follow-up in-lane permutation as we go.  */
50285 for (i = 0; i < nelt; i++)
50287 j = d->perm[i] / nelt2;
50288 if (j == ((perm >> (2 * (i >= nelt2))) & 3))
50289 dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1));
50290 else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16))
50291 dsecond.perm[i] = d->perm[i] & (nelt - 1);
50299 ok = expand_vec_perm_1 (&dsecond);
50310 /* Found a usable second shuffle. dfirst will be
50311 vperm2f128 on d->op0 and d->op1. */
50312 dsecond.testing_p = false;
50314 dfirst.target = gen_reg_rtx (d->vmode);
50315 for (i = 0; i < nelt; i++)
50316 dfirst.perm[i] = (i & (nelt2 - 1))
50317 + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;
50319 canonicalize_perm (&dfirst);
50320 ok = expand_vec_perm_1 (&dfirst);
50323 /* And dsecond is some single insn shuffle, taking
50324 d->op0 and result of vperm2f128 (if perm < 16) or
50325 d->op1 and result of vperm2f128 (otherwise). */
50327 dsecond.op0 = dsecond.op1;
50328 dsecond.op1 = dfirst.target;
50330 ok = expand_vec_perm_1 (&dsecond);
50336 /* For one operand, the only useful vperm2f128 permutation is 0x01
50338 if (d->one_operand_p)
50345 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
50346 a two vector permutation using 2 intra-lane interleave insns
50347 and cross-lane shuffle for 32-byte vectors. */
50350 expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
50353 rtx (*gen) (rtx, rtx, rtx);
50355 if (d->one_operand_p)
50357 if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
50359 else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
/* The permutation must be exactly an interleave: element 0 selects the
   low (0) or high (nelt/2) half, and the rest follow the alternating
   op0/op1 interleave pattern.  */
50365 if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
50367 for (i = 0; i < nelt; i += 2)
50368 if (d->perm[i] != d->perm[0] + i / 2
50369 || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
/* Select the high/low interleave expander for the mode (the mode
   switch lines are elided in this excerpt).  */
50379 gen = gen_vec_interleave_highv32qi;
50381 gen = gen_vec_interleave_lowv32qi;
50385 gen = gen_vec_interleave_highv16hi;
50387 gen = gen_vec_interleave_lowv16hi;
50391 gen = gen_vec_interleave_highv8si;
50393 gen = gen_vec_interleave_lowv8si;
50397 gen = gen_vec_interleave_highv4di;
50399 gen = gen_vec_interleave_lowv4di;
50403 gen = gen_vec_interleave_highv8sf;
50405 gen = gen_vec_interleave_lowv8sf;
50409 gen = gen_vec_interleave_highv4df;
50411 gen = gen_vec_interleave_lowv4df;
50414 gcc_unreachable ();
50417 emit_insn (gen (d->target, d->op0, d->op1));
50421 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement
50422 a single vector permutation using a single intra-lane vector
50423 permutation, vperm2f128 swapping the lanes and vblend* insn blending
50424 the non-swapped and swapped vectors together. */
50427 expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
50429 struct expand_vec_perm_d dfirst, dsecond;
50430 unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2;
50433 rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;
50437 || (d->vmode != V8SFmode && d->vmode != V4DFmode)
50438 || !d->one_operand_p)
/* Build the intra-lane permutation DFIRST.  0xff marks an as-yet-unused
   slot; two conflicting requests for the same slot mean failure.  */
50442 for (i = 0; i < nelt; i++)
50443 dfirst.perm[i] = 0xff;
50444 for (i = 0, msk = 0; i < nelt; i++)
50446 j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2;
50447 if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i])
50449 dfirst.perm[j] = d->perm[i];
/* Fill the remaining slots with identity.  */
50453 for (i = 0; i < nelt; i++)
50454 if (dfirst.perm[i] == 0xff)
50455 dfirst.perm[i] = i;
50458 dfirst.target = gen_reg_rtx (dfirst.vmode);
/* NOTE(review): the start_sequence () pairing with get_insns () below is
   elided in this excerpt — confirm against full source.  */
50461 ok = expand_vec_perm_1 (&dfirst);
50462 seq = get_insns ();
/* DSECOND is the lane swap (i ^ nelt2) of DFIRST's result.  */
50474 dsecond.op0 = dfirst.target;
50475 dsecond.op1 = dfirst.target;
50476 dsecond.one_operand_p = true;
50477 dsecond.target = gen_reg_rtx (dsecond.vmode);
50478 for (i = 0; i < nelt; i++)
50479 dsecond.perm[i] = i ^ nelt2;
50481 ok = expand_vec_perm_1 (&dsecond);
/* Blend the unswapped and swapped vectors with the accumulated mask.  */
50484 blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256;
50485 emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk)));
50489 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement a V4DF
50490 permutation using two vperm2f128, followed by a vshufpd insn blending
50491 the two vectors together. */
50494 expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d)
50496 struct expand_vec_perm_d dfirst, dsecond, dthird;
50499 if (!TARGET_AVX || (d->vmode != V4DFmode))
/* DFIRST gathers the 128-bit pairs holding the even destination
   elements, DSECOND those holding the odd ones (&~1 rounds each index
   down to the start of its pair).  */
50509 dfirst.perm[0] = (d->perm[0] & ~1);
50510 dfirst.perm[1] = (d->perm[0] & ~1) + 1;
50511 dfirst.perm[2] = (d->perm[2] & ~1);
50512 dfirst.perm[3] = (d->perm[2] & ~1) + 1;
50513 dsecond.perm[0] = (d->perm[1] & ~1);
50514 dsecond.perm[1] = (d->perm[1] & ~1) + 1;
50515 dsecond.perm[2] = (d->perm[3] & ~1);
50516 dsecond.perm[3] = (d->perm[3] & ~1) + 1;
/* DTHIRD is the final vshufpd: pick the low or high element (perm % 2)
   of each gathered pair, alternating between the two intermediates.  */
50517 dthird.perm[0] = (d->perm[0] % 2);
50518 dthird.perm[1] = (d->perm[1] % 2) + 4;
50519 dthird.perm[2] = (d->perm[2] % 2) + 2;
50520 dthird.perm[3] = (d->perm[3] % 2) + 6;
50522 dfirst.target = gen_reg_rtx (dfirst.vmode);
50523 dsecond.target = gen_reg_rtx (dsecond.vmode);
50524 dthird.op0 = dfirst.target;
50525 dthird.op1 = dsecond.target;
50526 dthird.one_operand_p = false;
50528 canonicalize_perm (&dfirst);
50529 canonicalize_perm (&dsecond);
50531 ok = expand_vec_perm_1 (&dfirst)
50532 && expand_vec_perm_1 (&dsecond)
50533 && expand_vec_perm_1 (&dthird);
50540 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
50541 permutation with two pshufb insns and an ior. We should have already
50542 failed all two instruction sequences. */
50545 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
50547 rtx rperm[2][16], vperm, l, h, op, m128;
50548 unsigned int i, nelt, eltsz;
50550 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
50552 gcc_assert (!d->one_operand_p);
50558 eltsz = GET_MODE_UNIT_SIZE (d->vmode);
50560 /* Generate two permutation masks. If the required element is within
50561 the given vector it is shuffled into the proper lane. If the required
50562 element is in the other vector, force a zero into the lane by setting
50563 bit 7 in the permutation mask. */
50564 m128 = GEN_INT (-128);
50565 for (i = 0; i < nelt; ++i)
50567 unsigned j, e = d->perm[i];
50568 unsigned which = (e >= nelt);
/* Expand the element index into per-byte selectors; the mask for the
   non-sourcing operand gets -128 (zero the lane).  */
50572 for (j = 0; j < eltsz; ++j)
50574 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
50575 rperm[1-which][i*eltsz + j] = m128;
/* pshufb of op0 with mask 0 -> L.  */
50579 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
50580 vperm = force_reg (V16QImode, vperm);
50582 l = gen_reg_rtx (V16QImode);
50583 op = gen_lowpart (V16QImode, d->op0);
50584 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
/* pshufb of op1 with mask 1 -> H.  */
50586 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
50587 vperm = force_reg (V16QImode, vperm);
50589 h = gen_reg_rtx (V16QImode);
50590 op = gen_lowpart (V16QImode, d->op1);
50591 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
/* Combine: since the unsourced lanes were zeroed, an OR merges them.  */
50594 if (d->vmode != V16QImode)
50595 op = gen_reg_rtx (V16QImode);
50596 emit_insn (gen_iorv16qi3 (op, l, h));
50597 if (op != d->target)
50598 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
50603 /* Implement arbitrary permutation of one V32QImode and V16QImode operand
50604 with two vpshufb insns, vpermq and vpor. We should have already failed
50605 all two or three instruction sequences. */
50608 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
50610 rtx rperm[2][32], vperm, l, h, hp, op, m128;
50611 unsigned int i, nelt, eltsz;
50614 || !d->one_operand_p
50615 || (d->vmode != V32QImode && d->vmode != V16HImode))
50622 eltsz = GET_MODE_UNIT_SIZE (d->vmode);
50624 /* Generate two permutation masks. If the required element is within
50625 the same lane, it is shuffled in. If the required element from the
50626 other lane, force a zero by setting bit 7 in the permutation mask.
50627 In the other mask the mask has non-negative elements if element
50628 is requested from the other lane, but also moved to the other lane,
50629 so that the result of vpshufb can have the two V2TImode halves
50631 m128 = GEN_INT (-128);
50632 for (i = 0; i < nelt; ++i)
/* E is the index within its 128-bit lane; WHICH is nonzero (a byte
   offset of 16) when the element comes from the other lane.  */
50634 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
50635 unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
50637 for (j = 0; j < eltsz; ++j)
50639 rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
50640 rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
/* First vpshufb produces the cross-lane elements, lane-swapped.  */
50644 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
50645 vperm = force_reg (V32QImode, vperm);
50647 h = gen_reg_rtx (V32QImode);
50648 op = gen_lowpart (V32QImode, d->op0);
50649 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
50651 /* Swap the 128-byte lanes of h into hp. */
50652 hp = gen_reg_rtx (V4DImode);
50653 op = gen_lowpart (V4DImode, h);
50654 emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
/* Second vpshufb produces the intra-lane elements.  */
50657 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
50658 vperm = force_reg (V32QImode, vperm);
50660 l = gen_reg_rtx (V32QImode);
50661 op = gen_lowpart (V32QImode, d->op0);
50662 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
/* OR the two partial results; zeroed lanes make this a clean merge.  */
50665 if (d->vmode != V32QImode)
50666 op = gen_reg_rtx (V32QImode);
50667 emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));
50668 if (op != d->target)
50669 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
50674 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
50675 and extract-odd permutations of two V32QImode and V16QImode operand
50676 with two vpshufb insns, vpor and vpermq. We should have already
50677 failed all two or three instruction sequences. */
50680 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
50682 rtx rperm[2][32], vperm, l, h, ior, op, m128;
50683 unsigned int i, nelt, eltsz;
50686 || d->one_operand_p
50687 || (d->vmode != V32QImode && d->vmode != V16HImode))
/* Verify the permutation really is extract-even or extract-odd.  */
50690 for (i = 0; i < d->nelt; ++i)
50691 if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
50698 eltsz = GET_MODE_UNIT_SIZE (d->vmode);
50700 /* Generate two permutation masks. In the first permutation mask
50701 the first quarter will contain indexes for the first half
50702 of the op0, the second quarter will contain bit 7 set, third quarter
50703 will contain indexes for the second half of the op0 and the
50704 last quarter bit 7 set. In the second permutation mask
50705 the first quarter will contain bit 7 set, the second quarter
50706 indexes for the first half of the op1, the third quarter bit 7 set
50707 and last quarter indexes for the second half of the op1.
50708 I.e. the first mask e.g. for V32QImode extract even will be:
50709 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
50710 (all values masked with 0xf except for -128) and second mask
50711 for extract even will be
50712 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
50713 m128 = GEN_INT (-128);
50714 for (i = 0; i < nelt; ++i)
50716 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
50717 unsigned which = d->perm[i] >= nelt;
/* XORV swaps the middle two quarters so op1's elements land where the
   final vpermq expects them.  */
50718 unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;
50720 for (j = 0; j < eltsz; ++j)
50722 rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
50723 rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
/* vpshufb of op0 with mask 0 -> L.  */
50727 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
50728 vperm = force_reg (V32QImode, vperm);
50730 l = gen_reg_rtx (V32QImode);
50731 op = gen_lowpart (V32QImode, d->op0);
50732 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
/* vpshufb of op1 with mask 1 -> H.  */
50734 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
50735 vperm = force_reg (V32QImode, vperm);
50737 h = gen_reg_rtx (V32QImode);
50738 op = gen_lowpart (V32QImode, d->op1);
50739 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
/* Merge the zero-padded halves with OR.  */
50741 ior = gen_reg_rtx (V32QImode);
50742 emit_insn (gen_iorv32qi3 (ior, l, h));
50744 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */
50745 op = gen_reg_rtx (V4DImode);
50746 ior = gen_lowpart (V4DImode, ior);
50747 emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
50748 const1_rtx, GEN_INT (3)));
50749 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
50754 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
50755 and extract-odd permutations of two V16QI, V8HI, V16HI or V32QI operands
50756 with two "and" and "pack" or two "shift" and "pack" insns. We should
50757 have already failed all two instruction sequences. */
50760 expand_vec_perm_even_odd_pack (struct expand_vec_perm_d *d)
50762 rtx op, dop0, dop1, t, rperm[16];
50763 unsigned i, odd, c, s, nelt = d->nelt;
50764 bool end_perm = false;
50765 machine_mode half_mode;
/* Insn generators chosen per source mode below: an AND (even case) or
   logical right shift (odd case) to isolate the wanted sub-elements,
   then a PACK to narrow the two inputs into the result.  */
50766 rtx (*gen_and) (rtx, rtx, rtx);
50767 rtx (*gen_pack) (rtx, rtx, rtx);
50768 rtx (*gen_shift) (rtx, rtx, rtx);
/* This strategy needs two distinct operands.  */
50770 if (d->one_operand_p)
50776 /* Required for "pack". */
50777 if (!TARGET_SSE4_1)
/* V8HI input: operate on it as V4SI and pack with packusdw.  */
50781 half_mode = V4SImode;
50782 gen_and = gen_andv4si3;
50783 gen_pack = gen_sse4_1_packusdw;
50784 gen_shift = gen_lshrv4si3;
50787 /* No check as all instructions are SSE2. */
/* V16QI input: operate on it as V8HI and pack with packuswb.  */
50790 half_mode = V8HImode;
50791 gen_and = gen_andv8hi3;
50792 gen_pack = gen_sse2_packuswb;
50793 gen_shift = gen_lshrv8hi3;
/* V16HI input (AVX2): operate on it as V8SI, pack with vpackusdw.  */
50800 half_mode = V8SImode;
50801 gen_and = gen_andv8si3;
50802 gen_pack = gen_avx2_packusdw;
50803 gen_shift = gen_lshrv8si3;
/* V32QI input (AVX2): operate on it as V16HI, pack with vpackuswb.  */
50811 half_mode = V16HImode;
50812 gen_and = gen_andv16hi3;
50813 gen_pack = gen_avx2_packuswb;
50814 gen_shift = gen_lshrv16hi3;
50818 /* Only V8HI, V16QI, V16HI and V32QI modes are more profitable than
50819 general shuffles. */
50823 /* Check that permutation is even or odd. */
50828 for (i = 1; i < nelt; ++i)
50829 if (d->perm[i] != 2 * i + odd)
50835 dop0 = gen_reg_rtx (half_mode);
50836 dop1 = gen_reg_rtx (half_mode);
/* Even extraction: AND each wide element with a constant mask so only
   the low sub-element survives, then pack.  */
50839 for (i = 0; i < nelt / 2; i++)
50840 rperm[i] = GEN_INT (c);
50841 t = gen_rtx_CONST_VECTOR (half_mode, gen_rtvec_v (nelt / 2, rperm));
50842 t = force_reg (half_mode, t);
50843 emit_insn (gen_and (dop0, t, gen_lowpart (half_mode, d->op0)));
50844 emit_insn (gen_and (dop1, t, gen_lowpart (half_mode, d->op1)));
/* Odd extraction: shift the odd sub-elements down into the low
   positions instead of masking.  */
50848 emit_insn (gen_shift (dop0,
50849 gen_lowpart (half_mode, d->op0),
50851 emit_insn (gen_shift (dop1,
50852 gen_lowpart (half_mode, d->op1),
50855 /* In AVX2 for 256 bit case we need to permute pack result. */
50856 if (TARGET_AVX2 && end_perm)
50858 op = gen_reg_rtx (d->vmode);
50859 t = gen_reg_rtx (V4DImode);
50860 emit_insn (gen_pack (op, dop0, dop1));
/* The 256-bit pack insns work within 128-bit lanes, so reorder the
   64-bit chunks with a V4DI permute to get the linear result.  */
50861 emit_insn (gen_avx2_permv4di_1 (t,
50862 gen_lowpart (V4DImode, op),
50867 emit_move_insn (d->target, gen_lowpart (d->vmode, t));
50870 emit_insn (gen_pack (d->target, dop0, dop1));
50875 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
50876 and extract-odd permutations of two V64QI operands
50877 with two "shifts", two "truncs" and one "concat" insns for "odd"
50878 and two "truncs" and one concat insn for "even."
50879 Have already failed all two instruction sequences. */
50882 expand_vec_perm_even_odd_trunc (struct expand_vec_perm_d *d)
50884 rtx t1, t2, t3, t4;
50885 unsigned i, odd, nelt = d->nelt;
/* Needs AVX-512BW (for the V32HI->V32QI truncation); only the
   two-operand V64QI case is handled here.  */
50887 if (!TARGET_AVX512BW
50888 || d->one_operand_p
50889 || d->vmode != V64QImode)
50892 /* Check that permutation is even or odd. */
50897 for (i = 1; i < nelt; ++i)
50898 if (d->perm[i] != 2 * i + odd)
/* Odd extraction: shift each 16-bit element right so the odd byte
   lands in the low half before truncating.  */
50907 t1 = gen_reg_rtx (V32HImode);
50908 t2 = gen_reg_rtx (V32HImode);
50909 emit_insn (gen_lshrv32hi3 (t1,
50910 gen_lowpart (V32HImode, d->op0),
50912 emit_insn (gen_lshrv32hi3 (t2,
50913 gen_lowpart (V32HImode, d->op1),
/* Even extraction: the low byte of each word is already in place,
   so just retype the operands.  */
50918 t1 = gen_lowpart (V32HImode, d->op0);
50919 t2 = gen_lowpart (V32HImode, d->op1);
/* Truncate each V32HI half to V32QI and concatenate the two halves
   into the V64QI result.  */
50922 t3 = gen_reg_rtx (V32QImode);
50923 t4 = gen_reg_rtx (V32QImode);
50924 emit_insn (gen_avx512bw_truncatev32hiv32qi2 (t3, t1));
50925 emit_insn (gen_avx512bw_truncatev32hiv32qi2 (t4, t2));
50926 emit_insn (gen_avx_vec_concatv64qi (d->target, t3, t4));
50931 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
50932 and extract-odd permutations. */
50935 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
50937 rtx t1, t2, t3, t4, t5;
/* V4DF case: split the operands into low/high 128-bit lane pairs with
   vperm2f128, then a single unpck[lh]pd finishes the job.  */
50944 t1 = gen_reg_rtx (V4DFmode);
50945 t2 = gen_reg_rtx (V4DFmode);
50947 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
50948 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
50949 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
50951 /* Now an unpck[lh]pd will produce the result required. */
50953 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
50955 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
/* V8SF case: shufps immediate 0x88 picks the even elements of each
   lane, 0xdd the odd ones; lane fixups follow.  */
50961 int mask = odd ? 0xdd : 0x88;
50965 t1 = gen_reg_rtx (V8SFmode);
50966 t2 = gen_reg_rtx (V8SFmode);
50967 t3 = gen_reg_rtx (V8SFmode);
50969 /* Shuffle within the 128-bit lanes to produce:
50970 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
50971 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
50974 /* Shuffle the lanes around to produce:
50975 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
50976 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
50979 /* Shuffle within the 128-bit lanes to produce:
50980 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
50981 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
50983 /* Shuffle within the 128-bit lanes to produce:
50984 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
50985 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
50987 /* Shuffle the lanes around to produce:
50988 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
50989 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
50998 /* These are always directly implementable by expand_vec_perm_1. */
50999 gcc_unreachable ();
51003 return expand_vec_perm_even_odd_pack (d);
51004 else if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB)
51005 return expand_vec_perm_pshufb2 (d);
51010 /* We need 2*log2(N)-1 operations to achieve odd/even
51011 with interleave. */
51012 t1 = gen_reg_rtx (V8HImode);
51013 t2 = gen_reg_rtx (V8HImode);
51014 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1))
51015 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
51016 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
51017 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
51019 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
51021 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
51027 return expand_vec_perm_even_odd_pack (d);
51031 return expand_vec_perm_even_odd_pack (d);
51034 return expand_vec_perm_even_odd_trunc (d);
/* V4DI case: reuse the V4DF strategy by retyping the operands.  */
51039 struct expand_vec_perm_d d_copy = *d;
51040 d_copy.vmode = V4DFmode;
51042 d_copy.target = gen_raw_REG (V4DFmode, LAST_VIRTUAL_REGISTER + 1);
51044 d_copy.target = gen_reg_rtx (V4DFmode);
51045 d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
51046 d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
51047 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
51050 emit_move_insn (d->target,
51051 gen_lowpart (V4DImode, d_copy.target));
51060 t1 = gen_reg_rtx (V4DImode);
51061 t2 = gen_reg_rtx (V4DImode);
51063 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
51064 emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
51065 emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));
51067 /* Now an vpunpck[lh]qdq will produce the result required. */
51069 t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
51071 t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
/* V8SI case: first try reusing the V8SF strategy via retyping.  */
51078 struct expand_vec_perm_d d_copy = *d;
51079 d_copy.vmode = V8SFmode;
51081 d_copy.target = gen_raw_REG (V8SFmode, LAST_VIRTUAL_REGISTER + 1);
51083 d_copy.target = gen_reg_rtx (V8SFmode);
51084 d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
51085 d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
51086 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
51089 emit_move_insn (d->target,
51090 gen_lowpart (V8SImode, d_copy.target));
51099 t1 = gen_reg_rtx (V8SImode);
51100 t2 = gen_reg_rtx (V8SImode);
51101 t3 = gen_reg_rtx (V4DImode);
51102 t4 = gen_reg_rtx (V4DImode);
51103 t5 = gen_reg_rtx (V4DImode);
51105 /* Shuffle the lanes around into
51106 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
51107 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, d->op0),
51108 gen_lowpart (V4DImode, d->op1),
51110 emit_insn (gen_avx2_permv2ti (t4, gen_lowpart (V4DImode, d->op0),
51111 gen_lowpart (V4DImode, d->op1),
51114 /* Swap the 2nd and 3rd position in each lane into
51115 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
51116 emit_insn (gen_avx2_pshufdv3 (t1, gen_lowpart (V8SImode, t3),
51117 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
51118 emit_insn (gen_avx2_pshufdv3 (t2, gen_lowpart (V8SImode, t4),
51119 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
51121 /* Now an vpunpck[lh]qdq will produce
51122 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */
51124 t3 = gen_avx2_interleave_highv4di (t5, gen_lowpart (V4DImode, t1),
51125 gen_lowpart (V4DImode, t2));
51127 t3 = gen_avx2_interleave_lowv4di (t5, gen_lowpart (V4DImode, t1),
51128 gen_lowpart (V4DImode, t2));
51130 emit_move_insn (d->target, gen_lowpart (V8SImode, t5));
51134 gcc_unreachable ();
51140 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
51141 extract-even and extract-odd permutations. */
51144 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
51146 unsigned i, odd, nelt = d->nelt;
/* The first selector element determines even (0) or odd (1).  */
51149 if (odd != 0 && odd != 1)
/* Every remaining element must follow the 2*i + odd pattern.  */
51152 for (i = 1; i < nelt; ++i)
51153 if (d->perm[i] != 2 * i + odd)
51156 return expand_vec_perm_even_odd_1 (d, odd);
51159 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
51160 permutations. We assume that expand_vec_perm_1 has already failed. */
51163 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
51165 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
51166 machine_mode vmode = d->vmode;
51167 unsigned char perm2[4];
51168 rtx op0 = d->op0, dest;
51175 /* These are special-cased in sse.md so that we can optionally
51176 use the vbroadcast instruction. They expand to two insns
51177 if the input happens to be in a register. */
51178 gcc_unreachable ();
51184 /* These are always implementable using standard shuffle patterns. */
51185 gcc_unreachable ();
51189 /* These can be implemented via interleave. We save one insn by
51190 stopping once we have promoted to V4SImode and then use pshufd. */
/* Choose the low or high interleave depending on which half of the
   vector holds the broadcast element, then widen step by step.  */
51196 rtx (*gen) (rtx, rtx, rtx)
51197 = vmode == V16QImode ? gen_vec_interleave_lowv16qi
51198 : gen_vec_interleave_lowv8hi;
51202 gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
51203 : gen_vec_interleave_highv8hi;
/* Repeatedly self-interleave, doubling element width each round,
   until the value lives in a V4SI element.  */
51208 dest = gen_reg_rtx (vmode);
51209 emit_insn (gen (dest, op0, op0));
51210 vmode = get_mode_wider_vector (vmode);
51211 op0 = gen_lowpart (vmode, dest);
51213 while (vmode != V4SImode);
/* Finish with a pshufd-style select that replicates the chosen
   V4SI element into all four positions.  */
51215 memset (perm2, elt, 4);
51216 dest = gen_reg_rtx (V4SImode);
51217 ok = expand_vselect (dest, op0, perm2, 4, d->testing_p);
51220 emit_move_insn (d->target, gen_lowpart (d->vmode, dest));
51228 /* For AVX2 broadcasts of the first element vpbroadcast* or
51229 vpermq should be used by expand_vec_perm_1. */
51230 gcc_assert (!TARGET_AVX2 || d->perm[0]);
51234 gcc_unreachable ();
51238 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
51239 broadcast permutations. */
51242 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
51244 unsigned i, elt, nelt = d->nelt;
/* A broadcast by definition replicates a single operand.  */
51246 if (!d->one_operand_p)
/* Every selector element must equal the first one.  */
51250 for (i = 1; i < nelt; ++i)
51251 if (d->perm[i] != elt)
51254 return expand_vec_perm_broadcast_1 (d);
51257 /* Implement arbitrary permutations of two V64QImode operands
51258 with 2 vpermi2w, 2 vpshufb and one vpor instruction. */
51260 expand_vec_perm_vpermi2_vpshub2 (struct expand_vec_perm_d *d)
51262 if (!TARGET_AVX512BW || !(d->vmode == V64QImode))
51268 struct expand_vec_perm_d ds[2];
51269 rtx rperm[128], vperm, target0, target1;
51270 unsigned int i, nelt;
51271 machine_mode vmode;
/* Set up two word-granular sub-permutations over the same operands
   retyped as V32HI.  */
51276 for (i = 0; i < 2; i++)
51279 ds[i].vmode = V32HImode;
51281 ds[i].target = gen_reg_rtx (V32HImode);
51282 ds[i].op0 = gen_lowpart (V32HImode, d->op0);
51283 ds[i].op1 = gen_lowpart (V32HImode, d->op1);
51286 /* Prepare permutations such that the first one takes care of
51287 putting the even bytes into the right positions or one higher
51288 positions (ds[0]) and the second one takes care of
51289 putting the odd bytes into the right positions or one below
51292 for (i = 0; i < nelt; i++)
51294 ds[i & 1].perm[i / 2] = d->perm[i] / 2;
51297 rperm[i] = constm1_rtx;
51298 rperm[i + 64] = GEN_INT ((i & 14) + (d->perm[i] & 1));
51302 rperm[i] = GEN_INT ((i & 14) + (d->perm[i] & 1));
51303 rperm[i + 64] = constm1_rtx;
/* Expand the two word permutations, then use vpshufb with the byte
   masks built above to select/adjust bytes within each word.  */
51307 bool ok = expand_vec_perm_1 (&ds[0]);
51309 ds[0].target = gen_lowpart (V64QImode, ds[0].target);
51311 ok = expand_vec_perm_1 (&ds[1]);
51313 ds[1].target = gen_lowpart (V64QImode, ds[1].target);
51315 vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm));
51316 vperm = force_reg (vmode, vperm);
51317 target0 = gen_reg_rtx (V64QImode);
51318 emit_insn (gen_avx512bw_pshufbv64qi3 (target0, ds[0].target, vperm));
51320 vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm + 64));
51321 vperm = force_reg (vmode, vperm);
51322 target1 = gen_reg_rtx (V64QImode);
51323 emit_insn (gen_avx512bw_pshufbv64qi3 (target1, ds[1].target, vperm));
/* OR the two partial results into the final destination.  */
51325 emit_insn (gen_iorv64qi3 (d->target, target0, target1));
51329 /* Implement arbitrary permutation of two V32QImode and V16QImode operands
51330 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
51331 all the shorter instruction sequences. */
51334 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
51336 rtx rperm[4][32], vperm, l[2], h[2], op, m128;
51337 unsigned int i, nelt, eltsz;
51341 || d->one_operand_p
51342 || (d->vmode != V32QImode && d->vmode != V16HImode)
51349 eltsz = GET_MODE_UNIT_SIZE (d->vmode);
51351 /* Generate 4 permutation masks. If the required element is within
51352 the same lane, it is shuffled in. If the required element from the
51353 other lane, force a zero by setting bit 7 in the permutation mask.
51354 In the other mask the mask has non-negative elements if element
51355 is requested from the other lane, but also moved to the other lane,
51356 so that the result of vpshufb can have the two V2TImode halves
51358 m128 = GEN_INT (-128);
/* Start every mask as "zero this destination byte" (-128 sets bit 7
   of a vpshufb control byte).  */
51359 for (i = 0; i < 32; ++i)
51361 rperm[0][i] = m128;
51362 rperm[1][i] = m128;
51363 rperm[2][i] = m128;
51364 rperm[3][i] = m128;
/* Fill the masks.  WHICH encodes the source operand (op1 vs op0) and
   whether the element crosses a 128-bit lane; USED records which of
   the four masks actually get entries.  */
51370 for (i = 0; i < nelt; ++i)
51372 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
51373 unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
51374 unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);
51376 for (j = 0; j < eltsz; ++j)
51377 rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
51378 used[which] = true;
/* Emit the cross-lane vpshufb's (odd-indexed masks) when needed.  */
51381 for (i = 0; i < 2; ++i)
51383 if (!used[2 * i + 1])
51388 vperm = gen_rtx_CONST_VECTOR (V32QImode,
51389 gen_rtvec_v (32, rperm[2 * i + 1]));
51390 vperm = force_reg (V32QImode, vperm);
51391 h[i] = gen_reg_rtx (V32QImode);
51392 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
51393 emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
51396 /* Swap the 128-bit lanes of h[X]. */
51397 for (i = 0; i < 2; ++i)
51399 if (h[i] == NULL_RTX)
51401 op = gen_reg_rtx (V4DImode);
51402 emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
51403 const2_rtx, GEN_INT (3), const0_rtx,
51405 h[i] = gen_lowpart (V32QImode, op);
/* Emit the same-lane vpshufb's (even-indexed masks) when needed.  */
51408 for (i = 0; i < 2; ++i)
51415 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
51416 vperm = force_reg (V32QImode, vperm);
51417 l[i] = gen_reg_rtx (V32QImode);
51418 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
51419 emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
/* OR together the same-lane and cross-lane halves per operand.  */
51422 for (i = 0; i < 2; ++i)
51426 op = gen_reg_rtx (V32QImode);
51427 emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
51434 gcc_assert (l[0] && l[1]);
/* Final OR merges the contributions of the two operands.  */
51436 if (d->vmode != V32QImode)
51437 op = gen_reg_rtx (V32QImode);
51438 emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
51439 if (op != d->target)
51440 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
51444 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
51445 With all of the interface bits taken care of, perform the expansion
51446 in D and return true on success. */
51449 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
51451 /* Try a single instruction expansion. */
51452 if (expand_vec_perm_1 (d))
51455 /* Try sequences of two instructions. */
51457 if (expand_vec_perm_pshuflw_pshufhw (d))
51460 if (expand_vec_perm_palignr (d, false))
51463 if (expand_vec_perm_interleave2 (d))
51466 if (expand_vec_perm_broadcast (d))
51469 if (expand_vec_perm_vpermq_perm_1 (d))
51472 if (expand_vec_perm_vperm2f128 (d))
51475 if (expand_vec_perm_pblendv (d))
51478 /* Try sequences of three instructions. */
51480 if (expand_vec_perm_even_odd_pack (d))
51483 if (expand_vec_perm_2vperm2f128_vshuf (d))
51486 if (expand_vec_perm_pshufb2 (d))
51489 if (expand_vec_perm_interleave3 (d))
51492 if (expand_vec_perm_vperm2f128_vblend (d))
51495 /* Try sequences of four instructions. */
51497 if (expand_vec_perm_even_odd_trunc (d))
51499 if (expand_vec_perm_vpshufb2_vpermq (d))
51502 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
51505 if (expand_vec_perm_vpermi2_vpshub2 (d))
51508 /* ??? Look for narrow permutations whose element orderings would
51509 allow the promotion to a wider mode. */
51511 /* ??? Look for sequences of interleave or a wider permute that place
51512 the data into the correct lanes for a half-vector shuffle like
51513 pshuf[lh]w or vpermilps. */
51515 /* ??? Look for sequences of interleave that produce the desired results.
51516 The combinatorics of punpck[lh] get pretty ugly... */
51518 if (expand_vec_perm_even_odd (d))
51521 /* Even longer sequences. */
51522 if (expand_vec_perm_vpshufb4_vpermq2 (d))
/* Last resort: retry the single-insn expansion after canonicalizing
   the permutation into a different integer vector mode.  */
51525 /* See if we can get the same permutation in different vector integer
51527 struct expand_vec_perm_d nd;
51528 if (canonicalize_vector_int_perm (d, &nd) && expand_vec_perm_1 (&nd))
51531 emit_move_insn (d->target, gen_lowpart (d->vmode, nd.target));
51538 /* If a permutation only uses one operand, make it clear. Returns true
51539 if the permutation references both operands. */
51542 canonicalize_perm (struct expand_vec_perm_d *d)
51544 int i, which, nelt = d->nelt;
/* WHICH accumulates 1 for any reference to op0 elements and 2 for any
   reference to op1 elements (selector values >= nelt).  */
51546 for (i = which = 0; i < nelt; ++i)
51547 which |= (d->perm[i] < nelt ? 1 : 2);
51549 d->one_operand_p = true;
51556 if (!rtx_equal_p (d->op0, d->op1))
51558 d->one_operand_p = false;
51561 /* The elements of PERM do not suggest that only the first operand
51562 is used, but both operands are identical. Allow easier matching
51563 of the permutation by folding the permutation into the single
/* Fold every selector index into the 0 .. nelt-1 range.  */
51568 for (i = 0; i < nelt; ++i)
51569 d->perm[i] &= nelt - 1;
51578 return (which == 3);
/* Expand a constant vector permutation.  operands[0] is the target,
   operands[1] and operands[2] the two inputs; the remaining operand is
   presumably the CONST_VECTOR selector (assignment not visible here --
   TODO confirm against the .md expander).  Returns true on success.  */
51582 ix86_expand_vec_perm_const (rtx operands[4])
51584 struct expand_vec_perm_d d;
51585 unsigned char perm[MAX_VECT_LEN];
51590 d.target = operands[0];
51591 d.op0 = operands[1];
51592 d.op1 = operands[2];
51595 d.vmode = GET_MODE (d.target);
51596 gcc_assert (VECTOR_MODE_P (d.vmode));
51597 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
51598 d.testing_p = false;
51600 gcc_assert (GET_CODE (sel) == CONST_VECTOR);
51601 gcc_assert (XVECLEN (sel, 0) == nelt);
51602 gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
/* Copy the selector, wrapping each element into 0 .. 2*nelt-1.  */
51604 for (i = 0; i < nelt; ++i)
51606 rtx e = XVECEXP (sel, 0, i);
51607 int ei = INTVAL (e) & (2 * nelt - 1);
51612 two_args = canonicalize_perm (&d);
51614 if (ix86_expand_vec_perm_const_1 (&d))
51617 /* If the selector says both arguments are needed, but the operands are the
51618 same, the above tried to expand with one_operand_p and flattened selector.
51619 If that didn't work, retry without one_operand_p; we succeeded with that
51621 if (two_args && d.one_operand_p)
51623 d.one_operand_p = false;
51624 memcpy (d.perm, perm, sizeof (perm));
51625 return ix86_expand_vec_perm_const_1 (&d);
51631 /* Implement targetm.vectorize.vec_perm_const_ok. */
51634 ix86_vectorize_vec_perm_const_ok (machine_mode vmode,
51635 const unsigned char *sel)
51637 struct expand_vec_perm_d d;
51638 unsigned int i, nelt, which;
51642 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
51643 d.testing_p = true;
51645 /* Given sufficient ISA support we can just return true here
51646 for selected vector modes. */
51653 if (TARGET_AVX512F)
51654 /* All implementable with a single vpermi2 insn. */
51658 if (TARGET_AVX512BW)
51659 /* All implementable with a single vpermi2 insn. */
51663 if (TARGET_AVX512BW)
51664 /* Implementable with 2 vpermi2, 2 vpshufb and 1 or insn. */
51671 if (TARGET_AVX512VL)
51672 /* All implementable with a single vpermi2 insn. */
51677 /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
51682 /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
51689 /* All implementable with a single vpperm insn. */
51692 /* All implementable with 2 pshufb + 1 ior. */
51698 /* All implementable with shufpd or unpck[lh]pd. */
51704 /* Extract the values from the vector CST into the permutation
51706 memcpy (d.perm, sel, nelt);
/* WHICH accumulates 1/2 flags for references to the first/second
   input vector, as in canonicalize_perm.  */
51707 for (i = which = 0; i < nelt; ++i)
51709 unsigned char e = d.perm[i];
51710 gcc_assert (e < 2 * nelt);
51711 which |= (e < nelt ? 1 : 2);
51714 /* For all elements from second vector, fold the elements to first. */
51716 for (i = 0; i < nelt; ++i)
51719 /* Check whether the mask can be applied to the vector type. */
51720 d.one_operand_p = (which != 3);
51722 /* Implementable with shufps or pshufd. */
51723 if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
51726 /* Otherwise we have to go through the motions and see if we can
51727 figure out how to generate the requested permutation. */
/* Dummy raw registers: with testing_p set, the expansion is a dry
   run that must not emit RTL, so no real pseudos are allocated.  */
51728 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
51729 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
51730 if (!d.one_operand_p)
51731 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
51734 ret = ix86_expand_vec_perm_const_1 (&d);
/* Expand an extract-even (ODD == 0) or extract-odd (ODD == 1)
   permutation of OP0/OP1 into TARG.  */
51741 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
51743 struct expand_vec_perm_d d;
51749 d.vmode = GET_MODE (targ);
51750 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
51751 d.one_operand_p = false;
51752 d.testing_p = false;
/* Build the canonical even/odd selector 2*i + odd.  */
51754 for (i = 0; i < nelt; ++i)
51755 d.perm[i] = i * 2 + odd;
51757 /* We'll either be able to implement the permutation directly... */
51758 if (expand_vec_perm_1 (&d))
51761 /* ... or we use the special-case patterns. */
51762 expand_vec_perm_even_odd_1 (&d, odd);
/* Expand an interleave (punpckl/punpckh-style merge) of OP0 and OP1
   into TARG; HIGH_P selects the high halves of the inputs.  */
51766 ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p)
51768 struct expand_vec_perm_d d;
51769 unsigned i, nelt, base;
51775 d.vmode = GET_MODE (targ);
51776 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
51777 d.one_operand_p = false;
51778 d.testing_p = false;
/* Selector alternates elements of op0 and op1, starting at the low
   or high half of each input.  */
51780 base = high_p ? nelt / 2 : 0;
51781 for (i = 0; i < nelt / 2; ++i)
51783 d.perm[i * 2] = i + base;
51784 d.perm[i * 2 + 1] = i + base + nelt;
51787 /* Note that for AVX this isn't one instruction. */
51788 ok = ix86_expand_vec_perm_const_1 (&d);
51793 /* Expand a vector operation CODE for a V*QImode in terms of the
51794 same operation on V*HImode. */
51797 ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
51799 machine_mode qimode = GET_MODE (dest);
51800 machine_mode himode;
51801 rtx (*gen_il) (rtx, rtx, rtx);
51802 rtx (*gen_ih) (rtx, rtx, rtx);
51803 rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h;
51804 struct expand_vec_perm_d d;
51805 bool ok, full_interleave;
51806 bool uns_p = false;
/* Select the low/high interleave generators for the vector width.  */
51813 gen_il = gen_vec_interleave_lowv16qi;
51814 gen_ih = gen_vec_interleave_highv16qi;
51817 himode = V16HImode;
51818 gen_il = gen_avx2_interleave_lowv32qi;
51819 gen_ih = gen_avx2_interleave_highv32qi;
51822 himode = V32HImode;
51823 gen_il = gen_avx512bw_interleave_lowv64qi;
51824 gen_ih = gen_avx512bw_interleave_highv64qi;
51827 gcc_unreachable ();
51830 op2_l = op2_h = op2;
51834 /* Unpack data such that we've got a source byte in each low byte of
51835 each word. We don't care what goes into the high byte of each word.
51836 Rather than trying to get zero in there, most convenient is to let
51837 it be a copy of the low byte. */
51838 op2_l = gen_reg_rtx (qimode);
51839 op2_h = gen_reg_rtx (qimode);
51840 emit_insn (gen_il (op2_l, op2, op2));
51841 emit_insn (gen_ih (op2_h, op2, op2));
51844 op1_l = gen_reg_rtx (qimode);
51845 op1_h = gen_reg_rtx (qimode);
51846 emit_insn (gen_il (op1_l, op1, op1));
51847 emit_insn (gen_ih (op1_h, op1, op1));
51848 full_interleave = qimode == V16QImode;
/* Alternative path: widen via sign/zero extension instead of
   self-interleave.  */
51856 op1_l = gen_reg_rtx (himode);
51857 op1_h = gen_reg_rtx (himode);
51858 ix86_expand_sse_unpack (op1_l, op1, uns_p, false);
51859 ix86_expand_sse_unpack (op1_h, op1, uns_p, true);
51860 full_interleave = true;
51863 gcc_unreachable ();
51866 /* Perform the operation. */
51867 res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX,
51869 res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX,
51871 gcc_assert (res_l && res_h);
51873 /* Merge the data back into the right place. */
51875 d.op0 = gen_lowpart (qimode, res_l);
51876 d.op1 = gen_lowpart (qimode, res_h);
51878 d.nelt = GET_MODE_NUNITS (qimode);
51879 d.one_operand_p = false;
51880 d.testing_p = false;
51882 if (full_interleave)
51884 /* For SSE2, we used an full interleave, so the desired
51885 results are in the even elements. */
51886 for (i = 0; i < 64; ++i)
51891 /* For AVX, the interleave used above was not cross-lane. So the
51892 extraction is evens but with the second and third quarter swapped.
51893 Happily, that is even one insn shorter than even extraction. */
51894 for (i = 0; i < 64; ++i)
51895 d.perm[i] = i * 2 + ((i & 24) == 8 ? 16 : (i & 24) == 16 ? -16 : 0);
51898 ok = ix86_expand_vec_perm_const_1 (&d);
/* Record the whole-operation equivalence so later RTL passes can
   simplify around the expanded sequence.  */
51901 set_unique_reg_note (get_last_insn (), REG_EQUAL,
51902 gen_rtx_fmt_ee (code, qimode, op1, op2));
51905 /* Helper function of ix86_expand_mul_widen_evenodd. Return true
51906 if op is CONST_VECTOR with all odd elements equal to their
51907 preceding element. */
51910 const_vector_equal_evenodd_p (rtx op)
51912 machine_mode mode = GET_MODE (op);
51913 int i, nunits = GET_MODE_NUNITS (mode);
51914 if (GET_CODE (op) != CONST_VECTOR
51915 || nunits != CONST_VECTOR_NUNITS (op))
/* Pointer comparison of the element rtxes; shared constant rtxes
   (e.g. CONST_INT) make this an exact equality test.  */
51917 for (i = 0; i < nunits; i += 2)
51918 if (CONST_VECTOR_ELT (op, i) != CONST_VECTOR_ELT (op, i + 1))
/* Expand a widening multiply of the even (or, when ODD_P, odd) SImode
   elements of OP1 and OP2 into the double-width vector DEST.  UNS_P
   selects an unsigned multiply.  */
51924 ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
51925 bool uns_p, bool odd_p)
51927 machine_mode mode = GET_MODE (op1);
51928 machine_mode wmode = GET_MODE (dest);
51930 rtx orig_op1 = op1, orig_op2 = op2;
51932 if (!nonimmediate_operand (op1, mode))
51933 op1 = force_reg (mode, op1);
51934 if (!nonimmediate_operand (op2, mode))
51935 op2 = force_reg (mode, op2);
51937 /* We only play even/odd games with vectors of SImode. */
51938 gcc_assert (mode == V4SImode || mode == V8SImode || mode == V16SImode);
51940 /* If we're looking for the odd results, shift those members down to
51941 the even slots. For some cpus this is faster than a PSHUFD. */
51944 /* For XOP use vpmacsdqh, but only for smult, as it is only
51946 if (TARGET_XOP && mode == V4SImode && !uns_p)
51948 x = force_reg (wmode, CONST0_RTX (wmode));
51949 emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x));
/* Shift only operands whose odd elements differ from the even ones;
   a constant with equal even/odd pairs needs no adjustment.  */
51953 x = GEN_INT (GET_MODE_UNIT_BITSIZE (mode));
51954 if (!const_vector_equal_evenodd_p (orig_op1))
51955 op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1),
51956 x, NULL, 1, OPTAB_DIRECT);
51957 if (!const_vector_equal_evenodd_p (orig_op2))
51958 op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2),
51959 x, NULL, 1, OPTAB_DIRECT);
51960 op1 = gen_lowpart (mode, op1);
51961 op2 = gen_lowpart (mode, op2);
/* Emit the widening even-element multiply for the given width.  */
51964 if (mode == V16SImode)
51967 x = gen_vec_widen_umult_even_v16si (dest, op1, op2);
51969 x = gen_vec_widen_smult_even_v16si (dest, op1, op2);
51971 else if (mode == V8SImode)
51974 x = gen_vec_widen_umult_even_v8si (dest, op1, op2);
51976 x = gen_vec_widen_smult_even_v8si (dest, op1, op2);
51979 x = gen_vec_widen_umult_even_v4si (dest, op1, op2);
51980 else if (TARGET_SSE4_1)
51981 x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2);
51984 rtx s1, s2, t0, t1, t2;
51986 /* The easiest way to implement this without PMULDQ is to go through
51987 the motions as if we are performing a full 64-bit multiply. With
51988 the exception that we need to do less shuffling of the elements. */
51990 /* Compute the sign-extension, aka highparts, of the two operands. */
51991 s1 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
51992 op1, pc_rtx, pc_rtx);
51993 s2 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
51994 op2, pc_rtx, pc_rtx);
51996 /* Multiply LO(A) * HI(B), and vice-versa. */
51997 t1 = gen_reg_rtx (wmode);
51998 t2 = gen_reg_rtx (wmode);
51999 emit_insn (gen_vec_widen_umult_even_v4si (t1, s1, op2));
52000 emit_insn (gen_vec_widen_umult_even_v4si (t2, s2, op1));
52002 /* Multiply LO(A) * LO(B). */
52003 t0 = gen_reg_rtx (wmode);
52004 emit_insn (gen_vec_widen_umult_even_v4si (t0, op1, op2));
52006 /* Combine and shift the highparts into place. */
52007 t1 = expand_binop (wmode, add_optab, t1, t2, t1, 1, OPTAB_DIRECT);
52008 t1 = expand_binop (wmode, ashl_optab, t1, GEN_INT (32), t1,
52011 /* Combine high and low parts. */
52012 force_expand_binop (wmode, add_optab, t0, t1, dest, 1, OPTAB_DIRECT);
/* Expand a widening multiply of the low (HIGH_P false) or high
   (HIGH_P true) half of OP1 and OP2 into the double-width vector
   DEST; UNS_P selects unsigned.  */
52019 ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2,
52020 bool uns_p, bool high_p)
52022 machine_mode wmode = GET_MODE (dest);
52023 machine_mode mode = GET_MODE (op1);
52024 rtx t1, t2, t3, t4, mask;
52029 t1 = gen_reg_rtx (mode);
52030 t2 = gen_reg_rtx (mode);
52031 if (TARGET_XOP && !uns_p)
52033 /* With XOP, we have pmacsdqh, aka mul_widen_odd. In this case,
52034 shuffle the elements once so that all elements are in the right
52035 place for immediate use: { A C B D }. */
52036 emit_insn (gen_sse2_pshufd_1 (t1, op1, const0_rtx, const2_rtx,
52037 const1_rtx, GEN_INT (3)));
52038 emit_insn (gen_sse2_pshufd_1 (t2, op2, const0_rtx, const2_rtx,
52039 const1_rtx, GEN_INT (3)));
52043 /* Put the elements into place for the multiply. */
52044 ix86_expand_vec_interleave (t1, op1, op1, high_p);
52045 ix86_expand_vec_interleave (t2, op2, op2, high_p);
52048 ix86_expand_mul_widen_evenodd (dest, t1, t2, uns_p, high_p);
52052 /* Shuffle the elements between the lanes. After this we
52053 have { A B E F | C D G H } for each operand. */
52054 t1 = gen_reg_rtx (V4DImode);
52055 t2 = gen_reg_rtx (V4DImode);
52056 emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, op1),
52057 const0_rtx, const2_rtx,
52058 const1_rtx, GEN_INT (3)));
52059 emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, op2),
52060 const0_rtx, const2_rtx,
52061 const1_rtx, GEN_INT (3)));
52063 /* Shuffle the elements within the lanes. After this we
52064 have { A A B B | C C D D } or { E E F F | G G H H }. */
52065 t3 = gen_reg_rtx (V8SImode);
52066 t4 = gen_reg_rtx (V8SImode);
/* pshufd immediate duplicating elements 2,2,3,3 (high) or 0,0,1,1
   (low) within each lane.  */
52067 mask = GEN_INT (high_p
52068 ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
52069 : 0 + (0 << 2) + (1 << 4) + (1 << 6));
52070 emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), mask));
52071 emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), mask));
52073 ix86_expand_mul_widen_evenodd (dest, t3, t4, uns_p, false);
/* Fallback using full and high-part multiplies, merged by an
   interleave.  */
52078 t1 = expand_binop (mode, smul_optab, op1, op2, NULL_RTX,
52079 uns_p, OPTAB_DIRECT);
52080 t2 = expand_binop (mode,
52081 uns_p ? umul_highpart_optab : smul_highpart_optab,
52082 op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT);
52083 gcc_assert (t1 && t2);
52085 t3 = gen_reg_rtx (mode);
52086 ix86_expand_vec_interleave (t3, t1, t2, high_p);
52087 emit_move_insn (dest, gen_lowpart (wmode, t3));
/* Path using sign/zero-extending unpack followed by a full-width
   multiply.  */
52095 t1 = gen_reg_rtx (wmode);
52096 t2 = gen_reg_rtx (wmode);
52097 ix86_expand_sse_unpack (t1, op1, uns_p, high_p);
52098 ix86_expand_sse_unpack (t2, op2, uns_p, high_p);
52100 emit_insn (gen_rtx_SET (dest, gen_rtx_MULT (wmode, t1, t2)));
52104 gcc_unreachable ();
/* Expand OP0 = OP1 * OP2 in V4SImode using SSE2 widening even/odd
   multiplies and a final interleave.  */
52109 ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)
52111 rtx res_1, res_2, res_3, res_4;
52113 res_1 = gen_reg_rtx (V4SImode);
52114 res_2 = gen_reg_rtx (V4SImode);
52115 res_3 = gen_reg_rtx (V2DImode);
52116 res_4 = gen_reg_rtx (V2DImode);
/* res_3 holds the even-element products, res_4 the odd ones; the low
   32 bits of each 64-bit product are the wanted SImode results.  */
52117 ix86_expand_mul_widen_evenodd (res_3, op1, op2, true, false);
52118 ix86_expand_mul_widen_evenodd (res_4, op1, op2, true, true);
52120 /* Move the results in element 2 down to element 1; we don't care
52121 what goes in elements 2 and 3. Then we can merge the parts
52122 back together with an interleave.
52124 Note that two other sequences were tried:
52125 (1) Use interleaves at the start instead of psrldq, which allows
52126 us to use a single shufps to merge things back at the end.
52127 (2) Use shufps here to combine the two vectors, then pshufd to
52128 put the elements in the correct order.
52129 In both cases the cost of the reformatting stall was too high
52130 and the overall sequence slower. */
52132 emit_insn (gen_sse2_pshufd_1 (res_1, gen_lowpart (V4SImode, res_3),
52133 const0_rtx, const2_rtx,
52134 const0_rtx, const0_rtx));
52135 emit_insn (gen_sse2_pshufd_1 (res_2, gen_lowpart (V4SImode, res_4),
52136 const0_rtx, const2_rtx,
52137 const0_rtx, const0_rtx));
52138 res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2));
/* Attach a REG_EQUAL note so later passes see the plain multiply.  */
52140 set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2));
/* Expand OP0 = OP1 * OP2 for DImode vector modes (V2DI/V4DI/V8DI).
   Uses a native AVX512DQ multiply when available, an XOP sequence for
   V2DImode, and otherwise synthesizes the 64x64->64 multiply from
   32-bit unsigned widening multiplies plus shifts and adds.
   NOTE(review): embedded line numbers skip; nmode assignments, the
   initialization of t6 (presumably the 32-bit shift count — confirm
   against the full source) and several braces are outside this view.  */
52144 ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
52146 machine_mode mode = GET_MODE (op0);
52147 rtx t1, t2, t3, t4, t5, t6;
52149 if (TARGET_AVX512DQ && mode == V8DImode)
52150 emit_insn (gen_avx512dq_mulv8di3 (op0, op1, op2));
52151 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V4DImode)
52152 emit_insn (gen_avx512dq_mulv4di3 (op0, op1, op2));
52153 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V2DImode)
52154 emit_insn (gen_avx512dq_mulv2di3 (op0, op1, op2));
52155 else if (TARGET_XOP && mode == V2DImode)
52157 /* op1: A,B,C,D, op2: E,F,G,H */
52158 op1 = gen_lowpart (V4SImode, op1);
52159 op2 = gen_lowpart (V4SImode, op2);
52161 t1 = gen_reg_rtx (V4SImode);
52162 t2 = gen_reg_rtx (V4SImode);
52163 t3 = gen_reg_rtx (V2DImode);
52164 t4 = gen_reg_rtx (V2DImode);
52167 emit_insn (gen_sse2_pshufd_1 (t1, op1,
52173 /* t2: (B*E),(A*F),(D*G),(C*H) */
52174 emit_insn (gen_mulv4si3 (t2, t1, op2));
52176 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
52177 emit_insn (gen_xop_phadddq (t3, t2));
52179 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
52180 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
52182 /* Multiply lower parts and add all */
52183 t5 = gen_reg_rtx (V2DImode);
52184 emit_insn (gen_vec_widen_umult_even_v4si (t5,
52185 gen_lowpart (V4SImode, op1),
52186 gen_lowpart (V4SImode, op2)));
52187 op0 = expand_binop (mode, add_optab, t5, t4, op0, 1, OPTAB_DIRECT);
/* Generic path: pick the even-element widening multiply matching the
   vector width; nmode is the corresponding SImode vector mode.  */
52192 machine_mode nmode;
52193 rtx (*umul) (rtx, rtx, rtx);
52195 if (mode == V2DImode)
52197 umul = gen_vec_widen_umult_even_v4si;
52200 else if (mode == V4DImode)
52202 umul = gen_vec_widen_umult_even_v8si;
52205 else if (mode == V8DImode)
52207 umul = gen_vec_widen_umult_even_v16si;
52211 gcc_unreachable ();
52214 /* Multiply low parts. */
52215 t1 = gen_reg_rtx (mode);
52216 emit_insn (umul (t1, gen_lowpart (nmode, op1), gen_lowpart (nmode, op2)));
52218 /* Shift input vectors right 32 bits so we can multiply high parts. */
52220 t2 = expand_binop (mode, lshr_optab, op1, t6, NULL, 1, OPTAB_DIRECT);
52221 t3 = expand_binop (mode, lshr_optab, op2, t6, NULL, 1, OPTAB_DIRECT);
52223 /* Multiply high parts by low parts. */
52224 t4 = gen_reg_rtx (mode);
52225 t5 = gen_reg_rtx (mode);
52226 emit_insn (umul (t4, gen_lowpart (nmode, t2), gen_lowpart (nmode, op2)));
52227 emit_insn (umul (t5, gen_lowpart (nmode, t3), gen_lowpart (nmode, op1)));
52229 /* Combine and shift the highparts back. */
52230 t4 = expand_binop (mode, add_optab, t4, t5, t4, 1, OPTAB_DIRECT);
52231 t4 = expand_binop (mode, ashl_optab, t4, t6, t4, 1, OPTAB_DIRECT);
52233 /* Combine high and low parts. */
52234 force_expand_binop (mode, add_optab, t1, t4, op0, 1, OPTAB_DIRECT);
/* Record the full product for the optimizers.  */
52237 set_unique_reg_note (get_last_insn (), REG_EQUAL,
52238 gen_rtx_MULT (mode, op1, op2));
52241 /* Return 1 if control transfer instruction INSN
52242 should be encoded with bnd prefix.
52243 If insn is NULL then return 1 when control
52244 transfer instructions should be prefixed with
52245 bnd by default for current function. */
52248 ix86_bnd_prefixed_insn_p (rtx insn)
52250 /* For call insns check special flag. */
52251 if (insn && CALL_P (insn))
52253 rtx call = get_call_rtx_from (insn);
/* The bnd prefix follows the bounds-instrumentation flag carried on
   the call's CALL_EXPR.  */
52255 return CALL_EXPR_WITH_BOUNDS_P (call);
52258 /* All other insns are prefixed only if function is instrumented. */
52259 return chkp_function_instrumented_p (current_function_decl);
52262 /* Calculate integer abs() using only SSE2 instructions. */
52265 ix86_expand_sse2_abs (rtx target, rtx input)
52267 machine_mode mode = GET_MODE (target);
/* NOTE(review): the switch over MODE and its case labels fall in a
   gap of this extracted view; the three strategies below correspond
   to the 32-, 16- and 8-bit element cases per their comments.  */
52272 /* For 32-bit signed integer X, the best way to calculate the absolute
52273 value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)). */
52275 tmp0 = expand_simple_binop (mode, ASHIFTRT, input,
52276 GEN_INT (GET_MODE_UNIT_BITSIZE (mode) - 1),
52277 NULL, 0, OPTAB_DIRECT);
52278 tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
52279 NULL, 0, OPTAB_DIRECT);
52280 x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
52281 target, 0, OPTAB_DIRECT);
52284 /* For 16-bit signed integer X, the best way to calculate the absolute
52285 value of X is max (X, -X), as SSE2 provides the PMAXSW insn. */
52287 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
52289 x = expand_simple_binop (mode, SMAX, tmp0, input,
52290 target, 0, OPTAB_DIRECT);
52293 /* For 8-bit signed integer X, the best way to calculate the absolute
52294 value of X is min ((unsigned char) X, (unsigned char) (-X)),
52295 as SSE2 provides the PMINUB insn. */
52297 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
52299 x = expand_simple_binop (V16QImode, UMIN, tmp0, input,
52300 target, 0, OPTAB_DIRECT);
52304 gcc_unreachable ();
/* The expanders may have placed the result elsewhere; copy it into
   TARGET if so.  */
52308 emit_move_insn (target, x);
52311 /* Expand an extract from a vector register through pextr insn.
52312 Return true if successful. */
52315 ix86_expand_pextr (rtx *operands)
52317 rtx dst = operands[0];
52318 rtx src = operands[1];
/* operands[2] is the bit width of the field, operands[3] its bit
   position within SRC.  */
52320 unsigned int size = INTVAL (operands[2]);
52321 unsigned int pos = INTVAL (operands[3]);
52323 if (SUBREG_P (dst))
52325 /* Reject non-lowpart subregs. */
52326 if (SUBREG_BYTE (dst) > 0)
52328 dst = SUBREG_REG (dst);
52331 if (SUBREG_P (src))
/* Fold the subreg offset of SRC into the extraction position.  */
52333 pos += SUBREG_BYTE (src) * BITS_PER_UNIT;
52334 src = SUBREG_REG (src);
52337 switch (GET_MODE (src))
52346 machine_mode srcmode, dstmode;
52349 dstmode = mode_for_size (size, MODE_INT, 0);
/* Select the vector source mode matching the field size; byte and
   dword/qword extracts require SSE4.1 (pextrb/pextrd/pextrq).  */
52354 if (!TARGET_SSE4_1)
52356 srcmode = V16QImode;
52362 srcmode = V8HImode;
52366 if (!TARGET_SSE4_1)
52368 srcmode = V4SImode;
52372 gcc_assert (TARGET_64BIT);
52373 if (!TARGET_SSE4_1)
52375 srcmode = V2DImode;
52382 /* Reject extractions from misaligned positions. */
52383 if (pos & (size-1))
52386 if (GET_MODE (dst) == dstmode)
52389 d = gen_reg_rtx (dstmode);
52391 /* Construct insn pattern. */
52392 pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (pos / size)));
52393 pat = gen_rtx_VEC_SELECT (dstmode, gen_lowpart (srcmode, src), pat);
52395 /* Let the rtl optimizers know about the zero extension performed. */
52396 if (dstmode == QImode || dstmode == HImode)
52398 pat = gen_rtx_ZERO_EXTEND (SImode, pat);
52399 d = gen_lowpart (SImode, d);
52402 emit_insn (gen_rtx_SET (d, pat));
52405 emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
52414 /* Expand an insert into a vector register through pinsr insn.
52415 Return true if successful. */
52418 ix86_expand_pinsr (rtx *operands)
52420 rtx dst = operands[0];
52421 rtx src = operands[3];
/* operands[1] is the bit width of the field, operands[2] its bit
   position within DST.  */
52423 unsigned int size = INTVAL (operands[1]);
52424 unsigned int pos = INTVAL (operands[2]);
52426 if (SUBREG_P (dst))
/* Fold the subreg offset of DST into the insertion position.  */
52428 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
52429 dst = SUBREG_REG (dst);
52432 switch (GET_MODE (dst))
52441 machine_mode srcmode, dstmode;
52442 rtx (*pinsr)(rtx, rtx, rtx, rtx);
52445 srcmode = mode_for_size (size, MODE_INT, 0);
/* Select the pinsr variant matching the field size; byte and
   dword/qword inserts require SSE4.1, word insert only SSE2.  */
52450 if (!TARGET_SSE4_1)
52452 dstmode = V16QImode;
52453 pinsr = gen_sse4_1_pinsrb;
52459 dstmode = V8HImode;
52460 pinsr = gen_sse2_pinsrw;
52464 if (!TARGET_SSE4_1)
52466 dstmode = V4SImode;
52467 pinsr = gen_sse4_1_pinsrd;
52471 gcc_assert (TARGET_64BIT);
52472 if (!TARGET_SSE4_1)
52474 dstmode = V2DImode;
52475 pinsr = gen_sse4_1_pinsrq;
52482 /* Reject insertions to misaligned positions. */
52483 if (pos & (size-1))
52486 if (SUBREG_P (src))
52488 unsigned int srcpos = SUBREG_BYTE (src);
/* A non-lowpart SRC subreg is first extracted into a fresh register
   via ix86_expand_pextr before being inserted.  */
52494 extr_ops[0] = gen_reg_rtx (srcmode);
52495 extr_ops[1] = gen_lowpart (srcmode, SUBREG_REG (src));
52496 extr_ops[2] = GEN_INT (size);
52497 extr_ops[3] = GEN_INT (srcpos * BITS_PER_UNIT);
52499 if (!ix86_expand_pextr (extr_ops))
52505 src = gen_lowpart (srcmode, SUBREG_REG (src));
52508 if (GET_MODE (dst) == dstmode)
52511 d = gen_reg_rtx (dstmode);
/* The last operand of pinsr is a one-hot element-selection mask.  */
52513 emit_insn (pinsr (d, gen_lowpart (dstmode, dst),
52514 gen_lowpart (srcmode, src),
52515 GEN_INT (1 << (pos / size))));
52517 emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
52526 /* This function returns the calling abi specific va_list type node.
52527 It returns the FNDECL specific va_list type. */
52530 ix86_fn_abi_va_list (tree fndecl)
52533 return va_list_type_node;
52534 gcc_assert (fndecl != NULL_TREE);
/* MS-ABI functions use the MS va_list layout; everything else the
   System V one.  */
52536 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
52537 return ms_va_list_type_node;
52539 return sysv_va_list_type_node;
52542 /* Returns the canonical va_list type specified by TYPE. If there
52543 is no valid TYPE provided, it return NULL_TREE. */
52546 ix86_canonical_va_list_type (tree type)
52550 /* Resolve references and pointers to va_list type. */
52551 if (TREE_CODE (type) == MEM_REF)
52552 type = TREE_TYPE (type);
52553 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
52554 type = TREE_TYPE (type);
52555 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
52556 type = TREE_TYPE (type);
/* Try, in order, the default va_list, then the SysV and MS variants,
   comparing main variants after unwrapping any array decay.  */
52558 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
52560 wtype = va_list_type_node;
52561 gcc_assert (wtype != NULL_TREE);
52563 if (TREE_CODE (wtype) == ARRAY_TYPE)
52565 /* If va_list is an array type, the argument may have decayed
52566 to a pointer type, e.g. by being passed to another function.
52567 In that case, unwrap both types so that we can compare the
52568 underlying records. */
52569 if (TREE_CODE (htype) == ARRAY_TYPE
52570 || POINTER_TYPE_P (htype))
52572 wtype = TREE_TYPE (wtype);
52573 htype = TREE_TYPE (htype);
52576 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
52577 return va_list_type_node;
52578 wtype = sysv_va_list_type_node;
52579 gcc_assert (wtype != NULL_TREE);
52581 if (TREE_CODE (wtype) == ARRAY_TYPE)
52583 /* If va_list is an array type, the argument may have decayed
52584 to a pointer type, e.g. by being passed to another function.
52585 In that case, unwrap both types so that we can compare the
52586 underlying records. */
52587 if (TREE_CODE (htype) == ARRAY_TYPE
52588 || POINTER_TYPE_P (htype))
52590 wtype = TREE_TYPE (wtype);
52591 htype = TREE_TYPE (htype);
52594 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
52595 return sysv_va_list_type_node;
52596 wtype = ms_va_list_type_node;
52597 gcc_assert (wtype != NULL_TREE);
52599 if (TREE_CODE (wtype) == ARRAY_TYPE)
52601 /* If va_list is an array type, the argument may have decayed
52602 to a pointer type, e.g. by being passed to another function.
52603 In that case, unwrap both types so that we can compare the
52604 underlying records. */
52605 if (TREE_CODE (htype) == ARRAY_TYPE
52606 || POINTER_TYPE_P (htype))
52608 wtype = TREE_TYPE (wtype);
52609 htype = TREE_TYPE (htype);
52612 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
52613 return ms_va_list_type_node;
/* No target-specific match; defer to the generic handling.  */
52616 return std_canonical_va_list_type (type);
52619 /* Iterate through the target-specific builtin types for va_list.
52620 IDX denotes the iterator, *PTREE is set to the result type of
52621 the va_list builtin, and *PNAME to its internal type.
52622 Returns zero if there is no element for this index, otherwise
52623 IDX should be increased upon the next call.
52624 Note, do not iterate a base builtin's name like __builtin_va_list.
52625 Used from c_common_nodes_and_builtins. */
52628 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
/* Index 0 yields the MS va_list, index 1 the SysV va_list (the switch
   over IDX falls in a gap of this extracted view).  */
52638 *ptree = ms_va_list_type_node;
52639 *pname = "__builtin_ms_va_list";
52643 *ptree = sysv_va_list_type_node;
52644 *pname = "__builtin_sysv_va_list";
/* Target-hook overrides and data structures for the AMD dispatch-window
   scheduler (see do_dispatch/has_dispatch below).  */
52652 #undef TARGET_SCHED_DISPATCH
52653 #define TARGET_SCHED_DISPATCH has_dispatch
52654 #undef TARGET_SCHED_DISPATCH_DO
52655 #define TARGET_SCHED_DISPATCH_DO do_dispatch
52656 #undef TARGET_SCHED_REASSOCIATION_WIDTH
52657 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
52658 #undef TARGET_SCHED_REORDER
52659 #define TARGET_SCHED_REORDER ix86_sched_reorder
52660 #undef TARGET_SCHED_ADJUST_PRIORITY
52661 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
52662 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
52663 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
52664 ix86_dependencies_evaluation_hook
52666 /* The size of the dispatch window is the total number of bytes of
52667 object code allowed in a window. */
52668 #define DISPATCH_WINDOW_SIZE 16
52670 /* Number of dispatch windows considered for scheduling. */
52671 #define MAX_DISPATCH_WINDOWS 3
52673 /* Maximum number of instructions in a window. */
52676 /* Maximum number of immediate operands in a window. */
52679 /* Maximum number of immediate bits allowed in a window. */
52680 #define MAX_IMM_SIZE 128
52682 /* Maximum number of 32 bit immediates allowed in a window. */
52683 #define MAX_IMM_32 4
52685 /* Maximum number of 64 bit immediates allowed in a window. */
52686 #define MAX_IMM_64 2
52688 /* Maximum total of loads or prefetches allowed in a window. */
52691 /* Maximum total of stores allowed in a window. */
52692 #define MAX_STORE 1
52698 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
52699 enum dispatch_group {
52714 /* Number of allowable groups in a dispatch window. It is an array
52715 indexed by dispatch_group enum. 100 is used as a big number,
52716 because the number of these kind of operations does not have any
52717 effect in dispatch window, but we need them for other reasons in
52719 static unsigned int num_allowable_groups[disp_last] = {
52720 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
52723 char group_name[disp_last + 1][16] = {
52724 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
52725 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
52726 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
52729 /* Instruction path. */
52732 path_single, /* Single micro op. */
52733 path_double, /* Double micro op. */
52734 path_multi, /* Instructions with more than 2 micro op.. */
52738 /* sched_insn_info defines a window to the instructions scheduled in
52739 the basic block. It contains a pointer to the insn_info table and
52740 the instruction scheduled.
52742 Windows are allocated for each basic block and are linked
52744 typedef struct sched_insn_info_s {
52746 enum dispatch_group group;
52747 enum insn_path path;
52752 /* Linked list of dispatch windows. This is a two way list of
52753 dispatch windows of a basic block. It contains information about
52754 the number of uops in the window and the total number of
52755 instructions and of bytes in the object code for this dispatch
52757 typedef struct dispatch_windows_s {
52758 int num_insn; /* Number of insn in the window. */
52759 int num_uops; /* Number of uops in the window. */
52760 int window_size; /* Number of bytes in the window. */
52761 int window_num; /* Window number between 0 or 1. */
52762 int num_imm; /* Number of immediates in an insn. */
52763 int num_imm_32; /* Number of 32 bit immediates in an insn. */
52764 int num_imm_64; /* Number of 64 bit immediates in an insn. */
52765 int imm_size; /* Total immediates in the window. */
52766 int num_loads; /* Total memory loads in the window. */
52767 int num_stores; /* Total memory stores in the window. */
52768 int violation; /* Violation exists in window. */
52769 sched_insn_info *window; /* Pointer to the window. */
52770 struct dispatch_windows_s *next;
52771 struct dispatch_windows_s *prev;
52772 } dispatch_windows;
52774 /* Immediate values used in an insn. */
52775 typedef struct imm_info_s
/* Heads of the two dispatch-window lists (window 0 and window 1).  */
52782 static dispatch_windows *dispatch_window_list;
52783 static dispatch_windows *dispatch_window_list1;
52785 /* Get dispatch group of insn. */
52787 static enum dispatch_group
52788 get_mem_group (rtx_insn *insn)
52790 enum attr_memory memory;
/* Unrecognized insns carry no memory attribute.  */
52792 if (INSN_CODE (insn) < 0)
52793 return disp_no_group;
52794 memory = get_attr_memory (insn);
52795 if (memory == MEMORY_STORE)
52798 if (memory == MEMORY_LOAD)
52801 if (memory == MEMORY_BOTH)
52802 return disp_load_store;
52804 return disp_no_group;
52807 /* Return true if insn is a compare instruction. */
52810 is_cmp (rtx_insn *insn)
52812 enum attr_type type;
52814 type = get_attr_type (insn);
52815 return (type == TYPE_TEST
52816 || type == TYPE_ICMP
52817 || type == TYPE_FCMP
52818 || GET_CODE (PATTERN (insn)) == COMPARE);
52821 /* Return true if a dispatch violation encountered. */
52824 dispatch_violation (void)
/* Check the most recent window first (window 1 when present).  */
52826 if (dispatch_window_list->next)
52827 return dispatch_window_list->next->violation;
52828 return dispatch_window_list->violation;
52831 /* Return true if insn is a branch instruction. */
52834 is_branch (rtx_insn *insn)
52836 return (CALL_P (insn) || JUMP_P (insn));
52839 /* Return true if insn is a prefetch instruction. */
52842 is_prefetch (rtx_insn *insn)
52844 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
52847 /* This function initializes a dispatch window and the list container holding a
52848 pointer to the window. */
52851 init_window (int window_num)
52854 dispatch_windows *new_list;
/* WINDOW_NUM selects which of the two preallocated windows to reset.  */
52856 if (window_num == 0)
52857 new_list = dispatch_window_list;
52859 new_list = dispatch_window_list1;
52861 new_list->num_insn = 0;
52862 new_list->num_uops = 0;
52863 new_list->window_size = 0;
52864 new_list->next = NULL;
52865 new_list->prev = NULL;
52866 new_list->window_num = window_num;
52867 new_list->num_imm = 0;
52868 new_list->num_imm_32 = 0;
52869 new_list->num_imm_64 = 0;
52870 new_list->imm_size = 0;
52871 new_list->num_loads = 0;
52872 new_list->num_stores = 0;
52873 new_list->violation = false;
/* Clear every per-insn slot of the window.  */
52875 for (i = 0; i < MAX_INSN; i++)
52877 new_list->window[i].insn = NULL;
52878 new_list->window[i].group = disp_no_group;
52879 new_list->window[i].path = no_path;
52880 new_list->window[i].byte_len = 0;
52881 new_list->window[i].imm_bytes = 0;
52886 /* This function allocates and initializes a dispatch window and the
52887 list container holding a pointer to the window. */
52889 static dispatch_windows *
52890 allocate_window (void)
52892 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
52893 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
52898 /* This routine initializes the dispatch scheduling information. It
52899 initiates building dispatch scheduler tables and constructs the
52900 first dispatch window. */
52903 init_dispatch_sched (void)
52905 /* Allocate a dispatch list and a window. */
52906 dispatch_window_list = allocate_window ();
52907 dispatch_window_list1 = allocate_window ();
52912 /* This function returns true if a branch is detected. End of a basic block
52913 does not have to be a branch, but here we assume only branches end a
52917 is_end_basic_block (enum dispatch_group group)
52919 return group == disp_branch;
52922 /* This function is called when the end of a window processing is reached. */
52925 process_end_window (void)
52927 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
52928 if (dispatch_window_list->next)
52930 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
/* Two windows together may hold at most 48 bytes of code.  */
52931 gcc_assert (dispatch_window_list->window_size
52932 + dispatch_window_list1->window_size <= 48);
52938 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
52939 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
52940 for 48 bytes of instructions. Note that these windows are not dispatch
52941 windows that their sizes are DISPATCH_WINDOW_SIZE. */
52943 static dispatch_windows *
52944 allocate_next_window (int window_num)
52946 if (window_num == 0)
52948 if (dispatch_window_list->next)
52951 return dispatch_window_list;
/* Link window 1 after window 0 and return it.  */
52954 dispatch_window_list->next = dispatch_window_list1;
52955 dispatch_window_list1->prev = dispatch_window_list;
52957 return dispatch_window_list1;
52960 /* Compute number of immediate operands of an instruction. */
52963 find_constant (rtx in_rtx, imm_info *imm_values)
52965 if (INSN_P (in_rtx))
52966 in_rtx = PATTERN (in_rtx);
/* Walk every sub-rtx and classify constants into total/32-bit/64-bit
   immediate counters.  */
52967 subrtx_iterator::array_type array;
52968 FOR_EACH_SUBRTX (iter, array, in_rtx, ALL)
52969 if (const_rtx x = *iter)
52970 switch (GET_CODE (x))
52975 (imm_values->imm)++;
52976 if (x86_64_immediate_operand (CONST_CAST_RTX (x), SImode))
52977 (imm_values->imm32)++;
52979 (imm_values->imm64)++;
52983 case CONST_WIDE_INT:
52984 (imm_values->imm)++;
52985 (imm_values->imm64)++;
52989 if (LABEL_KIND (x) == LABEL_NORMAL)
52991 (imm_values->imm)++;
52992 (imm_values->imm32)++;
53001 /* Return total size of immediate operands of an instruction along with number
53002 of corresponding immediate-operands. It initializes its parameters to zero
53003 before calling FIND_CONSTANT.
53004 INSN is the input instruction. IMM is the total of immediates.
53005 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
53009 get_num_immediates (rtx_insn *insn, int *imm, int *imm32, int *imm64)
53011 imm_info imm_values = {0, 0, 0};
53013 find_constant (insn, &imm_values);
53014 *imm = imm_values.imm;
53015 *imm32 = imm_values.imm32;
53016 *imm64 = imm_values.imm64;
/* Return the total immediate size in bytes: 4 per 32-bit, 8 per
   64-bit immediate.  */
53017 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
53020 /* This function indicates if an operand of an instruction is an
53024 has_immediate (rtx_insn *insn)
53026 int num_imm_operand;
53027 int num_imm32_operand;
53028 int num_imm64_operand;
53031 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
53032 &num_imm64_operand);
53036 /* Return single or double path for instructions. */
53038 static enum insn_path
53039 get_insn_path (rtx_insn *insn)
53041 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
/* The amdfam10_decode attribute maps 0 -> single uop, 1 -> double
   uop, anything else -> multi.  */
53043 if ((int)path == 0)
53044 return path_single;
53046 if ((int)path == 1)
53047 return path_double;
53052 /* Return insn dispatch group. */
53054 static enum dispatch_group
53055 get_insn_group (rtx_insn *insn)
/* Memory classification takes priority, then branch, immediate and
   prefetch classes.  */
53057 enum dispatch_group group = get_mem_group (insn);
53061 if (is_branch (insn))
53062 return disp_branch;
53067 if (has_immediate (insn))
53070 if (is_prefetch (insn))
53071 return disp_prefetch;
53073 return disp_no_group;
53076 /* Count number of GROUP restricted instructions in a dispatch
53077 window WINDOW_LIST. */
53080 count_num_restricted (rtx_insn *insn, dispatch_windows *window_list)
53082 enum dispatch_group group = get_insn_group (insn);
53084 int num_imm_operand;
53085 int num_imm32_operand;
53086 int num_imm64_operand;
53088 if (group == disp_no_group)
53091 if (group == disp_imm)
53093 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
53094 &num_imm64_operand);
/* An immediate-carrying insn violates the window if adding its
   immediates would exceed any of the per-window immediate budgets
   (total size, total count, 32-bit slots, 64-bit slots), counting a
   64-bit immediate as two 32-bit slots and vice versa.  */
53095 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
53096 || num_imm_operand + window_list->num_imm > MAX_IMM
53097 || (num_imm32_operand > 0
53098 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
53099 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
53100 || (num_imm64_operand > 0
53101 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
53102 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
53103 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
53104 && num_imm64_operand > 0
53105 && ((window_list->num_imm_64 > 0
53106 && window_list->num_insn >= 2)
53107 || window_list->num_insn >= 3)))
/* Memory operations are limited to MAX_LOAD loads/prefetches and
   MAX_STORE stores per window.  */
53113 if ((group == disp_load_store
53114 && (window_list->num_loads >= MAX_LOAD
53115 || window_list->num_stores >= MAX_STORE))
53116 || ((group == disp_load
53117 || group == disp_prefetch)
53118 && window_list->num_loads >= MAX_LOAD)
53119 || (group == disp_store
53120 && window_list->num_stores >= MAX_STORE))
53126 /* This function returns true if insn satisfies dispatch rules on the
53127 last window scheduled. */
53130 fits_dispatch_window (rtx_insn *insn)
53132 dispatch_windows *window_list = dispatch_window_list;
53133 dispatch_windows *window_list_next = dispatch_window_list->next;
53134 unsigned int num_restrict;
53135 enum dispatch_group group = get_insn_group (insn);
53136 enum insn_path path = get_insn_path (insn);
53139 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
53140 instructions should be given the lowest priority in the
53141 scheduling process in Haifa scheduler to make sure they will be
53142 scheduled in the same dispatch window as the reference to them. */
53143 if (group == disp_jcc || group == disp_cmp)
53146 /* Check nonrestricted. */
53147 if (group == disp_no_group || group == disp_branch)
53150 /* Get last dispatch window. */
53151 if (window_list_next)
53152 window_list = window_list_next;
53154 if (window_list->window_num == 1)
/* Window 1's budget is shared with window 0: both together must
   stay under 48 bytes.  */
53156 sum = window_list->prev->window_size + window_list->window_size;
53159 || (min_insn_size (insn) + sum) >= 48)
53160 /* Window 1 is full. Go for next window. */
53164 num_restrict = count_num_restricted (insn, window_list);
53166 if (num_restrict > num_allowable_groups[group])
53169 /* See if it fits in the first window. */
53170 if (window_list->window_num == 0)
53172 /* The first window should have only single and double path
53174 if (path == path_double
53175 && (window_list->num_uops + 2) > MAX_INSN)
53177 else if (path != path_single)
53183 /* Add an instruction INSN with NUM_UOPS micro-operations to the
53184 dispatch window WINDOW_LIST. */
53187 add_insn_window (rtx_insn *insn, dispatch_windows *window_list, int num_uops)
53189 int byte_len = min_insn_size (insn);
53190 int num_insn = window_list->num_insn;
53192 sched_insn_info *window = window_list->window;
53193 enum dispatch_group group = get_insn_group (insn);
53194 enum insn_path path = get_insn_path (insn);
53195 int num_imm_operand;
53196 int num_imm32_operand;
53197 int num_imm64_operand;
/* Record a dispatch violation if the insn does not satisfy the
   window rules (compares are exempt; see fits_dispatch_window).  */
53199 if (!window_list->violation && group != disp_cmp
53200 && !fits_dispatch_window (insn))
53201 window_list->violation = true;
53203 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
53204 &num_imm64_operand);
53206 /* Initialize window with new instruction. */
53207 window[num_insn].insn = insn;
53208 window[num_insn].byte_len = byte_len;
53209 window[num_insn].group = group;
53210 window[num_insn].path = path;
53211 window[num_insn].imm_bytes = imm_size;
/* Update the window's aggregate counters.  */
53213 window_list->window_size += byte_len;
53214 window_list->num_insn = num_insn + 1;
53215 window_list->num_uops = window_list->num_uops + num_uops;
53216 window_list->imm_size += imm_size;
53217 window_list->num_imm += num_imm_operand;
53218 window_list->num_imm_32 += num_imm32_operand;
53219 window_list->num_imm_64 += num_imm64_operand;
53221 if (group == disp_store)
53222 window_list->num_stores += 1;
53223 else if (group == disp_load
53224 || group == disp_prefetch)
53225 window_list->num_loads += 1;
53226 else if (group == disp_load_store)
/* A load-store insn counts toward both limits.  */
53228 window_list->num_stores += 1;
53229 window_list->num_loads += 1;
53233 /* Adds a scheduled instruction, INSN, to the current dispatch window.
53234 If the total bytes of instructions or the number of instructions in
53235 the window exceed allowable, it allocates a new window. */
53238 add_to_dispatch_window (rtx_insn *insn)
53241 dispatch_windows *window_list;
53242 dispatch_windows *window_list2;
53243 dispatch_windows *window0_list;
53244 enum insn_path path;
53245 enum dispatch_group insn_group;
/* Unrecognized insns are not tracked.  */
53253 if (INSN_CODE (insn) < 0)
53256 byte_len = min_insn_size (insn);
53257 window_list = dispatch_window_list;
53258 next_list = window_list->next;
53259 path = get_insn_path (insn);
53260 insn_group = get_insn_group (insn);
53262 /* Get the last dispatch window. */
53264 window_list = dispatch_window_list->next;
/* Map the decode path to a uop count: single -> 1, double -> 2,
   multi uses the enum value itself.  */
53266 if (path == path_single)
53268 else if (path == path_double)
53271 insn_num_uops = (int) path;
53273 /* If current window is full, get a new window.
53274 Window number zero is full, if MAX_INSN uops are scheduled in it.
53275 Window number one is full, if window zero's bytes plus window
53276 one's bytes is 32, or if the bytes of the new instruction added
53277 to the total makes it greater than 48, or it has already MAX_INSN
53278 instructions in it. */
53279 num_insn = window_list->num_insn;
53280 num_uops = window_list->num_uops;
53281 window_num = window_list->window_num;
53282 insn_fits = fits_dispatch_window (insn);
53284 if (num_insn >= MAX_INSN
53285 || num_uops + insn_num_uops > MAX_INSN
/* Flip between window 0 and window 1.  */
53288 window_num = ~window_num & 1;
53289 window_list = allocate_next_window (window_num);
53292 if (window_num == 0)
53294 add_insn_window (insn, window_list, insn_num_uops);
53295 if (window_list->num_insn >= MAX_INSN
53296 && insn_group == disp_branch)
53298 process_end_window ();
53302 else if (window_num == 1)
53304 window0_list = window_list->prev;
53305 sum = window0_list->window_size + window_list->window_size;
53307 || (byte_len + sum) >= 48)
53309 process_end_window ();
53310 window_list = dispatch_window_list;
53313 add_insn_window (insn, window_list, insn_num_uops);
53316 gcc_unreachable ();
53318 if (is_end_basic_block (insn_group))
53320 /* End of basic block is reached do end-basic-block process. */
53321 process_end_window ();
53326 /* Print the dispatch window, WINDOW_NUM, to FILE. */
53328 DEBUG_FUNCTION static void
53329 debug_dispatch_window_file (FILE *file, int window_num)
53331 dispatch_windows *list;
53334 if (window_num == 0)
53335 list = dispatch_window_list;
53337 list = dispatch_window_list1;
53339 fprintf (file, "Window #%d:\n", list->window_num);
53340 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
53341 list->num_insn, list->num_uops, list->window_size);
53342 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
53343 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
53345 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
53347 fprintf (file, " insn info:\n");
/* Dump the per-insn slots until the first empty one.  */
53349 for (i = 0; i < MAX_INSN; i++)
53351 if (!list->window[i].insn)
53353 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
53354 i, group_name[list->window[i].group],
53355 i, (void *)list->window[i].insn,
53356 i, list->window[i].path,
53357 i, list->window[i].byte_len,
53358 i, list->window[i].imm_bytes);
53362 /* Print to stdout a dispatch window. */
53364 DEBUG_FUNCTION void
53365 debug_dispatch_window (int window_num)
53367 debug_dispatch_window_file (stdout, window_num);
53370 /* Print INSN dispatch information to FILE. */
53372 DEBUG_FUNCTION static void
53373 debug_insn_dispatch_info_file (FILE *file, rtx_insn *insn)
53376 enum insn_path path;
53377 enum dispatch_group group;
53379 int num_imm_operand;
53380 int num_imm32_operand;
53381 int num_imm64_operand;
/* Nothing to report for unrecognized insns.  */
53383 if (INSN_CODE (insn) < 0)
53386 byte_len = min_insn_size (insn);
53387 path = get_insn_path (insn);
53388 group = get_insn_group (insn);
53389 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
53390 &num_imm64_operand);
53392 fprintf (file, " insn info:\n");
53393 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
53394 group_name[group], path, byte_len);
53395 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
53396 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
53399 /* Print to STDERR the status of the ready list with respect to
53400 dispatch windows. */
53402 DEBUG_FUNCTION void
53403 debug_ready_dispatch (void)
53406 int no_ready = number_in_ready ();
53408 fprintf (stdout, "Number of ready: %d\n", no_ready);
53410 for (i = 0; i < no_ready; i++)
53411 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
53414 /* This routine is the driver of the dispatch scheduler.  MODE selects
   the action: DISPATCH_INIT resets scheduler state, ADD_TO_DISPATCH_WINDOW
   appends INSN to the current window; other modes are ignored. */
53417 do_dispatch (rtx_insn *insn, int mode)
53419 if (mode == DISPATCH_INIT)
53420 init_dispatch_sched ();
53421 else if (mode == ADD_TO_DISPATCH_WINDOW)
53422 add_to_dispatch_window (insn);
53425 /* Return TRUE if Dispatch Scheduling is supported.  Only enabled on
   AMD Bulldozer-family and Zen cores when -mdispatch-scheduler is on;
   ACTION selects which query to answer. */
53428 has_dispatch (rtx_insn *insn, int action)
53430 if ((TARGET_BDVER1 || TARGET_BDVER2 || TARGET_BDVER3
53431 || TARGET_BDVER4 || TARGET_ZNVER1) && flag_dispatch_scheduler)
/* Switch body: several case labels/returns were lost in extraction;
   visible cases answer the individual dispatch queries.  */
53437 case IS_DISPATCH_ON:
53442 return is_cmp (insn);
53444 case DISPATCH_VIOLATION:
53445 return dispatch_violation ();
53447 case FITS_DISPATCH_WINDOW:
53448 return fits_dispatch_window (insn);
53454 /* Implementation of reassociation_width target hook used by
53455 reassoc phase to identify parallelism level in reassociated
53456 tree. Statements tree_code is passed in OPC. Arguments type
53459 Currently parallel reassociation is enabled for Atom
53460 processors only and we set reassociation width to be 2
53461 because Atom may issue up to 2 instructions per cycle.
53463 Return value should be fixed if parallel reassociation is
53464 enabled for other processors. */
53467 ix86_reassociation_width (unsigned int, machine_mode mode)
/* Vector modes: width depends on whether the target executes vector
   ops in parallel (return statements lost in extraction).  */
53470 if (VECTOR_MODE_P (mode))
53472 if (TARGET_VECTOR_PARALLEL_EXECUTION)
/* Scalar modes: integer and FP reassociation widths are gated by
   separate tuning flags; Haswell in 64-bit mode gets width 4 for FP.  */
53479 if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
53481 else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
53482 return ((TARGET_64BIT && ix86_tune == PROCESSOR_HASWELL)? 4 : 2);
53487 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
53488 place emms and femms instructions. */
53490 static machine_mode
53491 ix86_preferred_simd_mode (machine_mode mode)
/* Map a scalar MODE to the widest vector mode the enabled ISA supports
   (AVX-512 > AVX-256 > SSE-128).  The switch's case labels were lost in
   extraction; each pair below corresponds to one element mode.  */
53499 return TARGET_AVX512BW ? V64QImode :
53500 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
53502 return TARGET_AVX512BW ? V32HImode :
53503 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
53505 return TARGET_AVX512F ? V16SImode :
53506 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
53508 return TARGET_AVX512F ? V8DImode :
53509 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;
/* Float case (return values lost in extraction).  */
53512 if (TARGET_AVX512F)
53514 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
/* Double case: vectorization of DFmode can be disabled by tuning.  */
53520 if (!TARGET_VECTORIZE_DOUBLE)
53522 else if (TARGET_AVX512F)
53524 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
53526 else if (TARGET_SSE2)
53535 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
53536 vectors. If AVX512F is enabled then try vectorizing with 512bit,
53537 256bit and 128bit vectors. */
53539 static unsigned int
53540 ix86_autovectorize_vector_sizes (void)
/* Returns a bitmask of candidate vector sizes in bytes; 0 means
   "only the preferred size".  */
53542 return TARGET_AVX512F ? 64 | 32 | 16 :
53543 (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
53546 /* Implementation of targetm.vectorize.get_mask_mode.  Choose the mode
   used for vector comparison masks: a scalar integer mask for AVX-512
   style predication, otherwise an integer vector of the element size.  */
53548 static machine_mode
53549 ix86_get_mask_mode (unsigned nunits, unsigned vector_size)
53551 unsigned elem_size = vector_size / nunits;
53553 /* Scalar mask case. */
53554 if ((TARGET_AVX512F && vector_size == 64)
53555 || (TARGET_AVX512VL && (vector_size == 32 || vector_size == 16)))
/* AVX-512BW extends scalar masks to byte/word elements as well.  */
53557 if (elem_size == 4 || elem_size == 8 || TARGET_AVX512BW)
53558 return smallest_mode_for_size (nunits, MODE_INT);
/* Vector-mask fallback: an integer vector with one lane per element.  */
53561 machine_mode elem_mode
53562 = smallest_mode_for_size (elem_size * BITS_PER_UNIT, MODE_INT);
53564 gcc_assert (elem_size * nunits == vector_size);
53566 return mode_for_vector (elem_mode, nunits);
53571 /* Return class of registers which could be used for pseudo of MODE
53572 and of class RCLASS for spilling instead of memory. Return NO_REGS
53573 if it is not possible or non-profitable. */
53575 ix86_spill_class (reg_class_t rclass, machine_mode mode)
/* Allow spilling SImode (and DImode on 64-bit) integer pseudos into SSE
   registers when the tuning flag asks for it and MMX is off.  */
53577 if (TARGET_SSE && TARGET_GENERAL_REGS_SSE_SPILL && ! TARGET_MMX
53578 && (mode == SImode || (TARGET_64BIT && mode == DImode))
53579 && rclass != NO_REGS && INTEGER_CLASS_P (rclass))
53580 return ALL_SSE_REGS;
53584 /* Implement targetm.vectorize.init_cost.  Allocate a three-slot cost
   accumulator (prologue/body/epilogue), zeroed; the return statement was
   lost in extraction. */
53587 ix86_init_cost (struct loop *)
53589 unsigned *cost = XNEWVEC (unsigned, 3);
53590 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
53594 /* Implement targetm.vectorize.add_stmt_cost.  Account COUNT copies of
   a statement of kind KIND into the accumulator DATA at location WHERE;
   returns the amount added. */
53597 ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
53598 struct _stmt_vec_info *stmt_info, int misalign,
53599 enum vect_cost_model_location where)
53601 unsigned *cost = (unsigned *) data;
53602 unsigned retval = 0;
53604 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
53605 int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
53607 /* Statements in an inner loop relative to the loop being
53608 vectorized are weighted more heavily. The value here is
53609 arbitrary and could potentially be improved with analysis. */
53610 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
53611 count *= 50; /* FIXME. */
53613 retval = (unsigned) (count * stmt_cost);
53615 /* We need to multiply all vector stmt cost by 1.7 (estimated cost)
53616 for Silvermont as it has out of order integer pipeline and can execute
53617 2 scalar instruction per tick, but has in order SIMD pipeline. */
53618 if (TARGET_SILVERMONT || TARGET_INTEL)
53619 if (stmt_info && stmt_info->stmt)
53621 tree lhs_op = gimple_get_lhs (stmt_info->stmt);
53622 if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
53623 retval = (retval * 17) / 10;
/* Accumulate into the requested section (prologue/body/epilogue).  */
53626 cost[where] += retval;
53631 /* Implement targetm.vectorize.finish_cost.  Copy the accumulated
   prologue/body/epilogue costs out of DATA into the output params. */
53634 ix86_finish_cost (void *data, unsigned *prologue_cost,
53635 unsigned *body_cost, unsigned *epilogue_cost)
53637 unsigned *cost = (unsigned *) data;
53638 *prologue_cost = cost[vect_prologue];
53639 *body_cost = cost[vect_body];
53640 *epilogue_cost = cost[vect_epilogue];
53643 /* Implement targetm.vectorize.destroy_cost_data.  Releases the
   accumulator allocated by ix86_init_cost (body lost in extraction;
   presumably frees DATA -- TODO confirm). */
53646 ix86_destroy_cost_data (void *data)
53651 /* Validate target specific memory model bits in VAL. */
53653 static unsigned HOST_WIDE_INT
53654 ix86_memmodel_check (unsigned HOST_WIDE_INT val)
53656 enum memmodel model = memmodel_from_int (val)
/* Reject unknown target bits, or both HLE bits at once; fall back to
   a safe SEQ_CST model with a -Winvalid-memory-model warning.  */
53659 if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
53661 || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
53663 warning (OPT_Winvalid_memory_model,
53664 "Unknown architecture specific memory model");
53665 return MEMMODEL_SEQ_CST;
/* HLE_ACQUIRE requires at least an acquire model; HLE_RELEASE requires
   at least a release model.  */
53667 strong = (is_mm_acq_rel (model) || is_mm_seq_cst (model));
53668 if (val & IX86_HLE_ACQUIRE && !(is_mm_acquire (model) || strong))
53670 warning (OPT_Winvalid_memory_model,
53671 "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
53672 return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
53674 if (val & IX86_HLE_RELEASE && !(is_mm_release (model) || strong))
53676 warning (OPT_Winvalid_memory_model,
53677 "HLE_RELEASE not used with RELEASE or stronger memory model");
53678 return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
53683 /* Set CLONEI->vecsize_mangle, CLONEI->vecsize_int,
53684 CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
53685 CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
53686 or number of vecsize_mangle variants that should be emitted. */
53689 ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
53690 struct cgraph_simd_clone *clonei,
53691 tree base_type, int num)
/* Explicit simdlen must be a power of two in [2, 16].  */
53695 if (clonei->simdlen
53696 && (clonei->simdlen < 2
53697 || clonei->simdlen > 16
53698 || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
53700 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
53701 "unsupported simdlen %d", clonei->simdlen);
/* Return and argument types must have modes the SIMD ABI supports
   (accepted case labels were lost in extraction).  */
53705 tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
53706 if (TREE_CODE (ret_type) != VOID_TYPE)
53707 switch (TYPE_MODE (ret_type))
53719 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
53720 "unsupported return type %qT for simd\n", ret_type);
53727 for (t = DECL_ARGUMENTS (node->decl), i = 0; t; t = DECL_CHAIN (t), i++)
53728 /* FIXME: Shouldn't we allow such arguments if they are uniform? */
53729 switch (TYPE_MODE (TREE_TYPE (t)))
53741 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
53742 "unsupported argument type %qT for simd\n", TREE_TYPE (t));
/* Pick the ISA mangling letter: 'b' = SSE, 'c' = AVX, 'd' = AVX2.  */
53746 if (clonei->cilk_elemental)
53748 /* Parse here processor clause. If not present, default to 'b'. */
53749 clonei->vecsize_mangle = 'b';
53751 else if (!TREE_PUBLIC (node->decl))
53753 /* If the function isn't exported, we can pick up just one ISA
53756 clonei->vecsize_mangle = 'd';
53757 else if (TARGET_AVX)
53758 clonei->vecsize_mangle = 'c';
53760 clonei->vecsize_mangle = 'b';
/* Exported functions get all three variants, indexed by NUM.  */
53765 clonei->vecsize_mangle = "bcd"[num];
53768 switch (clonei->vecsize_mangle)
53771 clonei->vecsize_int = 128;
53772 clonei->vecsize_float = 128;
53775 clonei->vecsize_int = 128;
53776 clonei->vecsize_float = 256;
53779 clonei->vecsize_int = 256;
53780 clonei->vecsize_float = 256;
/* Default simdlen: vector width divided by the base type width,
   clamped to 16 lanes.  */
53783 if (clonei->simdlen == 0)
53785 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
53786 clonei->simdlen = clonei->vecsize_int;
53788 clonei->simdlen = clonei->vecsize_float;
53789 clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
53790 if (clonei->simdlen > 16)
53791 clonei->simdlen = 16;
53796 /* Add target attribute to SIMD clone NODE if needed. */
53799 ix86_simd_clone_adjust (struct cgraph_node *node)
53801 const char *str = NULL;
53802 gcc_assert (node->decl == cfun->decl);
/* STR is the ISA option string selected per mangling letter; the case
   labels assigning it were lost in extraction.  */
53803 switch (node->simdclone->vecsize_mangle)
53818 gcc_unreachable ();
/* Apply the ISA as a target attribute and re-activate the function so
   codegen sees the new options.  */
53823 tree args = build_tree_list (NULL_TREE, build_string (strlen (str), str));
53824 bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0);
53827 ix86_reset_previous_fndecl ();
53828 ix86_set_current_function (node->decl);
53831 /* If SIMD clone NODE can't be used in a vectorized loop
53832 in current function, return -1, otherwise return a badness of using it
53833 (0 if it is most desirable from vecsize_mangle point of view, 1
53834 slightly less desirable, etc.). */
53837 ix86_simd_clone_usable (struct cgraph_node *node)
/* Case labels and several returns were lost in extraction; badness is
   ranked by how far the clone's ISA is below the current one.  */
53839 switch (node->simdclone->vecsize_mangle)
53846 return TARGET_AVX2 ? 2 : 1;
53850 return TARGET_AVX2 ? 1 : 0;
53857 gcc_unreachable ();
53861 /* This function adjusts the unroll factor based on
53862 the hardware capabilities. For ex, bdver3 has
53863 a loop buffer which makes unrolling of smaller
53864 loops less important. This function decides the
53865 unroll factor using number of memory references
53866 (value 32 is used) as a heuristic. */
53869 ix86_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
53874 unsigned mem_count = 0;
/* Tuning flag gate; the early return's value was lost in extraction.  */
53876 if (!TARGET_ADJUST_UNROLL)
53879 /* Count the number of memory references within the loop body.
53880 This value determines the unrolling factor for bdver3 and bdver4
53882 subrtx_iterator::array_type array;
53883 bbs = get_loop_body (loop);
53884 for (i = 0; i < loop->num_nodes; i++)
53885 FOR_BB_INSNS (bbs[i], insn)
53886 if (NONDEBUG_INSN_P (insn))
53887 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
53888 if (const_rtx x = *iter)
53891 machine_mode mode = GET_MODE (x);
53892 unsigned int n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
/* Heuristic: cap total memory refs across the unrolled body at 32.  */
53900 if (mem_count && mem_count <=32)
53901 return 32/mem_count;
53907 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
53910 ix86_float_exceptions_rounding_supported_p (void)
53912 /* For x87 floating point with standard excess precision handling,
53913 there is no adddf3 pattern (since x87 floating point only has
53914 XFmode operations) so the default hook implementation gets this
53916 return TARGET_80387 || TARGET_SSE_MATH;
53919 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV.  Build trees that save
   (*HOLD), clear (*CLEAR) and restore-and-raise (*UPDATE) the FP
   environment, covering both the x87 and SSE (MXCSR) units. */
53922 ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
/* Nothing to do without any FP unit (early return lost in extraction).  */
53924 if (!TARGET_80387 && !TARGET_SSE_MATH)
53926 tree exceptions_var = create_tmp_var_raw (integer_type_node);
/* x87 part: the 28-byte FP environment is modeled as a 7-element
   array of unsigned int.  */
53929 tree fenv_index_type = build_index_type (size_int (6));
53930 tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
53931 tree fenv_var = create_tmp_var_raw (fenv_type);
53932 TREE_ADDRESSABLE (fenv_var) = 1;
53933 tree fenv_ptr = build_pointer_type (fenv_type);
53934 tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
53935 fenv_addr = fold_convert (ptr_type_node, fenv_addr);
53936 tree fnstenv = ix86_builtins[IX86_BUILTIN_FNSTENV];
53937 tree fldenv = ix86_builtins[IX86_BUILTIN_FLDENV];
53938 tree fnstsw = ix86_builtins[IX86_BUILTIN_FNSTSW];
53939 tree fnclex = ix86_builtins[IX86_BUILTIN_FNCLEX];
/* HOLD: fnstenv to save the environment, then fnclex to clear
   pending exceptions.  */
53940 tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
53941 tree hold_fnclex = build_call_expr (fnclex, 0);
53942 fenv_var = build4 (TARGET_EXPR, fenv_type, fenv_var, hold_fnstenv,
53943 NULL_TREE, NULL_TREE);
53944 *hold = build2 (COMPOUND_EXPR, void_type_node, fenv_var,
53946 *clear = build_call_expr (fnclex, 0);
/* UPDATE: read the status word via fnstsw, widen it into
   exceptions_var, then reload the saved environment with fldenv.  */
53947 tree sw_var = create_tmp_var_raw (short_unsigned_type_node);
53948 tree fnstsw_call = build_call_expr (fnstsw, 0);
53949 tree sw_mod = build2 (MODIFY_EXPR, short_unsigned_type_node,
53950 sw_var, fnstsw_call);
53951 tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
53952 tree update_mod = build2 (MODIFY_EXPR, integer_type_node,
53953 exceptions_var, exceptions_x87);
53954 *update = build2 (COMPOUND_EXPR, integer_type_node,
53955 sw_mod, update_mod);
53956 tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
53957 *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
/* SSE part: save MXCSR, mask all exceptions (0x1f80) and clear the
   sticky flags (~0x3f) for the HOLD sequence.  */
53959 if (TARGET_SSE_MATH)
53961 tree mxcsr_orig_var = create_tmp_var_raw (unsigned_type_node);
53962 tree mxcsr_mod_var = create_tmp_var_raw (unsigned_type_node);
53963 tree stmxcsr = ix86_builtins[IX86_BUILTIN_STMXCSR];
53964 tree ldmxcsr = ix86_builtins[IX86_BUILTIN_LDMXCSR];
53965 tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
53966 tree hold_assign_orig = build2 (MODIFY_EXPR, unsigned_type_node,
53967 mxcsr_orig_var, stmxcsr_hold_call);
53968 tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
53970 build_int_cst (unsigned_type_node, 0x1f80));
53971 hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
53972 build_int_cst (unsigned_type_node, 0xffffffc0));
53973 tree hold_assign_mod = build2 (MODIFY_EXPR, unsigned_type_node,
53974 mxcsr_mod_var, hold_mod_val);
53975 tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
53976 tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
53977 hold_assign_orig, hold_assign_mod);
53978 hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
53979 ldmxcsr_hold_call);
/* Chain onto any x87 HOLD/CLEAR already built above.  */
53981 *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
53984 tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
53986 *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
53987 ldmxcsr_clear_call);
53989 *clear = ldmxcsr_clear_call;
/* Merge SSE exception flags into exceptions_var (OR with x87 flags
   when both units are active), then restore the original MXCSR.  */
53990 tree stxmcsr_update_call = build_call_expr (stmxcsr, 0);
53991 tree exceptions_sse = fold_convert (integer_type_node,
53992 stxmcsr_update_call);
53995 tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
53996 exceptions_var, exceptions_sse);
53997 tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
53998 exceptions_var, exceptions_mod);
53999 *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
54000 exceptions_assign);
54003 *update = build2 (MODIFY_EXPR, integer_type_node,
54004 exceptions_var, exceptions_sse);
54005 tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
54006 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
54007 ldmxcsr_update_call);
/* Finally, raise the accumulated exceptions atomically.  */
54009 tree atomic_feraiseexcept
54010 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
54011 tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
54012 1, exceptions_var);
54013 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
54014 atomic_feraiseexcept_call);
54017 /* Return mode to be used for bounds or VOIDmode
54018 if bounds are not supported. */
54020 static enum machine_mode
54021 ix86_mpx_bound_mode ()
54023 /* Do not support pointer checker if MPX
/* Warn when -fcheck-pointer-bounds is requested without MPX; the
   VOIDmode/BNDmode returns were lost in extraction.  */
54027 if (flag_check_pointer_bounds)
54028 warning (0, "Pointer Checker requires MPX support on this target."
54029 " Use -mmpx options to enable MPX.");
54036 /* Return constant used to statically initialize constant bounds.
54038 This function is used to create special bound values. For now
54039 only INIT bounds and NONE bounds are expected. More special
54040 values may be added later. */
54043 ix86_make_bounds_constant (HOST_WIDE_INT lb, HOST_WIDE_INT ub)
/* Encode bounds as a complex constant: INIT is (all-ones, zero),
   NONE is (zero, all-ones), per the LB/UB arguments.  */
54045 tree low = lb ? build_minus_one_cst (pointer_sized_int_node)
54046 : build_zero_cst (pointer_sized_int_node);
54047 tree high = ub ? build_zero_cst (pointer_sized_int_node)
54048 : build_minus_one_cst (pointer_sized_int_node);
54050 /* This function is supposed to be used to create INIT and
54051 NONE bounds only. */
54052 gcc_assert ((lb == 0 && ub == -1)
54053 || (lb == -1 && ub == 0));
54055 return build_complex (NULL, low, high);
54058 /* Generate a list of statements STMTS to initialize pointer bounds
54059 variable VAR with bounds LB and UB. Return the number of generated
54063 ix86_initialize_bounds (tree var, tree lb, tree ub, tree *stmts)
54065 tree bnd_ptr = build_pointer_type (pointer_sized_int_node);
54066 tree lhs, modify, var_p;
/* The upper bound is stored one's-complemented in memory.  */
54068 ub = build1 (BIT_NOT_EXPR, pointer_sized_int_node, ub);
54069 var_p = fold_convert (bnd_ptr, build_fold_addr_expr (var));
/* First word of VAR gets the lower bound...  */
54071 lhs = build1 (INDIRECT_REF, pointer_sized_int_node, var_p);
54072 modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, lb);
54073 append_to_statement_list (modify, stmts);
/* ...second word gets the (complemented) upper bound.  */
54075 lhs = build1 (INDIRECT_REF, pointer_sized_int_node,
54076 build2 (POINTER_PLUS_EXPR, bnd_ptr, var_p,
54077 TYPE_SIZE_UNIT (pointer_sized_int_node)));
54078 modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, ub);
54079 append_to_statement_list (modify, stmts);
54084 #if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES
54085 /* For i386, common symbol is local only for non-PIE binaries. For
54086 x86-64, common symbol is local only for non-PIE binaries or linker
54087 supports copy reloc in PIE binaries. */
54090 ix86_binds_local_p (const_tree exp)
/* Delegates to the generic helper; part of the argument list
   (the common-symbol-local predicate) was lost in extraction.  */
54092 return default_binds_local_p_3 (exp, flag_shlib != 0, true, true,
54095 && HAVE_LD_PIE_COPYRELOC != 0)));
54099 /* If MEM is in the form of [base+offset], extract the two parts
54100 of address and set to BASE and OFFSET, otherwise return false. */
54103 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
54107 gcc_assert (MEM_P (mem));
54109 addr = XEXP (mem, 0);
/* Strip a CONST wrapper so (const (plus sym off)) is handled too.  */
54111 if (GET_CODE (addr) == CONST)
54112 addr = XEXP (addr, 0);
/* Bare register or symbol: implicit zero offset.  */
54114 if (REG_P (addr) || GET_CODE (addr) == SYMBOL_REF)
54117 *offset = const0_rtx;
/* (plus (reg|symbol) (const_int)): split into base and offset.  */
54121 if (GET_CODE (addr) == PLUS
54122 && (REG_P (XEXP (addr, 0))
54123 || GET_CODE (XEXP (addr, 0)) == SYMBOL_REF)
54124 && CONST_INT_P (XEXP (addr, 1)))
54126 *base = XEXP (addr, 0);
54127 *offset = XEXP (addr, 1);
54134 /* Given OPERANDS of consecutive load/store, check if we can merge
54135 them into move multiple. LOAD is true if they are load instructions.
54136 MODE is the mode of memory operands. */
54139 ix86_operands_ok_for_move_multiple (rtx *operands, bool load,
54140 enum machine_mode mode)
54142 HOST_WIDE_INT offval_1, offval_2, msize;
54143 rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, offset_1, offset_2;
/* For loads, operands are (reg, mem, reg, mem); for stores the
   memory/register roles are swapped.  */
54147 mem_1 = operands[1];
54148 mem_2 = operands[3];
54149 reg_1 = operands[0];
54150 reg_2 = operands[2];
54154 mem_1 = operands[0];
54155 mem_2 = operands[2];
54156 reg_1 = operands[1];
54157 reg_2 = operands[3];
54160 gcc_assert (REG_P (reg_1) && REG_P (reg_2));
/* NOTE(review): requires the two register operands to be the SAME
   register -- confirm against the insn patterns using this check.  */
54162 if (REGNO (reg_1) != REGNO (reg_2))
54165 /* Check if the addresses are in the form of [base+offset]. */
54166 if (!extract_base_offset_in_addr (mem_1, &base_1, &offset_1))
54168 if (!extract_base_offset_in_addr (mem_2, &base_2, &offset_2))
54171 /* Check if the bases are the same. */
54172 if (!rtx_equal_p (base_1, base_2))
54175 offval_1 = INTVAL (offset_1);
54176 offval_2 = INTVAL (offset_2);
54177 msize = GET_MODE_SIZE (mode);
54178 /* Check if mem_1 is adjacent to mem_2 and mem_1 has lower address. */
54179 if (offval_1 + msize != offval_2)
54185 /* Implement the TARGET_OPTAB_SUPPORTED_P hook.  Decide whether optab
   OP in MODE1 should be used under optimization type OPT_TYPE; several
   case labels of the switch were lost in extraction. */
54188 ix86_optab_supported_p (int op, machine_mode mode1, machine_mode,
54189 optimization_type opt_type)
54203 return opt_type == OPTIMIZE_FOR_SPEED;
/* SSE float modes with -fno-trapping-math: speed-only.  */
54206 if (SSE_FLOAT_MODE_P (mode1)
54208 && !flag_trapping_math
54210 return opt_type == OPTIMIZE_FOR_SPEED;
54216 if (SSE_FLOAT_MODE_P (mode1)
54218 && !flag_trapping_math
54221 return opt_type == OPTIMIZE_FOR_SPEED;
/* Reciprocal square root additionally requires use_rsqrt_p.  */
54224 return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p ();
54231 /* Address space support.
54233 This is not "far pointers" in the 16-bit sense, but an easy way
54234 to use %fs and %gs segment prefixes. Therefore:
54236 (a) All address spaces have the same modes,
54237 (b) All address spaces have the same address forms,
54238 (c) While %fs and %gs are technically subsets of the generic
54239 address space, they are probably not subsets of each other.
54240 (d) Since we have no access to the segment base register values
54241 without resorting to a system call, we cannot convert a
54242 non-default address space to a default address space.
54243 Therefore we do not claim %fs or %gs are subsets of generic.
54244 (e) However, __seg_tls uses UNSPEC_TP as the base (which itself is
54245 stored at __seg_tls:0) so we can map between tls and generic. */
54248 ix86_addr_space_subset_p (addr_space_t subset, addr_space_t superset)
/* Only SEG_TLS is a subset of generic (plus reflexivity).  */
54250 return (subset == superset
54251 || (superset == ADDR_SPACE_GENERIC
54252 && subset == ADDR_SPACE_SEG_TLS));
54254 #undef TARGET_ADDR_SPACE_SUBSET_P
54255 #define TARGET_ADDR_SPACE_SUBSET_P ix86_addr_space_subset_p
/* Convert pointer OP between the generic and __seg_tls address spaces
   by adding or subtracting the thread pointer.  */
54258 ix86_addr_space_convert (rtx op, tree from_type, tree to_type)
54260 addr_space_t from_as = TYPE_ADDR_SPACE (TREE_TYPE (from_type));
54261 addr_space_t to_as = TYPE_ADDR_SPACE (TREE_TYPE (to_type));
54263 /* Conversion between SEG_TLS and GENERIC is handled by adding or
54264 subtracting the thread pointer. */
54265 if ((from_as == ADDR_SPACE_GENERIC && to_as == ADDR_SPACE_SEG_TLS)
54266 || (from_as == ADDR_SPACE_SEG_TLS && to_as == ADDR_SPACE_GENERIC))
54268 machine_mode mode = GET_MODE (op);
54269 if (mode == VOIDmode)
/* TLS->generic adds TP; generic->TLS subtracts it.  */
54271 rtx tp = get_thread_pointer (mode, optimize || mode != ptr_mode);
54272 return expand_binop (mode, (to_as == ADDR_SPACE_GENERIC
54273 ? add_optab : sub_optab),
54274 op, tp, NULL, 1, OPTAB_WIDEN);
54279 #undef TARGET_ADDR_SPACE_CONVERT
54280 #define TARGET_ADDR_SPACE_CONVERT ix86_addr_space_convert
/* Map an address space to the one used for debug info.  */
54283 ix86_addr_space_debug (addr_space_t as)
54285 /* Fold __seg_tls to __seg_fs or __seg_gs for debugging. */
54286 if (as == ADDR_SPACE_SEG_TLS)
54287 as = DEFAULT_TLS_SEG_REG;
54290 #undef TARGET_ADDR_SPACE_DEBUG
54291 #define TARGET_ADDR_SPACE_DEBUG ix86_addr_space_debug
54293 /* All use of segmentation is assumed to make address 0 valid. */
54296 ix86_addr_space_zero_address_valid (addr_space_t as)
/* Address 0 is only invalid in the generic space; %fs/%gs-relative
   zero can be a legitimate address.  */
54298 return as != ADDR_SPACE_GENERIC;
54300 #undef TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID
54301 #define TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID ix86_addr_space_zero_address_valid
54303 /* Initialize the GCC target structure. */
54304 #undef TARGET_RETURN_IN_MEMORY
54305 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
54307 #undef TARGET_LEGITIMIZE_ADDRESS
54308 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
54310 #undef TARGET_ATTRIBUTE_TABLE
54311 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
54312 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
54313 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
54314 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
54315 # undef TARGET_MERGE_DECL_ATTRIBUTES
54316 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
54319 #undef TARGET_COMP_TYPE_ATTRIBUTES
54320 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
54322 #undef TARGET_INIT_BUILTINS
54323 #define TARGET_INIT_BUILTINS ix86_init_builtins
54324 #undef TARGET_BUILTIN_DECL
54325 #define TARGET_BUILTIN_DECL ix86_builtin_decl
54326 #undef TARGET_EXPAND_BUILTIN
54327 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
54329 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
54330 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
54331 ix86_builtin_vectorized_function
54333 #undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
54334 #define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load
54336 #undef TARGET_VECTORIZE_BUILTIN_TM_STORE
54337 #define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store
54339 #undef TARGET_VECTORIZE_BUILTIN_GATHER
54340 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
54342 #undef TARGET_VECTORIZE_BUILTIN_SCATTER
54343 #define TARGET_VECTORIZE_BUILTIN_SCATTER ix86_vectorize_builtin_scatter
54345 #undef TARGET_BUILTIN_RECIPROCAL
54346 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
54348 #undef TARGET_ASM_FUNCTION_EPILOGUE
54349 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
54351 #undef TARGET_ENCODE_SECTION_INFO
54352 #ifndef SUBTARGET_ENCODE_SECTION_INFO
54353 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
54355 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
54358 #undef TARGET_ASM_OPEN_PAREN
54359 #define TARGET_ASM_OPEN_PAREN ""
54360 #undef TARGET_ASM_CLOSE_PAREN
54361 #define TARGET_ASM_CLOSE_PAREN ""
54363 #undef TARGET_ASM_BYTE_OP
54364 #define TARGET_ASM_BYTE_OP ASM_BYTE
54366 #undef TARGET_ASM_ALIGNED_HI_OP
54367 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
54368 #undef TARGET_ASM_ALIGNED_SI_OP
54369 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
54371 #undef TARGET_ASM_ALIGNED_DI_OP
54372 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
54375 #undef TARGET_PROFILE_BEFORE_PROLOGUE
54376 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
54378 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
54379 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
54381 #undef TARGET_ASM_UNALIGNED_HI_OP
54382 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
54383 #undef TARGET_ASM_UNALIGNED_SI_OP
54384 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
54385 #undef TARGET_ASM_UNALIGNED_DI_OP
54386 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
54388 #undef TARGET_PRINT_OPERAND
54389 #define TARGET_PRINT_OPERAND ix86_print_operand
54390 #undef TARGET_PRINT_OPERAND_ADDRESS
54391 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
54392 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
54393 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
54394 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
54395 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
54397 #undef TARGET_SCHED_INIT_GLOBAL
54398 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
54399 #undef TARGET_SCHED_ADJUST_COST
54400 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
54401 #undef TARGET_SCHED_ISSUE_RATE
54402 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
54403 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
54404 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
54405 ia32_multipass_dfa_lookahead
54406 #undef TARGET_SCHED_MACRO_FUSION_P
54407 #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
54408 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
54409 #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
54411 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
54412 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
54414 #undef TARGET_MEMMODEL_CHECK
54415 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
54417 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
54418 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
54421 #undef TARGET_HAVE_TLS
54422 #define TARGET_HAVE_TLS true
54424 #undef TARGET_CANNOT_FORCE_CONST_MEM
54425 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
54426 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
54427 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
54429 #undef TARGET_DELEGITIMIZE_ADDRESS
54430 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
54432 #undef TARGET_MS_BITFIELD_LAYOUT_P
54433 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
54436 #undef TARGET_BINDS_LOCAL_P
54437 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
54439 #undef TARGET_BINDS_LOCAL_P
54440 #define TARGET_BINDS_LOCAL_P ix86_binds_local_p
54442 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
54443 #undef TARGET_BINDS_LOCAL_P
54444 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
54447 #undef TARGET_ASM_OUTPUT_MI_THUNK
54448 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
54449 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
54450 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
54452 #undef TARGET_ASM_FILE_START
54453 #define TARGET_ASM_FILE_START x86_file_start
54455 #undef TARGET_OPTION_OVERRIDE
54456 #define TARGET_OPTION_OVERRIDE ix86_option_override
54458 #undef TARGET_REGISTER_MOVE_COST
54459 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
54460 #undef TARGET_MEMORY_MOVE_COST
54461 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
54462 #undef TARGET_RTX_COSTS
54463 #define TARGET_RTX_COSTS ix86_rtx_costs
54464 #undef TARGET_ADDRESS_COST
54465 #define TARGET_ADDRESS_COST ix86_address_cost
54467 #undef TARGET_FIXED_CONDITION_CODE_REGS
54468 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
54469 #undef TARGET_CC_MODES_COMPATIBLE
54470 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
54472 #undef TARGET_MACHINE_DEPENDENT_REORG
54473 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
54475 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
54476 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
54478 #undef TARGET_BUILD_BUILTIN_VA_LIST
54479 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
54481 #undef TARGET_FOLD_BUILTIN
54482 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
54484 #undef TARGET_COMPARE_VERSION_PRIORITY
54485 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
/* Install the i386 implementations of the target hooks: each #undef
   clears the default from target-def.h, each #define routes the hook
   to the ix86_* function defined earlier in this file.  These macros
   are collected by TARGET_INITIALIZER into the targetm vector at the
   bottom of the file.  */

/* Function multiversioning: build the resolver/dispatcher used to pick
   among "target"-attribute versions of a function.  */
#undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
#define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
  ix86_generate_version_dispatcher_body
#undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
#define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
  ix86_get_function_versions_dispatcher

/* va_list handling; i386 needs its own canonicalization because the
   64-bit ABIs use a distinct va_list record type.  */
#undef TARGET_ENUM_VA_LIST_P
#define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
#undef TARGET_FN_ABI_VA_LIST
#define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
#undef TARGET_CANONICAL_VA_LIST_TYPE
#define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start

/* Inline-asm operand adjustment (flag outputs etc.).  */
#undef TARGET_MD_ASM_ADJUST
#define TARGET_MD_ASM_ADJUST ix86_md_asm_adjust

/* Argument passing and incoming-varargs setup.  */
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG ix86_function_arg

/* PIC register handling (may be a pseudo before reload).  */
#undef TARGET_INIT_PIC_REG
#define TARGET_INIT_PIC_REG ix86_init_pic_reg
#undef TARGET_USE_PSEUDO_PIC_REG
#define TARGET_USE_PSEUDO_PIC_REG ix86_use_pseudo_pic_reg

#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference

/* Stack realignment support: argument pointer, dynamic stack boundary
   and the DRAP register.  */
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
#undef TARGET_UPDATE_STACK_BOUNDARY
#define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
#undef TARGET_GET_DRAP_RTX
#define TARGET_GET_DRAP_RTX ix86_get_drap_rtx

#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_STATIC_CHAIN
#define TARGET_STATIC_CHAIN ix86_static_chain
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
#undef TARGET_RETURN_POPS_ARGS
#define TARGET_RETURN_POPS_ARGS ix86_return_pops_args

#undef TARGET_LEGITIMATE_COMBINED_INSN
#define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn

/* AddressSanitizer shadow-memory offset for this target.  */
#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

/* Which scalar/vector/soft-float modes this target supports.  */
#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
#undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
#define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
  ix86_libgcc_floating_mode_supported_p
#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
/* DWARF dtp-relative TLS relocations can only be emitted when the
   assembler supports TLS, so only install the hook under HAVE_AS_TLS;
   otherwise i386_output_dwarf_dtprel is not compiled in and the
   default (sorry) hook must remain.  */
#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
#endif
/* Let subtargets (e.g. cygwin/mingw) inject extra decl attributes.
   The conditional must be closed here: the original left this #ifdef
   unmatched, which is a preprocessor error and would make every
   following line conditional on SUBTARGET_INSERT_ATTRIBUTES.  */
#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif
/* C++ mangling of target-specific types (e.g. __float128).  */
#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ix86_mangle_type

#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail

/* Function return values and promotion rules.  */
#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ix86_override_options_after_change
#undef TARGET_MEMBER_TYPE_FORCES_BLK
#define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk
#undef TARGET_INSTANTIATE_DECLS
#define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls

/* Register-allocation / reload class hooks.  */
#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD ix86_secondary_reload
#undef TARGET_CLASS_MAX_NREGS
#define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
#undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
#define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p

/* Vectorizer cost model and shuffle/SIMD-mode queries.  */
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  ix86_builtin_vectorization_cost
#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  ix86_vectorize_vec_perm_const_ok
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  ix86_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  ix86_autovectorize_vector_sizes
#undef TARGET_VECTORIZE_GET_MASK_MODE
#define TARGET_VECTORIZE_GET_MASK_MODE ix86_get_mask_mode
#undef TARGET_VECTORIZE_INIT_COST
#define TARGET_VECTORIZE_INIT_COST ix86_init_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
#undef TARGET_VECTORIZE_FINISH_COST
#define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
#undef TARGET_VECTORIZE_DESTROY_COST_DATA
#define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data

/* Per-function "target" attribute/pragma option handling.  */
#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE ix86_function_specific_save
#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE ix86_function_specific_restore
#undef TARGET_OPTION_POST_STREAM_IN
#define TARGET_OPTION_POST_STREAM_IN ix86_function_specific_post_stream_in
#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT ix86_function_specific_print
#undef TARGET_OPTION_FUNCTION_VERSIONS
#define TARGET_OPTION_FUNCTION_VERSIONS ix86_function_versions
#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P ix86_can_inline_p

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p

/* i386 always uses LRA rather than old reload.  */
#undef TARGET_LRA_P
#define TARGET_LRA_P hook_bool_void_true
#undef TARGET_REGISTER_PRIORITY
#define TARGET_REGISTER_PRIORITY ix86_register_priority
#undef TARGET_REGISTER_USAGE_LEVELING_P
#define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ix86_can_eliminate
#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
#undef TARGET_ASM_CODE_END
#define TARGET_ASM_CODE_END ix86_code_end
#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
/* darwin_rename_builtins is only declared/compiled for Mach-O
   (Darwin) configurations; installing it unconditionally would fail
   to link on every other i386 target.  Guard the hook accordingly.  */
#if TARGET_MACHO
#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS darwin_rename_builtins
#endif
#undef TARGET_LOOP_UNROLL_ADJUST
#define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust

#undef TARGET_SPILL_CLASS
#define TARGET_SPILL_CLASS ix86_spill_class

/* OpenMP "declare simd" clone generation.  */
#undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
#define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
  ix86_simd_clone_compute_vecsize_and_simdlen
#undef TARGET_SIMD_CLONE_ADJUST
#define TARGET_SIMD_CLONE_ADJUST \
  ix86_simd_clone_adjust
#undef TARGET_SIMD_CLONE_USABLE
#define TARGET_SIMD_CLONE_USABLE \
  ix86_simd_clone_usable

#undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
#define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
  ix86_float_exceptions_rounding_supported_p

/* Mode-switching hooks (used e.g. for x87/SSE rounding-mode and
   vzeroupper insertion via the mode-switching pass).  */
#undef TARGET_MODE_EMIT
#define TARGET_MODE_EMIT ix86_emit_mode_set
#undef TARGET_MODE_NEEDED
#define TARGET_MODE_NEEDED ix86_mode_needed
#undef TARGET_MODE_AFTER
#define TARGET_MODE_AFTER ix86_mode_after
#undef TARGET_MODE_ENTRY
#define TARGET_MODE_ENTRY ix86_mode_entry
#undef TARGET_MODE_EXIT
#define TARGET_MODE_EXIT ix86_mode_exit
#undef TARGET_MODE_PRIORITY
#define TARGET_MODE_PRIORITY ix86_mode_priority

#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true

/* Pointer Bounds Checker (MPX / tree-chkp) hooks: load/store of
   argument and return-value bounds plus bounds construction.  */
#undef TARGET_LOAD_BOUNDS_FOR_ARG
#define TARGET_LOAD_BOUNDS_FOR_ARG ix86_load_bounds
#undef TARGET_STORE_BOUNDS_FOR_ARG
#define TARGET_STORE_BOUNDS_FOR_ARG ix86_store_bounds
#undef TARGET_LOAD_RETURNED_BOUNDS
#define TARGET_LOAD_RETURNED_BOUNDS ix86_load_returned_bounds
#undef TARGET_STORE_RETURNED_BOUNDS
#define TARGET_STORE_RETURNED_BOUNDS ix86_store_returned_bounds
#undef TARGET_CHKP_BOUND_MODE
#define TARGET_CHKP_BOUND_MODE ix86_mpx_bound_mode
#undef TARGET_BUILTIN_CHKP_FUNCTION
#define TARGET_BUILTIN_CHKP_FUNCTION ix86_builtin_mpx_function
#undef TARGET_CHKP_FUNCTION_VALUE_BOUNDS
#define TARGET_CHKP_FUNCTION_VALUE_BOUNDS ix86_function_value_bounds
#undef TARGET_CHKP_MAKE_BOUNDS_CONSTANT
#define TARGET_CHKP_MAKE_BOUNDS_CONSTANT ix86_make_bounds_constant
#undef TARGET_CHKP_INITIALIZE_BOUNDS
#define TARGET_CHKP_INITIALIZE_BOUNDS ix86_initialize_bounds
#undef TARGET_SETUP_INCOMING_VARARG_BOUNDS
#define TARGET_SETUP_INCOMING_VARARG_BOUNDS ix86_setup_incoming_vararg_bounds

/* Options passed to offload (e.g. OpenMP/OpenACC) compilers.  */
#undef TARGET_OFFLOAD_OPTIONS
#define TARGET_OFFLOAD_OPTIONS \
  ix86_offload_options

/* Largest alignment (in bits) honored for objects: 64 bytes (ZMM).  */
#undef TARGET_ABSOLUTE_BIGGEST_ALIGNMENT
#define TARGET_ABSOLUTE_BIGGEST_ALIGNMENT 512

#undef TARGET_OPTAB_SUPPORTED_P
#define TARGET_OPTAB_SUPPORTED_P ix86_optab_supported_p
/* The target hook vector for i386; TARGET_INITIALIZER expands to an
   aggregate initializer built from all the TARGET_* macros above.  */
struct gcc_target targetm = TARGET_INITIALIZER;

/* Garbage-collector roots generated by gengtype for this file.  */
#include "gt-i386.h"